10102 lines
280 KiB
JSON
10102 lines
280 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4571,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007656967840735069,
|
|
"grad_norm": 13.997882752776386,
|
|
"learning_rate": 3.4934497816593887e-07,
|
|
"loss": 0.6676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6494572758674622,
|
|
"step": 5,
|
|
"valid_targets_mean": 5030.6,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 0.015313935681470138,
|
|
"grad_norm": 16.042150352702777,
|
|
"learning_rate": 7.860262008733626e-07,
|
|
"loss": 0.6736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6432679295539856,
|
|
"step": 10,
|
|
"valid_targets_mean": 5537.1,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 0.022970903522205207,
|
|
"grad_norm": 13.768207464178623,
|
|
"learning_rate": 1.222707423580786e-06,
|
|
"loss": 0.657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5933096408843994,
|
|
"step": 15,
|
|
"valid_targets_mean": 5550.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.030627871362940276,
|
|
"grad_norm": 11.169416861656957,
|
|
"learning_rate": 1.6593886462882098e-06,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5853960514068604,
|
|
"step": 20,
|
|
"valid_targets_mean": 4935.4,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 0.03828483920367534,
|
|
"grad_norm": 7.70196829557587,
|
|
"learning_rate": 2.096069868995633e-06,
|
|
"loss": 0.5846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5225365161895752,
|
|
"step": 25,
|
|
"valid_targets_mean": 4887.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.045941807044410414,
|
|
"grad_norm": 4.492171953461492,
|
|
"learning_rate": 2.5327510917030567e-06,
|
|
"loss": 0.5128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5070335268974304,
|
|
"step": 30,
|
|
"valid_targets_mean": 5811.4,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 0.05359877488514548,
|
|
"grad_norm": 2.953910981015742,
|
|
"learning_rate": 2.9694323144104806e-06,
|
|
"loss": 0.4624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46929267048835754,
|
|
"step": 35,
|
|
"valid_targets_mean": 4496.3,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 0.06125574272588055,
|
|
"grad_norm": 1.7185069687468981,
|
|
"learning_rate": 3.406113537117904e-06,
|
|
"loss": 0.4371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45726278424263,
|
|
"step": 40,
|
|
"valid_targets_mean": 4568.8,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 0.06891271056661562,
|
|
"grad_norm": 1.2031548219005852,
|
|
"learning_rate": 3.842794759825328e-06,
|
|
"loss": 0.4393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40769797563552856,
|
|
"step": 45,
|
|
"valid_targets_mean": 5177.7,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.07656967840735068,
|
|
"grad_norm": 1.0641842496634077,
|
|
"learning_rate": 4.279475982532751e-06,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4184693992137909,
|
|
"step": 50,
|
|
"valid_targets_mean": 4836.5,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 0.08422664624808576,
|
|
"grad_norm": 0.828475734146539,
|
|
"learning_rate": 4.716157205240175e-06,
|
|
"loss": 0.3962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39677250385284424,
|
|
"step": 55,
|
|
"valid_targets_mean": 5396.1,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 0.09188361408882083,
|
|
"grad_norm": 0.7394723747598436,
|
|
"learning_rate": 5.152838427947598e-06,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4369763731956482,
|
|
"step": 60,
|
|
"valid_targets_mean": 5712.2,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.0995405819295559,
|
|
"grad_norm": 0.7090139134911532,
|
|
"learning_rate": 5.589519650655022e-06,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3982025682926178,
|
|
"step": 65,
|
|
"valid_targets_mean": 5389.4,
|
|
"valid_targets_min": 2834
|
|
},
|
|
{
|
|
"epoch": 0.10719754977029096,
|
|
"grad_norm": 0.593009086861499,
|
|
"learning_rate": 6.0262008733624455e-06,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3946113586425781,
|
|
"step": 70,
|
|
"valid_targets_mean": 5376.4,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 0.11485451761102604,
|
|
"grad_norm": 0.636095693800591,
|
|
"learning_rate": 6.462882096069869e-06,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32831794023513794,
|
|
"step": 75,
|
|
"valid_targets_mean": 5254.6,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 0.1225114854517611,
|
|
"grad_norm": 0.6038700595775229,
|
|
"learning_rate": 6.8995633187772934e-06,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3565768003463745,
|
|
"step": 80,
|
|
"valid_targets_mean": 4155.2,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.13016845329249618,
|
|
"grad_norm": 0.9336428663228459,
|
|
"learning_rate": 7.336244541484717e-06,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34234848618507385,
|
|
"step": 85,
|
|
"valid_targets_mean": 4569.0,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.13782542113323124,
|
|
"grad_norm": 0.4930514762752864,
|
|
"learning_rate": 7.77292576419214e-06,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29909199476242065,
|
|
"step": 90,
|
|
"valid_targets_mean": 5865.1,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.14548238897396631,
|
|
"grad_norm": 0.5516406220071951,
|
|
"learning_rate": 8.209606986899564e-06,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35991543531417847,
|
|
"step": 95,
|
|
"valid_targets_mean": 4539.4,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.15313935681470137,
|
|
"grad_norm": 0.512386584582494,
|
|
"learning_rate": 8.646288209606988e-06,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31739532947540283,
|
|
"step": 100,
|
|
"valid_targets_mean": 5295.0,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 0.16079632465543645,
|
|
"grad_norm": 0.5553107691519705,
|
|
"learning_rate": 9.082969432314411e-06,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34066033363342285,
|
|
"step": 105,
|
|
"valid_targets_mean": 4267.7,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 0.16845329249617153,
|
|
"grad_norm": 0.5564940658094145,
|
|
"learning_rate": 9.519650655021835e-06,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3298887014389038,
|
|
"step": 110,
|
|
"valid_targets_mean": 4755.4,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 0.17611026033690658,
|
|
"grad_norm": 0.5091671224165091,
|
|
"learning_rate": 9.956331877729258e-06,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32190486788749695,
|
|
"step": 115,
|
|
"valid_targets_mean": 4750.4,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 0.18376722817764166,
|
|
"grad_norm": 0.4905326318477757,
|
|
"learning_rate": 1.0393013100436682e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3529035151004791,
|
|
"step": 120,
|
|
"valid_targets_mean": 5538.1,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 0.19142419601837674,
|
|
"grad_norm": 0.5048261747177855,
|
|
"learning_rate": 1.0829694323144107e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30244866013526917,
|
|
"step": 125,
|
|
"valid_targets_mean": 4734.5,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.1990811638591118,
|
|
"grad_norm": 0.5361239607380978,
|
|
"learning_rate": 1.1266375545851529e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.308383584022522,
|
|
"step": 130,
|
|
"valid_targets_mean": 4705.6,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 0.20673813169984687,
|
|
"grad_norm": 0.46801822029453244,
|
|
"learning_rate": 1.1703056768558954e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29804354906082153,
|
|
"step": 135,
|
|
"valid_targets_mean": 5411.8,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 0.21439509954058192,
|
|
"grad_norm": 0.47487492624683453,
|
|
"learning_rate": 1.2139737991266376e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26761284470558167,
|
|
"step": 140,
|
|
"valid_targets_mean": 4593.1,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 0.222052067381317,
|
|
"grad_norm": 0.4825202183874898,
|
|
"learning_rate": 1.2576419213973801e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27101266384124756,
|
|
"step": 145,
|
|
"valid_targets_mean": 4800.3,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.22970903522205208,
|
|
"grad_norm": 0.5108014073726733,
|
|
"learning_rate": 1.3013100436681223e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32333505153656006,
|
|
"step": 150,
|
|
"valid_targets_mean": 4791.2,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 0.23736600306278713,
|
|
"grad_norm": 0.516438690481993,
|
|
"learning_rate": 1.3449781659388648e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3009350001811981,
|
|
"step": 155,
|
|
"valid_targets_mean": 5810.0,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 0.2450229709035222,
|
|
"grad_norm": 0.5405272774787385,
|
|
"learning_rate": 1.388646288209607e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833111882209778,
|
|
"step": 160,
|
|
"valid_targets_mean": 4378.3,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.25267993874425726,
|
|
"grad_norm": 0.48541608893286375,
|
|
"learning_rate": 1.4323144104803495e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2991043031215668,
|
|
"step": 165,
|
|
"valid_targets_mean": 5449.2,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.26033690658499237,
|
|
"grad_norm": 0.4369414808456571,
|
|
"learning_rate": 1.4759825327510919e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29198282957077026,
|
|
"step": 170,
|
|
"valid_targets_mean": 5611.1,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.2679938744257274,
|
|
"grad_norm": 0.4650699347244396,
|
|
"learning_rate": 1.5196506550218343e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635342478752136,
|
|
"step": 175,
|
|
"valid_targets_mean": 5909.5,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 0.27565084226646247,
|
|
"grad_norm": 0.38904002970727797,
|
|
"learning_rate": 1.5633187772925766e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23884126543998718,
|
|
"step": 180,
|
|
"valid_targets_mean": 6394.3,
|
|
"valid_targets_min": 2659
|
|
},
|
|
{
|
|
"epoch": 0.2833078101071976,
|
|
"grad_norm": 0.4430505165500373,
|
|
"learning_rate": 1.6069868995633188e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831938862800598,
|
|
"step": 185,
|
|
"valid_targets_mean": 6014.1,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 0.29096477794793263,
|
|
"grad_norm": 0.5388790759052032,
|
|
"learning_rate": 1.6506550218340613e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2641902565956116,
|
|
"step": 190,
|
|
"valid_targets_mean": 5561.6,
|
|
"valid_targets_min": 2686
|
|
},
|
|
{
|
|
"epoch": 0.2986217457886677,
|
|
"grad_norm": 0.42663945124008945,
|
|
"learning_rate": 1.6943231441048035e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681276798248291,
|
|
"step": 195,
|
|
"valid_targets_mean": 5588.3,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 0.30627871362940273,
|
|
"grad_norm": 0.4943538108066963,
|
|
"learning_rate": 1.737991266375546e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886020839214325,
|
|
"step": 200,
|
|
"valid_targets_mean": 4703.8,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 0.31393568147013784,
|
|
"grad_norm": 0.5295715062499827,
|
|
"learning_rate": 1.7816593886462882e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28761541843414307,
|
|
"step": 205,
|
|
"valid_targets_mean": 4730.3,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.3215926493108729,
|
|
"grad_norm": 0.5017011172561703,
|
|
"learning_rate": 1.8253275109170307e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.292784184217453,
|
|
"step": 210,
|
|
"valid_targets_mean": 5370.1,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 0.32924961715160794,
|
|
"grad_norm": 0.5384473207239906,
|
|
"learning_rate": 1.868995633187773e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2923944890499115,
|
|
"step": 215,
|
|
"valid_targets_mean": 4821.5,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 0.33690658499234305,
|
|
"grad_norm": 0.54252385993879,
|
|
"learning_rate": 1.9126637554585155e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25203627347946167,
|
|
"step": 220,
|
|
"valid_targets_mean": 4703.9,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 0.3445635528330781,
|
|
"grad_norm": 0.4590346647689083,
|
|
"learning_rate": 1.9563318777292576e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26956477761268616,
|
|
"step": 225,
|
|
"valid_targets_mean": 5403.2,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 0.35222052067381315,
|
|
"grad_norm": 0.591472293986723,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686610817909241,
|
|
"step": 230,
|
|
"valid_targets_mean": 5279.8,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 0.35987748851454826,
|
|
"grad_norm": 0.5351371337014471,
|
|
"learning_rate": 2.0436681222707423e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302329421043396,
|
|
"step": 235,
|
|
"valid_targets_mean": 4985.8,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 0.3675344563552833,
|
|
"grad_norm": 0.50551102825952,
|
|
"learning_rate": 2.0873362445414852e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26980355381965637,
|
|
"step": 240,
|
|
"valid_targets_mean": 4849.7,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 0.37519142419601836,
|
|
"grad_norm": 0.5476580929537973,
|
|
"learning_rate": 2.1310043668122274e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30016613006591797,
|
|
"step": 245,
|
|
"valid_targets_mean": 4602.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 0.38284839203675347,
|
|
"grad_norm": 0.538603533015244,
|
|
"learning_rate": 2.1746724890829696e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950373888015747,
|
|
"step": 250,
|
|
"valid_targets_mean": 4244.2,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.3905053598774885,
|
|
"grad_norm": 0.5405956170504431,
|
|
"learning_rate": 2.2183406113537118e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28181028366088867,
|
|
"step": 255,
|
|
"valid_targets_mean": 4765.5,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.3981623277182236,
|
|
"grad_norm": 0.4349349330799182,
|
|
"learning_rate": 2.2620087336244546e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23736879229545593,
|
|
"step": 260,
|
|
"valid_targets_mean": 5519.2,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 0.4058192955589586,
|
|
"grad_norm": 0.479699481190532,
|
|
"learning_rate": 2.3056768558951968e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250985324382782,
|
|
"step": 265,
|
|
"valid_targets_mean": 4890.2,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 0.41347626339969373,
|
|
"grad_norm": 0.48110082764216044,
|
|
"learning_rate": 2.349344978165939e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574954926967621,
|
|
"step": 270,
|
|
"valid_targets_mean": 5548.7,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 0.4211332312404288,
|
|
"grad_norm": 0.5259890409455534,
|
|
"learning_rate": 2.3930131004366812e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683018445968628,
|
|
"step": 275,
|
|
"valid_targets_mean": 5199.2,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.42879019908116384,
|
|
"grad_norm": 0.4872321788705009,
|
|
"learning_rate": 2.436681222707424e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2631978988647461,
|
|
"step": 280,
|
|
"valid_targets_mean": 5506.2,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 0.43644716692189894,
|
|
"grad_norm": 0.49171872502106884,
|
|
"learning_rate": 2.4803493449781662e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735797166824341,
|
|
"step": 285,
|
|
"valid_targets_mean": 5573.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.444104134762634,
|
|
"grad_norm": 0.5457671973926761,
|
|
"learning_rate": 2.5240174672489084e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272636353969574,
|
|
"step": 290,
|
|
"valid_targets_mean": 4353.8,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 0.45176110260336905,
|
|
"grad_norm": 0.6850256638019349,
|
|
"learning_rate": 2.567685589519651e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25342246890068054,
|
|
"step": 295,
|
|
"valid_targets_mean": 5045.3,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 0.45941807044410415,
|
|
"grad_norm": 0.5394245332540767,
|
|
"learning_rate": 2.6113537117903935e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724701166152954,
|
|
"step": 300,
|
|
"valid_targets_mean": 4828.3,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 0.4670750382848392,
|
|
"grad_norm": 0.5175333803982558,
|
|
"learning_rate": 2.6550218340611357e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658703625202179,
|
|
"step": 305,
|
|
"valid_targets_mean": 5024.8,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 0.47473200612557426,
|
|
"grad_norm": 0.4885980644156084,
|
|
"learning_rate": 2.698689956331878e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27572572231292725,
|
|
"step": 310,
|
|
"valid_targets_mean": 4917.0,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 0.48238897396630936,
|
|
"grad_norm": 0.9846395603780147,
|
|
"learning_rate": 2.7423580786026204e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2755519151687622,
|
|
"step": 315,
|
|
"valid_targets_mean": 4701.9,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 0.4900459418070444,
|
|
"grad_norm": 0.5476680571665029,
|
|
"learning_rate": 2.786026200873363e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31073257327079773,
|
|
"step": 320,
|
|
"valid_targets_mean": 5116.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.49770290964777947,
|
|
"grad_norm": 0.48787777876251054,
|
|
"learning_rate": 2.829694323144105e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24321046471595764,
|
|
"step": 325,
|
|
"valid_targets_mean": 4564.9,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.5053598774885145,
|
|
"grad_norm": 0.581959700960055,
|
|
"learning_rate": 2.8733624454148473e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841954827308655,
|
|
"step": 330,
|
|
"valid_targets_mean": 5433.1,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.5130168453292496,
|
|
"grad_norm": 0.5197427771747507,
|
|
"learning_rate": 2.9170305676855898e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24852226674556732,
|
|
"step": 335,
|
|
"valid_targets_mean": 5383.6,
|
|
"valid_targets_min": 2532
|
|
},
|
|
{
|
|
"epoch": 0.5206738131699847,
|
|
"grad_norm": 0.5832961166696086,
|
|
"learning_rate": 2.960698689956332e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283719003200531,
|
|
"step": 340,
|
|
"valid_targets_mean": 4863.8,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 0.5283307810107197,
|
|
"grad_norm": 0.5190165410837098,
|
|
"learning_rate": 3.0043668122270745e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984423041343689,
|
|
"step": 345,
|
|
"valid_targets_mean": 5248.1,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.5359877488514548,
|
|
"grad_norm": 0.5424715125473839,
|
|
"learning_rate": 3.0480349344978167e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27597424387931824,
|
|
"step": 350,
|
|
"valid_targets_mean": 4519.0,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.5436447166921899,
|
|
"grad_norm": 0.5792391186945091,
|
|
"learning_rate": 3.091703056768559e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26070481538772583,
|
|
"step": 355,
|
|
"valid_targets_mean": 4367.1,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.5513016845329249,
|
|
"grad_norm": 0.61377111290995,
|
|
"learning_rate": 3.1353711790393014e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30032479763031006,
|
|
"step": 360,
|
|
"valid_targets_mean": 4076.1,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 0.55895865237366,
|
|
"grad_norm": 0.4499780741388462,
|
|
"learning_rate": 3.1790393013100436e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26613035798072815,
|
|
"step": 365,
|
|
"valid_targets_mean": 5930.6,
|
|
"valid_targets_min": 2546
|
|
},
|
|
{
|
|
"epoch": 0.5666156202143952,
|
|
"grad_norm": 0.6040296111678708,
|
|
"learning_rate": 3.2227074235807864e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26369449496269226,
|
|
"step": 370,
|
|
"valid_targets_mean": 4356.6,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 0.5742725880551302,
|
|
"grad_norm": 0.5341830406768366,
|
|
"learning_rate": 3.2663755458515286e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30968743562698364,
|
|
"step": 375,
|
|
"valid_targets_mean": 5326.2,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 0.5819295558958653,
|
|
"grad_norm": 0.6231930070736144,
|
|
"learning_rate": 3.310043668122271e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29652100801467896,
|
|
"step": 380,
|
|
"valid_targets_mean": 4574.1,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 0.5895865237366003,
|
|
"grad_norm": 0.575410318717094,
|
|
"learning_rate": 3.353711790393013e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684730589389801,
|
|
"step": 385,
|
|
"valid_targets_mean": 4358.5,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 0.5972434915773354,
|
|
"grad_norm": 0.4673654478032364,
|
|
"learning_rate": 3.397379912663756e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2228243201971054,
|
|
"step": 390,
|
|
"valid_targets_mean": 5666.8,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.6049004594180705,
|
|
"grad_norm": 0.48675286049843114,
|
|
"learning_rate": 3.441048034934498e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23089733719825745,
|
|
"step": 395,
|
|
"valid_targets_mean": 5237.9,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 0.6125574272588055,
|
|
"grad_norm": 0.560591662575993,
|
|
"learning_rate": 3.48471615720524e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665928602218628,
|
|
"step": 400,
|
|
"valid_targets_mean": 4672.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.6202143950995406,
|
|
"grad_norm": 0.42731570802442403,
|
|
"learning_rate": 3.5283842794759824e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22624076902866364,
|
|
"step": 405,
|
|
"valid_targets_mean": 5482.8,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.6278713629402757,
|
|
"grad_norm": 0.5347556972076133,
|
|
"learning_rate": 3.572052401746725e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23603232204914093,
|
|
"step": 410,
|
|
"valid_targets_mean": 5739.4,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 0.6355283307810107,
|
|
"grad_norm": 0.5325226811766738,
|
|
"learning_rate": 3.6157205240174675e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2480698823928833,
|
|
"step": 415,
|
|
"valid_targets_mean": 4790.4,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 0.6431852986217458,
|
|
"grad_norm": 0.48048305351290477,
|
|
"learning_rate": 3.6593886462882097e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24673986434936523,
|
|
"step": 420,
|
|
"valid_targets_mean": 5280.0,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 0.6508422664624809,
|
|
"grad_norm": 0.5456250025761155,
|
|
"learning_rate": 3.7030567685589525e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2480792999267578,
|
|
"step": 425,
|
|
"valid_targets_mean": 5479.8,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 0.6584992343032159,
|
|
"grad_norm": 0.564296936232767,
|
|
"learning_rate": 3.746724890829695e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615630030632019,
|
|
"step": 430,
|
|
"valid_targets_mean": 5361.0,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 0.666156202143951,
|
|
"grad_norm": 0.48996027764636907,
|
|
"learning_rate": 3.790393013100437e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24756395816802979,
|
|
"step": 435,
|
|
"valid_targets_mean": 4916.1,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 0.6738131699846861,
|
|
"grad_norm": 0.44623955424804534,
|
|
"learning_rate": 3.834061135371179e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23491841554641724,
|
|
"step": 440,
|
|
"valid_targets_mean": 5672.4,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.6814701378254211,
|
|
"grad_norm": 0.45284851458486386,
|
|
"learning_rate": 3.877729257641922e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2342417687177658,
|
|
"step": 445,
|
|
"valid_targets_mean": 5099.3,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 0.6891271056661562,
|
|
"grad_norm": 0.5324943693738649,
|
|
"learning_rate": 3.921397379912664e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27402836084365845,
|
|
"step": 450,
|
|
"valid_targets_mean": 4484.3,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.6967840735068913,
|
|
"grad_norm": 0.4646196239617948,
|
|
"learning_rate": 3.965065502183406e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24006010591983795,
|
|
"step": 455,
|
|
"valid_targets_mean": 4958.8,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 0.7044410413476263,
|
|
"grad_norm": 0.4405348727571011,
|
|
"learning_rate": 3.9999994165786676e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22806835174560547,
|
|
"step": 460,
|
|
"valid_targets_mean": 5592.2,
|
|
"valid_targets_min": 2673
|
|
},
|
|
{
|
|
"epoch": 0.7120980091883614,
|
|
"grad_norm": 0.5562861867740798,
|
|
"learning_rate": 3.9999789968677496e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2417786568403244,
|
|
"step": 465,
|
|
"valid_targets_mean": 4365.9,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.7197549770290965,
|
|
"grad_norm": 0.5080252815095521,
|
|
"learning_rate": 3.999929406430558e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633582651615143,
|
|
"step": 470,
|
|
"valid_targets_mean": 4811.5,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 0.7274119448698315,
|
|
"grad_norm": 0.5272035775867764,
|
|
"learning_rate": 3.999850645990394e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24008730053901672,
|
|
"step": 475,
|
|
"valid_targets_mean": 4771.0,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 0.7350689127105666,
|
|
"grad_norm": 0.4754003904766261,
|
|
"learning_rate": 3.999742716696021e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23725548386573792,
|
|
"step": 480,
|
|
"valid_targets_mean": 5124.4,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 0.7427258805513017,
|
|
"grad_norm": 0.6101466292156634,
|
|
"learning_rate": 3.999605620121641e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24445194005966187,
|
|
"step": 485,
|
|
"valid_targets_mean": 5324.8,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.7503828483920367,
|
|
"grad_norm": 0.4637006971657633,
|
|
"learning_rate": 3.9994393582668806e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2397041618824005,
|
|
"step": 490,
|
|
"valid_targets_mean": 5648.9,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 0.7580398162327718,
|
|
"grad_norm": 0.5016071627691048,
|
|
"learning_rate": 3.999243933556753e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539515197277069,
|
|
"step": 495,
|
|
"valid_targets_mean": 4441.4,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 0.7656967840735069,
|
|
"grad_norm": 0.4754215254300854,
|
|
"learning_rate": 3.9990193488416304e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2377694994211197,
|
|
"step": 500,
|
|
"valid_targets_mean": 5602.7,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.7733537519142419,
|
|
"grad_norm": 0.4442741247780648,
|
|
"learning_rate": 3.9987656073971946e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2323368787765503,
|
|
"step": 505,
|
|
"valid_targets_mean": 5325.0,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 0.781010719754977,
|
|
"grad_norm": 0.499596334503463,
|
|
"learning_rate": 3.998482712924397e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620323896408081,
|
|
"step": 510,
|
|
"valid_targets_mean": 4717.8,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.7886676875957122,
|
|
"grad_norm": 0.4715666857814281,
|
|
"learning_rate": 3.9981706695493996e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27169761061668396,
|
|
"step": 515,
|
|
"valid_targets_mean": 5186.8,
|
|
"valid_targets_min": 2363
|
|
},
|
|
{
|
|
"epoch": 0.7963246554364471,
|
|
"grad_norm": 0.5743001328378289,
|
|
"learning_rate": 3.997829481823515e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27855122089385986,
|
|
"step": 520,
|
|
"valid_targets_mean": 4762.1,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.8039816232771823,
|
|
"grad_norm": 0.47759303305733847,
|
|
"learning_rate": 3.997459154723144e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2423936426639557,
|
|
"step": 525,
|
|
"valid_targets_mean": 5297.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.8116385911179173,
|
|
"grad_norm": 0.46683109553118957,
|
|
"learning_rate": 3.9970596936496976e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2846606373786926,
|
|
"step": 530,
|
|
"valid_targets_mean": 6037.3,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.8192955589586524,
|
|
"grad_norm": 0.4707779779562285,
|
|
"learning_rate": 3.996631104429521e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22087471187114716,
|
|
"step": 535,
|
|
"valid_targets_mean": 5019.9,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.8269525267993875,
|
|
"grad_norm": 0.5479924065007178,
|
|
"learning_rate": 3.9961733933138106e-05,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23031362891197205,
|
|
"step": 540,
|
|
"valid_targets_mean": 4613.0,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 0.8346094946401225,
|
|
"grad_norm": 0.5706626665016054,
|
|
"learning_rate": 3.9956865669785185e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653731405735016,
|
|
"step": 545,
|
|
"valid_targets_mean": 4250.2,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 0.8422664624808576,
|
|
"grad_norm": 0.45823962344649066,
|
|
"learning_rate": 3.9951706325242595e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.214570090174675,
|
|
"step": 550,
|
|
"valid_targets_mean": 5601.8,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 0.8499234303215927,
|
|
"grad_norm": 0.4798067262429478,
|
|
"learning_rate": 3.9946255974762023e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22499428689479828,
|
|
"step": 555,
|
|
"valid_targets_mean": 4887.4,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.8575803981623277,
|
|
"grad_norm": 0.5354869537077036,
|
|
"learning_rate": 3.9940514697839654e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552046775817871,
|
|
"step": 560,
|
|
"valid_targets_mean": 4646.8,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 0.8652373660030628,
|
|
"grad_norm": 0.404422046345916,
|
|
"learning_rate": 3.993448257821498e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2055911421775818,
|
|
"step": 565,
|
|
"valid_targets_mean": 5405.6,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 0.8728943338437979,
|
|
"grad_norm": 0.9512439674975726,
|
|
"learning_rate": 3.992815970386956e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22652213275432587,
|
|
"step": 570,
|
|
"valid_targets_mean": 5747.8,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.8805513016845329,
|
|
"grad_norm": 0.49581746326271475,
|
|
"learning_rate": 3.99215461670258e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25263649225234985,
|
|
"step": 575,
|
|
"valid_targets_mean": 4818.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.888208269525268,
|
|
"grad_norm": 0.5193294216924808,
|
|
"learning_rate": 3.9914642064145555e-05,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24224933981895447,
|
|
"step": 580,
|
|
"valid_targets_mean": 4697.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.8958652373660031,
|
|
"grad_norm": 0.5081023804449541,
|
|
"learning_rate": 3.990744749592871e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25167709589004517,
|
|
"step": 585,
|
|
"valid_targets_mean": 5235.0,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 0.9035222052067381,
|
|
"grad_norm": 0.43150283564796665,
|
|
"learning_rate": 3.989996256731178e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24848490953445435,
|
|
"step": 590,
|
|
"valid_targets_mean": 5873.1,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 0.9111791730474732,
|
|
"grad_norm": 0.490082147198131,
|
|
"learning_rate": 3.9892187387466286e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544364631175995,
|
|
"step": 595,
|
|
"valid_targets_mean": 5240.1,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.9188361408882083,
|
|
"grad_norm": 0.47301661085243424,
|
|
"learning_rate": 3.9884122069797256e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23627778887748718,
|
|
"step": 600,
|
|
"valid_targets_mean": 4547.9,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 0.9264931087289433,
|
|
"grad_norm": 0.6020593350379793,
|
|
"learning_rate": 3.9875766731941514e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25998401641845703,
|
|
"step": 605,
|
|
"valid_targets_mean": 5206.4,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 0.9341500765696784,
|
|
"grad_norm": 0.4343199562247558,
|
|
"learning_rate": 3.986712149576597e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21079614758491516,
|
|
"step": 610,
|
|
"valid_targets_mean": 5721.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 0.9418070444104135,
|
|
"grad_norm": 0.4959240813023885,
|
|
"learning_rate": 3.985818648736588e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2344963252544403,
|
|
"step": 615,
|
|
"valid_targets_mean": 5268.9,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.9494640122511485,
|
|
"grad_norm": 0.44727712842280476,
|
|
"learning_rate": 3.984896183706291e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21470850706100464,
|
|
"step": 620,
|
|
"valid_targets_mean": 4735.4,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.9571209800918836,
|
|
"grad_norm": 0.4770114196646438,
|
|
"learning_rate": 3.983944767940339e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251364529132843,
|
|
"step": 625,
|
|
"valid_targets_mean": 5323.6,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 0.9647779479326187,
|
|
"grad_norm": 0.5049867980521303,
|
|
"learning_rate": 3.98296441531562e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861921489238739,
|
|
"step": 630,
|
|
"valid_targets_mean": 4449.9,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 0.9724349157733537,
|
|
"grad_norm": 0.4585093358813056,
|
|
"learning_rate": 3.9819551401310834e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2122216820716858,
|
|
"step": 635,
|
|
"valid_targets_mean": 5522.1,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 0.9800918836140888,
|
|
"grad_norm": 0.5195599231781355,
|
|
"learning_rate": 3.980916957107529e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2382555454969406,
|
|
"step": 640,
|
|
"valid_targets_mean": 4636.5,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 0.9877488514548239,
|
|
"grad_norm": 0.5243648163119432,
|
|
"learning_rate": 3.979849881387393e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2904030680656433,
|
|
"step": 645,
|
|
"valid_targets_mean": 4576.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.9954058192955589,
|
|
"grad_norm": 0.5320338443965096,
|
|
"learning_rate": 3.9787539285345245e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518049478530884,
|
|
"step": 650,
|
|
"valid_targets_mean": 3986.9,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 1.003062787136294,
|
|
"grad_norm": 0.4543275980951745,
|
|
"learning_rate": 3.977629114533963e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21149328351020813,
|
|
"step": 655,
|
|
"valid_targets_mean": 4705.1,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.010719754977029,
|
|
"grad_norm": 0.4610185176199923,
|
|
"learning_rate": 3.9764754557917e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2165687382221222,
|
|
"step": 660,
|
|
"valid_targets_mean": 4701.8,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 1.0183767228177643,
|
|
"grad_norm": 0.3938486079154482,
|
|
"learning_rate": 3.975292969134445e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19741511344909668,
|
|
"step": 665,
|
|
"valid_targets_mean": 5695.2,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 1.0260336906584993,
|
|
"grad_norm": 0.4323156308860325,
|
|
"learning_rate": 3.974081671809376e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22385263442993164,
|
|
"step": 670,
|
|
"valid_targets_mean": 5580.4,
|
|
"valid_targets_min": 3056
|
|
},
|
|
{
|
|
"epoch": 1.0336906584992342,
|
|
"grad_norm": 0.47472316536136555,
|
|
"learning_rate": 3.97284158148389e-05,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535420060157776,
|
|
"step": 675,
|
|
"valid_targets_mean": 5442.3,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 1.0413476263399695,
|
|
"grad_norm": 0.462265375894869,
|
|
"learning_rate": 3.971572716245344e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21901056170463562,
|
|
"step": 680,
|
|
"valid_targets_mean": 5023.1,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 1.0490045941807045,
|
|
"grad_norm": 0.48248662335215253,
|
|
"learning_rate": 3.970275094600794e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24081990122795105,
|
|
"step": 685,
|
|
"valid_targets_mean": 4785.9,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.0566615620214395,
|
|
"grad_norm": 0.4832238002221963,
|
|
"learning_rate": 3.968948735476721e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2480914294719696,
|
|
"step": 690,
|
|
"valid_targets_mean": 4995.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.0643185298621747,
|
|
"grad_norm": 0.49185121927326225,
|
|
"learning_rate": 3.9675936582187574e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22463348507881165,
|
|
"step": 695,
|
|
"valid_targets_mean": 4966.1,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.0719754977029097,
|
|
"grad_norm": 0.4852510851215775,
|
|
"learning_rate": 3.966209882591404e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23765654861927032,
|
|
"step": 700,
|
|
"valid_targets_mean": 4573.9,
|
|
"valid_targets_min": 1632
|
|
},
|
|
{
|
|
"epoch": 1.0796324655436447,
|
|
"grad_norm": 0.7554692659763621,
|
|
"learning_rate": 3.9647974287777444e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21256719529628754,
|
|
"step": 705,
|
|
"valid_targets_mean": 5479.7,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 1.0872894333843799,
|
|
"grad_norm": 0.5138481666787079,
|
|
"learning_rate": 3.9633563173791454e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24174924194812775,
|
|
"step": 710,
|
|
"valid_targets_mean": 4438.2,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.0949464012251149,
|
|
"grad_norm": 0.5480222726912267,
|
|
"learning_rate": 3.961886569414962e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23572365939617157,
|
|
"step": 715,
|
|
"valid_targets_mean": 4192.2,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 1.1026033690658499,
|
|
"grad_norm": 0.4970895044179645,
|
|
"learning_rate": 3.9603882063222254e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2110937386751175,
|
|
"step": 720,
|
|
"valid_targets_mean": 4099.9,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 1.110260336906585,
|
|
"grad_norm": 0.3803758195568299,
|
|
"learning_rate": 3.958861249955336e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19598951935768127,
|
|
"step": 725,
|
|
"valid_targets_mean": 5967.6,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 1.11791730474732,
|
|
"grad_norm": 0.44999478220240136,
|
|
"learning_rate": 3.957305722585742e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24348898231983185,
|
|
"step": 730,
|
|
"valid_targets_mean": 5976.7,
|
|
"valid_targets_min": 3547
|
|
},
|
|
{
|
|
"epoch": 1.125574272588055,
|
|
"grad_norm": 0.6543210798518898,
|
|
"learning_rate": 3.955721646901611e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2491251528263092,
|
|
"step": 735,
|
|
"valid_targets_mean": 4430.6,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 1.13323124042879,
|
|
"grad_norm": 0.5116096170224053,
|
|
"learning_rate": 3.954109046007506e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24491837620735168,
|
|
"step": 740,
|
|
"valid_targets_mean": 5439.1,
|
|
"valid_targets_min": 3023
|
|
},
|
|
{
|
|
"epoch": 1.1408882082695253,
|
|
"grad_norm": 0.4657142210971288,
|
|
"learning_rate": 3.9524679434240426e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513515055179596,
|
|
"step": 745,
|
|
"valid_targets_mean": 5713.0,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 1.1485451761102603,
|
|
"grad_norm": 0.47461273340221555,
|
|
"learning_rate": 3.95079836308755e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22947482764720917,
|
|
"step": 750,
|
|
"valid_targets_mean": 4901.0,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.1562021439509955,
|
|
"grad_norm": 0.5357763463044893,
|
|
"learning_rate": 3.94910032934972e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31903350353240967,
|
|
"step": 755,
|
|
"valid_targets_mean": 4305.5,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.1638591117917305,
|
|
"grad_norm": 0.44975376536296746,
|
|
"learning_rate": 3.947373866977251e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732940912246704,
|
|
"step": 760,
|
|
"valid_targets_mean": 5532.4,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 1.1715160796324655,
|
|
"grad_norm": 0.5172389291640855,
|
|
"learning_rate": 3.945619001151487e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661622166633606,
|
|
"step": 765,
|
|
"valid_targets_mean": 4083.5,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 1.1791730474732005,
|
|
"grad_norm": 0.5218749820672336,
|
|
"learning_rate": 3.9438357574680536e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2443704754114151,
|
|
"step": 770,
|
|
"valid_targets_mean": 4730.9,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 1.1868300153139357,
|
|
"grad_norm": 0.5132882152134416,
|
|
"learning_rate": 3.9420241619364794e-05,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23841838538646698,
|
|
"step": 775,
|
|
"valid_targets_mean": 4711.1,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.1944869831546707,
|
|
"grad_norm": 0.4750023702302424,
|
|
"learning_rate": 3.940184240979822e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20796674489974976,
|
|
"step": 780,
|
|
"valid_targets_mean": 4511.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.202143950995406,
|
|
"grad_norm": 0.4336368898063612,
|
|
"learning_rate": 3.9383160214342775e-05,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22842389345169067,
|
|
"step": 785,
|
|
"valid_targets_mean": 5245.5,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 1.209800918836141,
|
|
"grad_norm": 0.461955332746975,
|
|
"learning_rate": 3.9364195305487926e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19852107763290405,
|
|
"step": 790,
|
|
"valid_targets_mean": 4429.8,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 1.217457886676876,
|
|
"grad_norm": 0.44263849837581704,
|
|
"learning_rate": 3.934494795984666e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2133197784423828,
|
|
"step": 795,
|
|
"valid_targets_mean": 6112.6,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 1.225114854517611,
|
|
"grad_norm": 0.46310194439122904,
|
|
"learning_rate": 3.932541845815145e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2494787722826004,
|
|
"step": 800,
|
|
"valid_targets_mean": 5680.5,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.2327718223583461,
|
|
"grad_norm": 0.534237892887851,
|
|
"learning_rate": 3.930560708525018e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2164071798324585,
|
|
"step": 805,
|
|
"valid_targets_mean": 4722.7,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 1.2404287901990811,
|
|
"grad_norm": 0.5447789231267499,
|
|
"learning_rate": 3.9285514130101916e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2128959447145462,
|
|
"step": 810,
|
|
"valid_targets_mean": 5702.2,
|
|
"valid_targets_min": 2581
|
|
},
|
|
{
|
|
"epoch": 1.2480857580398161,
|
|
"grad_norm": 0.4541670432548241,
|
|
"learning_rate": 3.926513988577282e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24208541214466095,
|
|
"step": 815,
|
|
"valid_targets_mean": 5627.6,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 1.2557427258805514,
|
|
"grad_norm": 0.5066058652602967,
|
|
"learning_rate": 3.924448464943174e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20324429869651794,
|
|
"step": 820,
|
|
"valid_targets_mean": 5178.3,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 1.2633996937212864,
|
|
"grad_norm": 0.4824685977219757,
|
|
"learning_rate": 3.922354872234596e-05,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21685411036014557,
|
|
"step": 825,
|
|
"valid_targets_mean": 5625.1,
|
|
"valid_targets_min": 2583
|
|
},
|
|
{
|
|
"epoch": 1.2710566615620214,
|
|
"grad_norm": 0.5000431811675772,
|
|
"learning_rate": 3.9202332409876814e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22782090306282043,
|
|
"step": 830,
|
|
"valid_targets_mean": 4467.5,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 1.2787136294027566,
|
|
"grad_norm": 0.46323331152102815,
|
|
"learning_rate": 3.918083602147515e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2275102138519287,
|
|
"step": 835,
|
|
"valid_targets_mean": 5634.8,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 1.2863705972434916,
|
|
"grad_norm": 0.4773797488887894,
|
|
"learning_rate": 3.91590598706769e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24412119388580322,
|
|
"step": 840,
|
|
"valid_targets_mean": 4812.3,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.2940275650842268,
|
|
"grad_norm": 0.5076582879336042,
|
|
"learning_rate": 3.913700427509847e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958303689956665,
|
|
"step": 845,
|
|
"valid_targets_mean": 5836.1,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.3016845329249618,
|
|
"grad_norm": 0.5949340076342235,
|
|
"learning_rate": 3.911466955643209e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23251014947891235,
|
|
"step": 850,
|
|
"valid_targets_mean": 5130.4,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 1.3093415007656968,
|
|
"grad_norm": 0.4670023051943422,
|
|
"learning_rate": 3.909205604044119e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2273961305618286,
|
|
"step": 855,
|
|
"valid_targets_mean": 5627.8,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.3169984686064318,
|
|
"grad_norm": 0.4559961360039699,
|
|
"learning_rate": 3.9069164056955556e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20973217487335205,
|
|
"step": 860,
|
|
"valid_targets_mean": 5071.0,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 1.324655436447167,
|
|
"grad_norm": 1.055965972410453,
|
|
"learning_rate": 3.90459939398666e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2360004484653473,
|
|
"step": 865,
|
|
"valid_targets_mean": 4669.8,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.332312404287902,
|
|
"grad_norm": 0.4403899930388177,
|
|
"learning_rate": 3.902254602712242e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21914419531822205,
|
|
"step": 870,
|
|
"valid_targets_mean": 5948.8,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 1.339969372128637,
|
|
"grad_norm": 0.49919259365975516,
|
|
"learning_rate": 3.899882066072296e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2369828075170517,
|
|
"step": 875,
|
|
"valid_targets_mean": 4776.4,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.3476263399693722,
|
|
"grad_norm": 0.465536683491622,
|
|
"learning_rate": 3.897481818671493e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21700987219810486,
|
|
"step": 880,
|
|
"valid_targets_mean": 5091.3,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 1.3552833078101072,
|
|
"grad_norm": 0.38610096562448637,
|
|
"learning_rate": 3.895053895518679e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20991790294647217,
|
|
"step": 885,
|
|
"valid_targets_mean": 6598.3,
|
|
"valid_targets_min": 3662
|
|
},
|
|
{
|
|
"epoch": 1.3629402756508422,
|
|
"grad_norm": 0.3931810607665223,
|
|
"learning_rate": 3.892598332026368e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20161226391792297,
|
|
"step": 890,
|
|
"valid_targets_mean": 5230.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 1.3705972434915774,
|
|
"grad_norm": 0.46758214305344503,
|
|
"learning_rate": 3.8901151640102214e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2412872463464737,
|
|
"step": 895,
|
|
"valid_targets_mean": 5018.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.3782542113323124,
|
|
"grad_norm": 0.47535688154677647,
|
|
"learning_rate": 3.8876044276885264e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20891433954238892,
|
|
"step": 900,
|
|
"valid_targets_mean": 4952.4,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 1.3859111791730474,
|
|
"grad_norm": 0.3800112743149217,
|
|
"learning_rate": 3.885066159681668e-05,
|
|
"loss": 0.2212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20787039399147034,
|
|
"step": 905,
|
|
"valid_targets_mean": 5950.2,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 1.3935681470137826,
|
|
"grad_norm": 0.5717375079447776,
|
|
"learning_rate": 3.882500397011597e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2375756800174713,
|
|
"step": 910,
|
|
"valid_targets_mean": 4286.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 1.4012251148545176,
|
|
"grad_norm": 0.4423799703448592,
|
|
"learning_rate": 3.8799071771012865e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2233145534992218,
|
|
"step": 915,
|
|
"valid_targets_mean": 5010.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.4088820826952526,
|
|
"grad_norm": 0.7674568548996449,
|
|
"learning_rate": 3.877286537774187e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25533410906791687,
|
|
"step": 920,
|
|
"valid_targets_mean": 5108.3,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.4165390505359878,
|
|
"grad_norm": 0.4672643825009114,
|
|
"learning_rate": 3.874638517253676e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24158219993114471,
|
|
"step": 925,
|
|
"valid_targets_mean": 4805.2,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 1.4241960183767228,
|
|
"grad_norm": 0.4803000711402261,
|
|
"learning_rate": 3.871963154162501e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522413730621338,
|
|
"step": 930,
|
|
"valid_targets_mean": 4650.3,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 1.4318529862174578,
|
|
"grad_norm": 0.4622132223927811,
|
|
"learning_rate": 3.869260487522213e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2327278107404709,
|
|
"step": 935,
|
|
"valid_targets_mean": 4991.4,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 1.439509954058193,
|
|
"grad_norm": 0.4466120611595715,
|
|
"learning_rate": 3.866530556752601e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2425045520067215,
|
|
"step": 940,
|
|
"valid_targets_mean": 4719.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 1.447166921898928,
|
|
"grad_norm": 0.4573816713524243,
|
|
"learning_rate": 3.8637734016711144e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244962453842163,
|
|
"step": 945,
|
|
"valid_targets_mean": 4643.3,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.454823889739663,
|
|
"grad_norm": 0.458299903716274,
|
|
"learning_rate": 3.860989062492284e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2286277562379837,
|
|
"step": 950,
|
|
"valid_targets_mean": 4489.7,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 1.462480857580398,
|
|
"grad_norm": 0.5032857134165045,
|
|
"learning_rate": 3.858177579827133e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26451292634010315,
|
|
"step": 955,
|
|
"valid_targets_mean": 6107.7,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 1.4701378254211332,
|
|
"grad_norm": 0.4842909516880842,
|
|
"learning_rate": 3.8553389946825896e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24591636657714844,
|
|
"step": 960,
|
|
"valid_targets_mean": 4898.8,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 1.4777947932618682,
|
|
"grad_norm": 0.4073502413011468,
|
|
"learning_rate": 3.8524733484608824e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20459729433059692,
|
|
"step": 965,
|
|
"valid_targets_mean": 6015.3,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 1.4854517611026035,
|
|
"grad_norm": 0.40576274013727376,
|
|
"learning_rate": 3.8495806829589416e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21941059827804565,
|
|
"step": 970,
|
|
"valid_targets_mean": 5581.0,
|
|
"valid_targets_min": 2235
|
|
},
|
|
{
|
|
"epoch": 1.4931087289433385,
|
|
"grad_norm": 0.6974578241353532,
|
|
"learning_rate": 3.8466610403677874e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19623053073883057,
|
|
"step": 975,
|
|
"valid_targets_mean": 5069.4,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 1.5007656967840735,
|
|
"grad_norm": 0.45967960346904296,
|
|
"learning_rate": 3.8437144632719136e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2094549983739853,
|
|
"step": 980,
|
|
"valid_targets_mean": 4371.3,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 1.5084226646248085,
|
|
"grad_norm": 0.42898168106530515,
|
|
"learning_rate": 3.840740994648669e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20533907413482666,
|
|
"step": 985,
|
|
"valid_targets_mean": 4905.2,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 1.5160796324655437,
|
|
"grad_norm": 0.5771406355795161,
|
|
"learning_rate": 3.837740677867628e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26875361800193787,
|
|
"step": 990,
|
|
"valid_targets_mean": 4721.5,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 1.5237366003062787,
|
|
"grad_norm": 0.4902824785618498,
|
|
"learning_rate": 3.8347135566899616e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583337128162384,
|
|
"step": 995,
|
|
"valid_targets_mean": 4502.8,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.5313935681470139,
|
|
"grad_norm": 0.44320917141796234,
|
|
"learning_rate": 3.831659675267793e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20274272561073303,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4891.5,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.5390505359877489,
|
|
"grad_norm": 0.44737506920889025,
|
|
"learning_rate": 3.828579078143561e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24201388657093048,
|
|
"step": 1005,
|
|
"valid_targets_mean": 5657.9,
|
|
"valid_targets_min": 3308
|
|
},
|
|
{
|
|
"epoch": 1.5467075038284839,
|
|
"grad_norm": 0.4987453352941786,
|
|
"learning_rate": 3.825471810249365e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23472508788108826,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5547.6,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 1.5543644716692189,
|
|
"grad_norm": 0.48227849169472226,
|
|
"learning_rate": 3.822337916906311e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520979642868042,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4113.2,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.562021439509954,
|
|
"grad_norm": 0.40392077613965266,
|
|
"learning_rate": 3.8191774438238514e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21043488383293152,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5685.4,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 1.569678407350689,
|
|
"grad_norm": 0.4284123643349941,
|
|
"learning_rate": 3.815990437099118e-05,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21044665575027466,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5459.0,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.5773353751914243,
|
|
"grad_norm": 0.4674790436901617,
|
|
"learning_rate": 3.81277694321625e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23535355925559998,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4770.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.5849923430321593,
|
|
"grad_norm": 0.3788371319532689,
|
|
"learning_rate": 3.809537009045714e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20070108771324158,
|
|
"step": 1035,
|
|
"valid_targets_mean": 5832.2,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 1.5926493108728943,
|
|
"grad_norm": 0.5196182043035402,
|
|
"learning_rate": 3.8062706818436234e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26477953791618347,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3962.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 1.6003062787136293,
|
|
"grad_norm": 0.48358135151926473,
|
|
"learning_rate": 3.802978009251046e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25546973943710327,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4565.6,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 1.6079632465543645,
|
|
"grad_norm": 0.4289401594279837,
|
|
"learning_rate": 3.799659039293312e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2194889783859253,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5301.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 1.6156202143950995,
|
|
"grad_norm": 0.451232811004955,
|
|
"learning_rate": 3.796313820379313e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2255152463912964,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5610.7,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.6232771822358347,
|
|
"grad_norm": 0.4193660210903287,
|
|
"learning_rate": 3.792942401300792e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20200134813785553,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5545.6,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 1.6309341500765697,
|
|
"grad_norm": 0.4869469520460917,
|
|
"learning_rate": 3.789544831231639e-05,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2149246633052826,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3840.6,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.6385911179173047,
|
|
"grad_norm": 0.8289501528499609,
|
|
"learning_rate": 3.7861211597271655e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23823891580104828,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5751.8,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 1.6462480857580397,
|
|
"grad_norm": 0.3834423104766572,
|
|
"learning_rate": 3.782671436723389e-05,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19142815470695496,
|
|
"step": 1075,
|
|
"valid_targets_mean": 6619.1,
|
|
"valid_targets_min": 3373
|
|
},
|
|
{
|
|
"epoch": 1.653905053598775,
|
|
"grad_norm": 0.48705512770145654,
|
|
"learning_rate": 3.779195712536301e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23004969954490662,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4073.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.66156202143951,
|
|
"grad_norm": 0.4542717256577794,
|
|
"learning_rate": 3.775694037861134e-05,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2153184711933136,
|
|
"step": 1085,
|
|
"valid_targets_mean": 5164.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.6692189892802451,
|
|
"grad_norm": 0.45851010452010027,
|
|
"learning_rate": 3.772166463771619e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2138391137123108,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5276.1,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 1.6768759571209801,
|
|
"grad_norm": 0.5506935584762426,
|
|
"learning_rate": 3.768613041719247e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22046592831611633,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4694.3,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 1.6845329249617151,
|
|
"grad_norm": 0.46507047548755864,
|
|
"learning_rate": 3.765033823532514e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22383110225200653,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4433.4,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 1.6921898928024501,
|
|
"grad_norm": 0.5379797863275028,
|
|
"learning_rate": 3.7614288614161625e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27269816398620605,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3752.3,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.6998468606431854,
|
|
"grad_norm": 0.43058162892643365,
|
|
"learning_rate": 3.7577982079504284e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2153037041425705,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5146.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.7075038284839203,
|
|
"grad_norm": 0.44249590505034403,
|
|
"learning_rate": 3.754141916090266e-05,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20741818845272064,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5802.1,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.7151607963246556,
|
|
"grad_norm": 0.458772872006437,
|
|
"learning_rate": 3.750460039164581e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24594131112098694,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4699.8,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 1.7228177641653906,
|
|
"grad_norm": 0.7672926531697714,
|
|
"learning_rate": 3.746752630875448e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24634478986263275,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3561.6,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 1.7304747320061256,
|
|
"grad_norm": 0.43972274729807126,
|
|
"learning_rate": 3.743019745297332e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22277215123176575,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5361.2,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 1.7381316998468606,
|
|
"grad_norm": 0.4516327506576068,
|
|
"learning_rate": 3.739261436876296e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23480652272701263,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4729.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.7457886676875956,
|
|
"grad_norm": 0.4949615258488163,
|
|
"learning_rate": 3.73547776042921e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2477773129940033,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4868.1,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 1.7534456355283308,
|
|
"grad_norm": 0.4717486814671122,
|
|
"learning_rate": 3.731668771142946e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20434430241584778,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4753.1,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.761102603369066,
|
|
"grad_norm": 0.43298582473400343,
|
|
"learning_rate": 3.727834524573582e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23231241106987,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5028.3,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 1.768759571209801,
|
|
"grad_norm": 0.4520348267564731,
|
|
"learning_rate": 3.7239750766455826e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22502996027469635,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4861.9,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.776416539050536,
|
|
"grad_norm": 0.5150942320605032,
|
|
"learning_rate": 3.720090483650988e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2381991147994995,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4304.5,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 1.784073506891271,
|
|
"grad_norm": 0.5161348929268305,
|
|
"learning_rate": 3.7161808022485935e-05,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21709483861923218,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4438.7,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.791730474732006,
|
|
"grad_norm": 0.49665971280240234,
|
|
"learning_rate": 3.7122460894631204e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24442270398139954,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4556.2,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 1.7993874425727412,
|
|
"grad_norm": 0.8745968372963754,
|
|
"learning_rate": 3.708286402684387e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23762422800064087,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4245.6,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.8070444104134764,
|
|
"grad_norm": 0.41278379589280434,
|
|
"learning_rate": 3.704301799666469e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20009422302246094,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5268.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 1.8147013782542114,
|
|
"grad_norm": 0.48742732045154236,
|
|
"learning_rate": 3.700292338526858e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24549022316932678,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4213.9,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.8223583460949464,
|
|
"grad_norm": 0.4712771047324667,
|
|
"learning_rate": 3.696258077745616e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24899449944496155,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4609.4,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.8300153139356814,
|
|
"grad_norm": 0.44975298875115,
|
|
"learning_rate": 3.6921990761645185e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21963663399219513,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4574.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 1.8376722817764164,
|
|
"grad_norm": 0.3951327552159224,
|
|
"learning_rate": 3.6881153929861995e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2228214144706726,
|
|
"step": 1200,
|
|
"valid_targets_mean": 6362.1,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 1.8453292496171516,
|
|
"grad_norm": 0.42372166371340003,
|
|
"learning_rate": 3.684007087773287e-05,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21967345476150513,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5326.0,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 1.8529862174578868,
|
|
"grad_norm": 0.5177828906855314,
|
|
"learning_rate": 3.679874220447533e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23139753937721252,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3876.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.8606431852986218,
|
|
"grad_norm": 0.4257753688447525,
|
|
"learning_rate": 3.675716851288942e-05,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21256472170352936,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5345.1,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 1.8683001531393568,
|
|
"grad_norm": 0.41030398631754234,
|
|
"learning_rate": 3.671535040934889e-05,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21335265040397644,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4680.1,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 1.8759571209800918,
|
|
"grad_norm": 0.430169115187478,
|
|
"learning_rate": 3.667328850379238e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20409463346004486,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5608.6,
|
|
"valid_targets_min": 2533
|
|
},
|
|
{
|
|
"epoch": 1.8836140888208268,
|
|
"grad_norm": 0.42025014500169766,
|
|
"learning_rate": 3.6630983409714494e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21930241584777832,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5487.7,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 1.891271056661562,
|
|
"grad_norm": 0.5021327369610216,
|
|
"learning_rate": 3.6588435744156865e-05,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19022399187088013,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5496.7,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 1.8989280245022973,
|
|
"grad_norm": 0.540321749042443,
|
|
"learning_rate": 3.654564612769917e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716904878616333,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3926.7,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.9065849923430322,
|
|
"grad_norm": 0.4729363531042848,
|
|
"learning_rate": 3.650261518445006e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21322724223136902,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5020.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.9142419601837672,
|
|
"grad_norm": 0.4446714421133037,
|
|
"learning_rate": 3.6459343542038056e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20671671628952026,
|
|
"step": 1250,
|
|
"valid_targets_mean": 5615.5,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 1.9218989280245022,
|
|
"grad_norm": 0.3944661996414203,
|
|
"learning_rate": 3.64158318316024e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21900799870491028,
|
|
"step": 1255,
|
|
"valid_targets_mean": 6057.3,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 1.9295558958652372,
|
|
"grad_norm": 0.4723508700169769,
|
|
"learning_rate": 3.6372080687783864e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22037751972675323,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4365.8,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.9372128637059725,
|
|
"grad_norm": 0.45131110819991693,
|
|
"learning_rate": 3.632809074871546e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20661242306232452,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4677.1,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.9448698315467075,
|
|
"grad_norm": 0.5463896519037112,
|
|
"learning_rate": 3.628386265601317e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21639251708984375,
|
|
"step": 1270,
|
|
"valid_targets_mean": 5609.1,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 1.9525267993874427,
|
|
"grad_norm": 0.47101652563636187,
|
|
"learning_rate": 3.623939705476655e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689882516860962,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4807.4,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 1.9601837672281777,
|
|
"grad_norm": 0.5829538504135483,
|
|
"learning_rate": 3.619469459352937e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19414472579956055,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5776.8,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 1.9678407350689127,
|
|
"grad_norm": 0.47130592895075707,
|
|
"learning_rate": 3.614975592431009e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2240133434534073,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4178.6,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 1.9754977029096477,
|
|
"grad_norm": 0.41933414032795246,
|
|
"learning_rate": 3.6104581702562406e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2266603261232376,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5151.5,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.9831546707503829,
|
|
"grad_norm": 0.4099884275428022,
|
|
"learning_rate": 3.605917258717567e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2012246549129486,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5393.4,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 1.9908116385911179,
|
|
"grad_norm": 0.3989801489279899,
|
|
"learning_rate": 3.6013529240465284e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19280970096588135,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5744.2,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 1.998468606431853,
|
|
"grad_norm": 0.402617048006408,
|
|
"learning_rate": 3.596765232816301e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2282564640045166,
|
|
"step": 1305,
|
|
"valid_targets_mean": 5270.4,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 2.006125574272588,
|
|
"grad_norm": 0.4373173508442768,
|
|
"learning_rate": 3.5921542519407305e-05,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20129822194576263,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5563.7,
|
|
"valid_targets_min": 1904
|
|
},
|
|
{
|
|
"epoch": 2.013782542113323,
|
|
"grad_norm": 0.40992921140400285,
|
|
"learning_rate": 3.587520048673354e-05,
|
|
"loss": 0.206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1945541501045227,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5979.3,
|
|
"valid_targets_min": 3547
|
|
},
|
|
{
|
|
"epoch": 2.021439509954058,
|
|
"grad_norm": 0.4970730710034263,
|
|
"learning_rate": 3.582862690606419e-05,
|
|
"loss": 0.2077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19818368554115295,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4507.1,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.029096477794793,
|
|
"grad_norm": 0.475172498602143,
|
|
"learning_rate": 3.578182245669896e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19407986104488373,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4450.6,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 2.0367534456355285,
|
|
"grad_norm": 0.47059170257977145,
|
|
"learning_rate": 3.573478782130494e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2392202615737915,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4831.3,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 2.0444104134762635,
|
|
"grad_norm": 0.528011468967076,
|
|
"learning_rate": 3.5687523685906535e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22966377437114716,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3790.3,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 2.0520673813169985,
|
|
"grad_norm": 0.5074078705476603,
|
|
"learning_rate": 3.564003073987559e-05,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24435581266880035,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4693.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 2.0597243491577335,
|
|
"grad_norm": 0.3844129003728788,
|
|
"learning_rate": 3.559230967592123e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17261341214179993,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5297.9,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 2.0673813169984685,
|
|
"grad_norm": 0.4224982802578748,
|
|
"learning_rate": 3.554436119007982e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2082083821296692,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4975.2,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.0750382848392035,
|
|
"grad_norm": 0.437850384380176,
|
|
"learning_rate": 3.5496185981704775e-05,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20134752988815308,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5147.9,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 2.082695252679939,
|
|
"grad_norm": 0.4207133103422171,
|
|
"learning_rate": 3.544778475345639e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2060418426990509,
|
|
"step": 1360,
|
|
"valid_targets_mean": 5071.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 2.090352220520674,
|
|
"grad_norm": 1.3112730233999392,
|
|
"learning_rate": 3.539915821129156e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2119438499212265,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5674.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 2.098009188361409,
|
|
"grad_norm": 0.5554558757769965,
|
|
"learning_rate": 3.535030706445352e-05,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21164335310459137,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4558.6,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.105666156202144,
|
|
"grad_norm": 0.4420544764278569,
|
|
"learning_rate": 3.530123202546146e-05,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2061401605606079,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5457.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 2.113323124042879,
|
|
"grad_norm": 0.46858657202849646,
|
|
"learning_rate": 3.525193381010015e-05,
|
|
"loss": 0.2015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17351192235946655,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4331.8,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 2.120980091883614,
|
|
"grad_norm": 0.4114806177904376,
|
|
"learning_rate": 3.520241313740954e-05,
|
|
"loss": 0.1985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18135160207748413,
|
|
"step": 1385,
|
|
"valid_targets_mean": 5179.8,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 2.1286370597243494,
|
|
"grad_norm": 0.4150389909635014,
|
|
"learning_rate": 3.51526707296742e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21126386523246765,
|
|
"step": 1390,
|
|
"valid_targets_mean": 5510.6,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 2.1362940275650844,
|
|
"grad_norm": 0.4189310905969695,
|
|
"learning_rate": 3.510270731241282e-05,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2002798169851303,
|
|
"step": 1395,
|
|
"valid_targets_mean": 6350.9,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 2.1439509954058193,
|
|
"grad_norm": 0.41400366324510934,
|
|
"learning_rate": 3.505252361436765e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952013075351715,
|
|
"step": 1400,
|
|
"valid_targets_mean": 5682.1,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 2.1516079632465543,
|
|
"grad_norm": 0.49444430141106194,
|
|
"learning_rate": 3.500212036749382e-05,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22853989899158478,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4394.3,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 2.1592649310872893,
|
|
"grad_norm": 0.4644344666905227,
|
|
"learning_rate": 3.495149830694872e-05,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1914132535457611,
|
|
"step": 1410,
|
|
"valid_targets_mean": 5267.1,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 2.1669218989280243,
|
|
"grad_norm": 0.45615206271339814,
|
|
"learning_rate": 3.490065817108124e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20476290583610535,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4469.4,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.1745788667687598,
|
|
"grad_norm": 0.47393370598028417,
|
|
"learning_rate": 3.484960070142102e-05,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22236575186252594,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4936.5,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 2.1822358346094948,
|
|
"grad_norm": 0.4253170924277631,
|
|
"learning_rate": 3.4798326642667587e-05,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18139034509658813,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4734.9,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 2.1898928024502298,
|
|
"grad_norm": 0.49911646366760776,
|
|
"learning_rate": 3.474683674267959e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20690619945526123,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5103.0,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.1975497702909648,
|
|
"grad_norm": 0.49119882276790566,
|
|
"learning_rate": 3.469513175246379e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1835339367389679,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4820.6,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.2052067381316998,
|
|
"grad_norm": 0.5154475611308721,
|
|
"learning_rate": 3.464321242616418e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19209043681621552,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4761.6,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 2.2128637059724348,
|
|
"grad_norm": 0.45164627691834747,
|
|
"learning_rate": 3.459107952105091e-05,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1891171634197235,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5681.1,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 2.22052067381317,
|
|
"grad_norm": 0.5335767275763915,
|
|
"learning_rate": 3.4538733797509355e-05,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2298070639371872,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4577.5,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 2.228177641653905,
|
|
"grad_norm": 0.4679949420186815,
|
|
"learning_rate": 3.44861760190289e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19955570995807648,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4832.8,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 2.23583460949464,
|
|
"grad_norm": 0.42095307623333295,
|
|
"learning_rate": 3.443340695219188e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1974397748708725,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5598.1,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 2.243491577335375,
|
|
"grad_norm": 0.4745463301945368,
|
|
"learning_rate": 3.4380427366662376e-05,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1813713014125824,
|
|
"step": 1465,
|
|
"valid_targets_mean": 5202.5,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.25114854517611,
|
|
"grad_norm": 0.39487703795653906,
|
|
"learning_rate": 3.432723803517501e-05,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1820264607667923,
|
|
"step": 1470,
|
|
"valid_targets_mean": 6077.2,
|
|
"valid_targets_min": 3521
|
|
},
|
|
{
|
|
"epoch": 2.258805513016845,
|
|
"grad_norm": 0.5769539475945872,
|
|
"learning_rate": 3.427383973352363e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2072950303554535,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3227.7,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 2.26646248085758,
|
|
"grad_norm": 0.4288473452558018,
|
|
"learning_rate": 3.422023324055005e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17287051677703857,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5125.8,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 2.2741194486983156,
|
|
"grad_norm": 0.4123498873195868,
|
|
"learning_rate": 3.4166419338132636e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2294299602508545,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5843.1,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 2.2817764165390506,
|
|
"grad_norm": 0.4730663649794135,
|
|
"learning_rate": 3.411239881117494e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22474753856658936,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4959.0,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 2.2894333843797856,
|
|
"grad_norm": 0.3875549685911907,
|
|
"learning_rate": 3.4058172447594255e-05,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1825382262468338,
|
|
"step": 1495,
|
|
"valid_targets_mean": 6264.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.2970903522205206,
|
|
"grad_norm": 0.4656618705136615,
|
|
"learning_rate": 3.400374103831007e-05,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21203726530075073,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4578.1,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 2.3047473200612556,
|
|
"grad_norm": 0.5002570246078484,
|
|
"learning_rate": 3.394910537723259e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21089878678321838,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4343.4,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 2.312404287901991,
|
|
"grad_norm": 0.452438730699335,
|
|
"learning_rate": 3.3894266261251163e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18554329872131348,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4893.6,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 2.320061255742726,
|
|
"grad_norm": 0.45774747263893145,
|
|
"learning_rate": 3.3839224490222594e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18411816656589508,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4389.2,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 2.327718223583461,
|
|
"grad_norm": 0.4396767692269071,
|
|
"learning_rate": 3.378398086695954e-05,
|
|
"loss": 0.1967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19679993391036987,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4803.2,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 2.335375191424196,
|
|
"grad_norm": 0.49013046313660535,
|
|
"learning_rate": 3.372853619721876e-05,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20178654789924622,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4873.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 2.343032159264931,
|
|
"grad_norm": 0.3922824720718304,
|
|
"learning_rate": 3.367289128968939e-05,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16767504811286926,
|
|
"step": 1530,
|
|
"valid_targets_mean": 6036.3,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 2.350689127105666,
|
|
"grad_norm": 0.5498579719590082,
|
|
"learning_rate": 3.361704695598115e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22038382291793823,
|
|
"step": 1535,
|
|
"valid_targets_mean": 5159.2,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 2.358346094946401,
|
|
"grad_norm": 0.4055300698729176,
|
|
"learning_rate": 3.3561004010612466e-05,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19593411684036255,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5894.4,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 2.3660030627871365,
|
|
"grad_norm": 0.3932581284453595,
|
|
"learning_rate": 3.3504763270998634e-05,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20369234681129456,
|
|
"step": 1545,
|
|
"valid_targets_mean": 6333.2,
|
|
"valid_targets_min": 2885
|
|
},
|
|
{
|
|
"epoch": 2.3736600306278715,
|
|
"grad_norm": 0.47076653510770716,
|
|
"learning_rate": 3.344832555743988e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21554359793663025,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4756.6,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 2.3813169984686064,
|
|
"grad_norm": 0.48873017026939236,
|
|
"learning_rate": 3.33916916931094e-05,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22236159443855286,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4995.3,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 2.3889739663093414,
|
|
"grad_norm": 0.4125303912579736,
|
|
"learning_rate": 3.3334862504041336e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20083507895469666,
|
|
"step": 1560,
|
|
"valid_targets_mean": 5715.2,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.3966309341500764,
|
|
"grad_norm": 0.3853433874516468,
|
|
"learning_rate": 3.327783881911876e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17324942350387573,
|
|
"step": 1565,
|
|
"valid_targets_mean": 6147.9,
|
|
"valid_targets_min": 4254
|
|
},
|
|
{
|
|
"epoch": 2.404287901990812,
|
|
"grad_norm": 0.4211655279711124,
|
|
"learning_rate": 3.322062147006156e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18347764015197754,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5185.8,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 2.411944869831547,
|
|
"grad_norm": 0.648964580388655,
|
|
"learning_rate": 3.3163211291414304e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19629991054534912,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4286.4,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 2.419601837672282,
|
|
"grad_norm": 0.4962453331492355,
|
|
"learning_rate": 3.310560912053409e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2081303745508194,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4552.9,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 2.427258805513017,
|
|
"grad_norm": 0.4555354604900568,
|
|
"learning_rate": 3.304781579757833e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17186057567596436,
|
|
"step": 1585,
|
|
"valid_targets_mean": 5026.6,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 2.434915773353752,
|
|
"grad_norm": 0.4285448366359402,
|
|
"learning_rate": 3.298983216549248e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2216184288263321,
|
|
"step": 1590,
|
|
"valid_targets_mean": 5668.2,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 2.442572741194487,
|
|
"grad_norm": 0.5893928303619657,
|
|
"learning_rate": 3.2931659069997735e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21629182994365692,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3893.3,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 2.450229709035222,
|
|
"grad_norm": 0.48884082069491913,
|
|
"learning_rate": 3.287329735957874e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2368190437555313,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4901.4,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 2.4578866768759573,
|
|
"grad_norm": 0.52785052949108,
|
|
"learning_rate": 3.281474788547118e-05,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22330242395401,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4853.1,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 2.4655436447166923,
|
|
"grad_norm": 0.3987288953747758,
|
|
"learning_rate": 3.275601150164935e-05,
|
|
"loss": 0.206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2003602832555771,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5620.3,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 2.4732006125574273,
|
|
"grad_norm": 0.47491465305943875,
|
|
"learning_rate": 3.269708906481374e-05,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1774139255285263,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4716.2,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 2.4808575803981623,
|
|
"grad_norm": 0.44594046656836617,
|
|
"learning_rate": 3.263798143437851e-05,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20561468601226807,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5183.3,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 2.4885145482388973,
|
|
"grad_norm": 0.42727062717581415,
|
|
"learning_rate": 3.2578689472458976e-05,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18722905218601227,
|
|
"step": 1625,
|
|
"valid_targets_mean": 5382.0,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 2.4961715160796323,
|
|
"grad_norm": 0.5834450873795829,
|
|
"learning_rate": 3.251921404385901e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2354370504617691,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4581.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 2.5038284839203673,
|
|
"grad_norm": 0.5345211557255457,
|
|
"learning_rate": 3.245955601605845e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2337321937084198,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4913.5,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 2.5114854517611027,
|
|
"grad_norm": 0.37333243741751854,
|
|
"learning_rate": 3.239971625920043e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19104315340518951,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5793.1,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 2.5191424196018377,
|
|
"grad_norm": 0.5550955042373348,
|
|
"learning_rate": 3.23396956460787e-05,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21975022554397583,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3863.6,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 2.5267993874425727,
|
|
"grad_norm": 0.4228650870100245,
|
|
"learning_rate": 3.2279495052124884e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1825849711894989,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5644.5,
|
|
"valid_targets_min": 2558
|
|
},
|
|
{
|
|
"epoch": 2.5344563552833077,
|
|
"grad_norm": 0.46523951256002843,
|
|
"learning_rate": 3.2219115355395745e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20776934921741486,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4995.9,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 2.5421133231240427,
|
|
"grad_norm": 0.4225684699080134,
|
|
"learning_rate": 3.2158557436560317e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19467893242835999,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4794.1,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 2.549770290964778,
|
|
"grad_norm": 0.4533547241905163,
|
|
"learning_rate": 3.2097822178887114e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20666176080703735,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5426.8,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 2.557427258805513,
|
|
"grad_norm": 0.7691377736423072,
|
|
"learning_rate": 3.203691046823124e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768425941467285,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4918.1,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 2.565084226646248,
|
|
"grad_norm": 0.4647968234361939,
|
|
"learning_rate": 3.197582319302143e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2030567228794098,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4793.9,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 2.572741194486983,
|
|
"grad_norm": 0.47503096589440647,
|
|
"learning_rate": 3.191456124424715e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20649227499961853,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4376.4,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 2.580398162327718,
|
|
"grad_norm": 0.38967161506770254,
|
|
"learning_rate": 3.185312551544553e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16294370591640472,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5400.3,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 2.5880551301684536,
|
|
"grad_norm": 0.4137207933362876,
|
|
"learning_rate": 3.179151690268842e-05,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20800091326236725,
|
|
"step": 1690,
|
|
"valid_targets_mean": 5615.9,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 2.595712098009188,
|
|
"grad_norm": 0.43845263427784364,
|
|
"learning_rate": 3.1729736304569216e-05,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1801062524318695,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5320.6,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 2.6033690658499236,
|
|
"grad_norm": 0.4583531820739542,
|
|
"learning_rate": 3.1667784622189866e-05,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2043706327676773,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4630.8,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 2.6110260336906586,
|
|
"grad_norm": 0.5834610593137702,
|
|
"learning_rate": 3.160566275914763e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1968202143907547,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4307.7,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 2.6186830015313936,
|
|
"grad_norm": 0.5472036514053488,
|
|
"learning_rate": 3.154337162152196e-05,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23707276582717896,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3378.8,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 2.6263399693721285,
|
|
"grad_norm": 0.44150272015060515,
|
|
"learning_rate": 3.148091211786126e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21165896952152252,
|
|
"step": 1715,
|
|
"valid_targets_mean": 5223.7,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 2.6339969372128635,
|
|
"grad_norm": 0.5441798770208954,
|
|
"learning_rate": 3.141828515916963e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19593748450279236,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5229.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 2.641653905053599,
|
|
"grad_norm": 0.49587618350763246,
|
|
"learning_rate": 3.135549165889361e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22288836538791656,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4101.1,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 2.649310872894334,
|
|
"grad_norm": 0.4169833894502891,
|
|
"learning_rate": 3.1292532532908814e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19745498895645142,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5347.1,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 2.656967840735069,
|
|
"grad_norm": 0.4322133840167338,
|
|
"learning_rate": 3.12294086995066e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17262643575668335,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4846.4,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 2.664624808575804,
|
|
"grad_norm": 0.5366545162742864,
|
|
"learning_rate": 3.116612107938068e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22421014308929443,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4210.1,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 2.672281776416539,
|
|
"grad_norm": 0.45585126107024354,
|
|
"learning_rate": 3.1102670595613654e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19032257795333862,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4588.5,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 2.679938744257274,
|
|
"grad_norm": 0.5669095838367388,
|
|
"learning_rate": 3.10390581736636e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20808973908424377,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4542.7,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 2.687595712098009,
|
|
"grad_norm": 0.42969387483485466,
|
|
"learning_rate": 3.0975284741350535e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184463769197464,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5571.9,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 2.6952526799387444,
|
|
"grad_norm": 0.4688512511006581,
|
|
"learning_rate": 3.091135122884289e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21699786186218262,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5923.4,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 2.7029096477794794,
|
|
"grad_norm": 0.48031854884765524,
|
|
"learning_rate": 3.084725856864395e-05,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21168527007102966,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5652.4,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 2.7105666156202144,
|
|
"grad_norm": 0.3909694462296888,
|
|
"learning_rate": 3.078300769557827e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16804447770118713,
|
|
"step": 1770,
|
|
"valid_targets_mean": 5812.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 2.7182235834609494,
|
|
"grad_norm": 0.44319625905779303,
|
|
"learning_rate": 3.0718599546778e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21331599354743958,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4729.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.7258805513016844,
|
|
"grad_norm": 0.4495408359076918,
|
|
"learning_rate": 3.065403506166925e-05,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20311203598976135,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5382.8,
|
|
"valid_targets_min": 2506
|
|
},
|
|
{
|
|
"epoch": 2.73353751914242,
|
|
"grad_norm": 0.46444461555955624,
|
|
"learning_rate": 3.058931518195834e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21994590759277344,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4179.7,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 2.741194486983155,
|
|
"grad_norm": 0.591047392543951,
|
|
"learning_rate": 3.052444085161818e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20447468757629395,
|
|
"step": 1790,
|
|
"valid_targets_mean": 5012.3,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 2.74885145482389,
|
|
"grad_norm": 0.46171591187608774,
|
|
"learning_rate": 3.0459413016874334e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22244763374328613,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4043.6,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 2.756508422664625,
|
|
"grad_norm": 0.4755204959145075,
|
|
"learning_rate": 3.039423262619137e-05,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23722028732299805,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4548.3,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 2.76416539050536,
|
|
"grad_norm": 0.43118554325602115,
|
|
"learning_rate": 3.0328900630258924e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20272132754325867,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5041.2,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 2.771822358346095,
|
|
"grad_norm": 0.40532349261110173,
|
|
"learning_rate": 3.02634179819779e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18425460159778595,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5047.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.77947932618683,
|
|
"grad_norm": 0.40870340425176643,
|
|
"learning_rate": 3.0197785636446516e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19097676873207092,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5106.1,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 2.7871362940275652,
|
|
"grad_norm": 0.4466837704739254,
|
|
"learning_rate": 3.0132004550946438e-05,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24000149965286255,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5685.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 2.7947932618683002,
|
|
"grad_norm": 0.4157130523321726,
|
|
"learning_rate": 3.006607568492875e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20618754625320435,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5365.9,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 2.8024502297090352,
|
|
"grad_norm": 0.45846280599364925,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20795312523841858,
|
|
"step": 1830,
|
|
"valid_targets_mean": 5361.0,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 2.8101071975497702,
|
|
"grad_norm": 0.42423011488107815,
|
|
"learning_rate": 2.9933778459908178e-05,
|
|
"loss": 0.1997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2021748125553131,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5163.9,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 2.8177641653905052,
|
|
"grad_norm": 0.5922093821939035,
|
|
"learning_rate": 2.986741203052863e-05,
|
|
"loss": 0.2007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244371771812439,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5787.4,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.8254211332312407,
|
|
"grad_norm": 0.41160121668642163,
|
|
"learning_rate": 2.9800901679849993e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20778921246528625,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5501.7,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 2.8330781010719757,
|
|
"grad_norm": 0.4427215800064586,
|
|
"learning_rate": 2.9734248377960072e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19352000951766968,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5902.9,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 2.8407350689127107,
|
|
"grad_norm": 0.4807115826614534,
|
|
"learning_rate": 2.9667453097031695e-05,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24455857276916504,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4574.5,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 2.8483920367534457,
|
|
"grad_norm": 0.5374002834819784,
|
|
"learning_rate": 2.9600516811308516e-05,
|
|
"loss": 0.2138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22875338792800903,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4644.6,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 2.8560490045941807,
|
|
"grad_norm": 0.5090559947438412,
|
|
"learning_rate": 2.953344049709082e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19830799102783203,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4229.7,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 2.8637059724349156,
|
|
"grad_norm": 0.45737005237173484,
|
|
"learning_rate": 2.9466225132721285e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20132991671562195,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4853.1,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.8713629402756506,
|
|
"grad_norm": 0.40896772467478815,
|
|
"learning_rate": 2.9398871698570706e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20456016063690186,
|
|
"step": 1875,
|
|
"valid_targets_mean": 5263.5,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 2.879019908116386,
|
|
"grad_norm": 0.46658454506306835,
|
|
"learning_rate": 2.9331381177023685e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20330816507339478,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4714.1,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 2.886676875957121,
|
|
"grad_norm": 0.45821454150634894,
|
|
"learning_rate": 2.9263754552464338e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19405120611190796,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4531.8,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 2.894333843797856,
|
|
"grad_norm": 0.5547254247208179,
|
|
"learning_rate": 2.9195992811261897e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23308220505714417,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3445.6,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 2.901990811638591,
|
|
"grad_norm": 0.45421334304878075,
|
|
"learning_rate": 2.912809694175634e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2073233723640442,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4847.8,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.909647779479326,
|
|
"grad_norm": 0.528454036702243,
|
|
"learning_rate": 2.906006793424398e-05,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21321921050548553,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3648.8,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 2.9173047473200615,
|
|
"grad_norm": 0.41825864046575006,
|
|
"learning_rate": 2.8991906780963014e-05,
|
|
"loss": 0.194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19795027375221252,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5839.6,
|
|
"valid_targets_min": 2864
|
|
},
|
|
{
|
|
"epoch": 2.924961715160796,
|
|
"grad_norm": 0.41700524250275683,
|
|
"learning_rate": 2.8923614476079053e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20309601724147797,
|
|
"step": 1910,
|
|
"valid_targets_mean": 5556.4,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 2.9326186830015315,
|
|
"grad_norm": 0.41390443168776353,
|
|
"learning_rate": 2.885519201567063e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19842462241649628,
|
|
"step": 1915,
|
|
"valid_targets_mean": 5288.7,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 2.9402756508422665,
|
|
"grad_norm": 0.44113143651113673,
|
|
"learning_rate": 2.878664039771466e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1972970962524414,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4468.6,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 2.9479326186830015,
|
|
"grad_norm": 0.42348693430683976,
|
|
"learning_rate": 2.8717960622071875e-05,
|
|
"loss": 0.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2290765643119812,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5191.4,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 2.9555895865237365,
|
|
"grad_norm": 0.40281713608998554,
|
|
"learning_rate": 2.8649153690472258e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18338808417320251,
|
|
"step": 1930,
|
|
"valid_targets_mean": 5284.5,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 2.9632465543644715,
|
|
"grad_norm": 0.4313394629221367,
|
|
"learning_rate": 2.858022060650045e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20001380145549774,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4850.2,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 2.970903522205207,
|
|
"grad_norm": 0.4330030636250435,
|
|
"learning_rate": 2.851116237558106e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19025284051895142,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4625.9,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.978560490045942,
|
|
"grad_norm": 0.4537727728009908,
|
|
"learning_rate": 2.8441980004964035e-05,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22758567333221436,
|
|
"step": 1945,
|
|
"valid_targets_mean": 5108.7,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 2.986217457886677,
|
|
"grad_norm": 0.5070336992907599,
|
|
"learning_rate": 2.8372674503709988e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22816471755504608,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4112.2,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 2.993874425727412,
|
|
"grad_norm": 0.5222307071784804,
|
|
"learning_rate": 2.8303246882675422e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.230756014585495,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4718.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.001531393568147,
|
|
"grad_norm": 0.44388401663627725,
|
|
"learning_rate": 2.8233698154498042e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21255435049533844,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5199.9,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.009188361408882,
|
|
"grad_norm": 0.49805799320752814,
|
|
"learning_rate": 2.8164029333581964e-05,
|
|
"loss": 0.1928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17959743738174438,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4954.9,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 3.0168453292496173,
|
|
"grad_norm": 0.451755079299758,
|
|
"learning_rate": 2.809424143608289e-05,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1869395226240158,
|
|
"step": 1970,
|
|
"valid_targets_mean": 5274.4,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.0245022970903523,
|
|
"grad_norm": 0.4237197860277832,
|
|
"learning_rate": 2.802433547989336e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1696140170097351,
|
|
"step": 1975,
|
|
"valid_targets_mean": 5256.1,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.0321592649310873,
|
|
"grad_norm": 0.48674324881062975,
|
|
"learning_rate": 2.7954312484627824e-05,
|
|
"loss": 0.1877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1883959323167801,
|
|
"step": 1980,
|
|
"valid_targets_mean": 5084.5,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 3.0398162327718223,
|
|
"grad_norm": 0.49321268064240037,
|
|
"learning_rate": 2.788417347160783e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20648150146007538,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4628.4,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.0474732006125573,
|
|
"grad_norm": 0.541476424950113,
|
|
"learning_rate": 2.7813919463847094e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19818390905857086,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4582.2,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.0551301684532923,
|
|
"grad_norm": 0.4501064317153738,
|
|
"learning_rate": 2.7743551486036588e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18310943245887756,
|
|
"step": 1995,
|
|
"valid_targets_mean": 5253.2,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 3.0627871362940278,
|
|
"grad_norm": 0.47833852227635554,
|
|
"learning_rate": 2.7673070564529606e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1747608482837677,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5987.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.0704441041347628,
|
|
"grad_norm": 0.44582266307962753,
|
|
"learning_rate": 2.7602477727326764e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20294205844402313,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5650.4,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 3.0781010719754978,
|
|
"grad_norm": 0.4439541326450507,
|
|
"learning_rate": 2.7531774004061057e-05,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18605002760887146,
|
|
"step": 2010,
|
|
"valid_targets_mean": 5244.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 3.0857580398162328,
|
|
"grad_norm": 0.8975814777494563,
|
|
"learning_rate": 2.746096042598279e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1635829508304596,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4843.5,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 3.0934150076569678,
|
|
"grad_norm": 0.4399161374325912,
|
|
"learning_rate": 2.739003802594456e-05,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19688273966312408,
|
|
"step": 2020,
|
|
"valid_targets_mean": 5672.7,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 3.1010719754977027,
|
|
"grad_norm": 0.4325179298872115,
|
|
"learning_rate": 2.7319007838386177e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1865837574005127,
|
|
"step": 2025,
|
|
"valid_targets_mean": 5061.4,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 3.108728943338438,
|
|
"grad_norm": 0.5041193832066779,
|
|
"learning_rate": 2.724787089931962e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17695216834545135,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4700.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.116385911179173,
|
|
"grad_norm": 0.6636882689104748,
|
|
"learning_rate": 2.7176628246313864e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17805099487304688,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5752.7,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 3.124042879019908,
|
|
"grad_norm": 0.4991191331475684,
|
|
"learning_rate": 2.7105280918479775e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18973186612129211,
|
|
"step": 2040,
|
|
"valid_targets_mean": 5067.0,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 3.131699846860643,
|
|
"grad_norm": 0.4629329569115915,
|
|
"learning_rate": 2.7033829956454992e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22558261454105377,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4791.7,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 3.139356814701378,
|
|
"grad_norm": 0.4955623156729709,
|
|
"learning_rate": 2.696227640238867e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18404105305671692,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4818.3,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 3.147013782542113,
|
|
"grad_norm": 0.40572235426798386,
|
|
"learning_rate": 2.6890621299926337e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16386070847511292,
|
|
"step": 2055,
|
|
"valid_targets_mean": 5887.9,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 3.1546707503828486,
|
|
"grad_norm": 0.4822367465005572,
|
|
"learning_rate": 2.681886569419467e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752379685640335,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4959.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.1623277182235836,
|
|
"grad_norm": 0.42573426451272567,
|
|
"learning_rate": 2.674701063178621e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1862529069185257,
|
|
"step": 2065,
|
|
"valid_targets_mean": 5424.7,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 3.1699846860643186,
|
|
"grad_norm": 0.555365775012984,
|
|
"learning_rate": 2.6675057160744157e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2169274091720581,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4135.9,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 3.1776416539050536,
|
|
"grad_norm": 0.49960455879913235,
|
|
"learning_rate": 2.660300633054703e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19349941611289978,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4799.2,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.1852986217457886,
|
|
"grad_norm": 0.4401708479718189,
|
|
"learning_rate": 2.653085919209339e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19372834265232086,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5538.8,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 3.1929555895865236,
|
|
"grad_norm": 0.40630670332354507,
|
|
"learning_rate": 2.64586167976865e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18609587848186493,
|
|
"step": 2085,
|
|
"valid_targets_mean": 5602.0,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 3.2006125574272586,
|
|
"grad_norm": 0.4486946026376343,
|
|
"learning_rate": 2.6386280201018978e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19235703349113464,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5800.1,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 3.208269525267994,
|
|
"grad_norm": 0.43778644558935853,
|
|
"learning_rate": 2.6313850457157446e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18642935156822205,
|
|
"step": 2095,
|
|
"valid_targets_mean": 5270.8,
|
|
"valid_targets_min": 2460
|
|
},
|
|
{
|
|
"epoch": 3.215926493108729,
|
|
"grad_norm": 0.4771876713178929,
|
|
"learning_rate": 2.6241328622527097e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20062386989593506,
|
|
"step": 2100,
|
|
"valid_targets_mean": 5161.2,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 3.223583460949464,
|
|
"grad_norm": 0.46215779428957415,
|
|
"learning_rate": 2.6168715754896346e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19280359148979187,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5252.8,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.231240428790199,
|
|
"grad_norm": 0.445313686278001,
|
|
"learning_rate": 2.6096012913361355e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17572081089019775,
|
|
"step": 2110,
|
|
"valid_targets_mean": 5731.9,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 3.238897396630934,
|
|
"grad_norm": 0.5062526832195221,
|
|
"learning_rate": 2.60232211583306e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18368211388587952,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4119.7,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 3.2465543644716695,
|
|
"grad_norm": 0.5086260485057701,
|
|
"learning_rate": 2.5950341551509417e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16073405742645264,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5029.1,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 3.2542113323124044,
|
|
"grad_norm": 0.4553370142955746,
|
|
"learning_rate": 2.58773751558845e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1590225100517273,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5276.3,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 3.2618683001531394,
|
|
"grad_norm": 0.5007894833828535,
|
|
"learning_rate": 2.5804323035708398e-05,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17388451099395752,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4547.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 3.2695252679938744,
|
|
"grad_norm": 0.4373602866755202,
|
|
"learning_rate": 2.5731186256484e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18840092420578003,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4721.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 3.2771822358346094,
|
|
"grad_norm": 0.6742452779283673,
|
|
"learning_rate": 2.5657965884949e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2183414101600647,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4429.9,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 3.2848392036753444,
|
|
"grad_norm": 0.45898450490614895,
|
|
"learning_rate": 2.5584662989060317e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17275741696357727,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5442.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 3.2924961715160794,
|
|
"grad_norm": 0.44882146875151396,
|
|
"learning_rate": 2.5511278637978532e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15515941381454468,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5243.6,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.300153139356815,
|
|
"grad_norm": 0.4467734518319107,
|
|
"learning_rate": 2.5437813902052292e-05,
|
|
"loss": 0.1774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18329621851444244,
|
|
"step": 2155,
|
|
"valid_targets_mean": 5117.8,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.30781010719755,
|
|
"grad_norm": 0.4152520371866648,
|
|
"learning_rate": 2.536426985280271e-05,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1850317418575287,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5134.1,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 3.315467075038285,
|
|
"grad_norm": 0.44998774771356215,
|
|
"learning_rate": 2.5290647562907705e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21751946210861206,
|
|
"step": 2165,
|
|
"valid_targets_mean": 5467.7,
|
|
"valid_targets_min": 2622
|
|
},
|
|
{
|
|
"epoch": 3.32312404287902,
|
|
"grad_norm": 0.4019431753462474,
|
|
"learning_rate": 2.5216948106186395e-05,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1585729718208313,
|
|
"step": 2170,
|
|
"valid_targets_mean": 5651.1,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 3.330781010719755,
|
|
"grad_norm": 0.5618290697674159,
|
|
"learning_rate": 2.5143172557583412e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21604791283607483,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3703.8,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 3.3384379785604903,
|
|
"grad_norm": 0.43902097786183897,
|
|
"learning_rate": 2.506932199315321e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19183912873268127,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4844.0,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.3460949464012253,
|
|
"grad_norm": 0.45644637337033184,
|
|
"learning_rate": 2.499539749004441e-05,
|
|
"loss": 0.2002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20222628116607666,
|
|
"step": 2185,
|
|
"valid_targets_mean": 5212.4,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 3.3537519142419603,
|
|
"grad_norm": 0.4393001352247523,
|
|
"learning_rate": 2.4921400126484057e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20443087816238403,
|
|
"step": 2190,
|
|
"valid_targets_mean": 5666.1,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 3.3614088820826953,
|
|
"grad_norm": 0.5628152912241892,
|
|
"learning_rate": 2.4847330981761893e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18845781683921814,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3962.0,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 3.3690658499234303,
|
|
"grad_norm": 0.4499446422169732,
|
|
"learning_rate": 2.4773191136214655e-05,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1923513114452362,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4977.8,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 3.3767228177641653,
|
|
"grad_norm": 0.4645104369113949,
|
|
"learning_rate": 2.4698981671210253e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823941171169281,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4065.4,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 3.3843797856049003,
|
|
"grad_norm": 0.46636495279671436,
|
|
"learning_rate": 2.462470366913206e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18425936996936798,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4618.2,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.3920367534456357,
|
|
"grad_norm": 0.5055126421992738,
|
|
"learning_rate": 2.4550358213363083e-05,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19574221968650818,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3900.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 3.3996937212863707,
|
|
"grad_norm": 0.5503070083738346,
|
|
"learning_rate": 2.4475946388270172e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17538341879844666,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3802.2,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 3.4073506891271057,
|
|
"grad_norm": 0.47088674144945025,
|
|
"learning_rate": 2.440146927918823e-05,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2073318362236023,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4473.6,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 3.4150076569678407,
|
|
"grad_norm": 0.4740661356667313,
|
|
"learning_rate": 2.4326927972404333e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20011824369430542,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4605.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 3.4226646248085757,
|
|
"grad_norm": 0.4165429827415794,
|
|
"learning_rate": 2.4252323555141935e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17027318477630615,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5750.4,
|
|
"valid_targets_min": 2722
|
|
},
|
|
{
|
|
"epoch": 3.4303215926493107,
|
|
"grad_norm": 0.4794637308355578,
|
|
"learning_rate": 2.417765711554498e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16433902084827423,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4362.0,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 3.437978560490046,
|
|
"grad_norm": 0.5136159685801481,
|
|
"learning_rate": 2.410292974266203e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20960760116577148,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3976.4,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 3.445635528330781,
|
|
"grad_norm": 0.5270719518942621,
|
|
"learning_rate": 2.402814252643042e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20371288061141968,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4996.0,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 3.453292496171516,
|
|
"grad_norm": 0.48963006601475756,
|
|
"learning_rate": 2.3953296557660288e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18395353853702545,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5188.1,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 3.460949464012251,
|
|
"grad_norm": 0.4145211188615159,
|
|
"learning_rate": 2.387839292801875e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1779845654964447,
|
|
"step": 2260,
|
|
"valid_targets_mean": 5619.1,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.468606431852986,
|
|
"grad_norm": 0.4701954182659396,
|
|
"learning_rate": 2.3803432730013913e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18874239921569824,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4970.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 3.476263399693721,
|
|
"grad_norm": 0.5084547938774928,
|
|
"learning_rate": 2.372841705697897e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19924131035804749,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4273.2,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 3.4839203675344566,
|
|
"grad_norm": 0.42346772066137994,
|
|
"learning_rate": 2.365334700305624e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18509188294410706,
|
|
"step": 2275,
|
|
"valid_targets_mean": 5318.5,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.4915773353751915,
|
|
"grad_norm": 0.43876011826770084,
|
|
"learning_rate": 2.3578223663181214e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19643978774547577,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.4992343032159265,
|
|
"grad_norm": 0.4259076717957742,
|
|
"learning_rate": 2.35030481330666e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17723461985588074,
|
|
"step": 2285,
|
|
"valid_targets_mean": 5322.2,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 3.5068912710566615,
|
|
"grad_norm": 0.40790611814620886,
|
|
"learning_rate": 2.3427821509186308e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16672778129577637,
|
|
"step": 2290,
|
|
"valid_targets_mean": 5282.6,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 3.5145482388973965,
|
|
"grad_norm": 0.43724973404898343,
|
|
"learning_rate": 2.3352544888759495e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17628073692321777,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5093.2,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 3.522205206738132,
|
|
"grad_norm": 0.4419945752454893,
|
|
"learning_rate": 2.3277219369734537e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17800669372081757,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5829.1,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 3.5298621745788665,
|
|
"grad_norm": 0.40029502890365776,
|
|
"learning_rate": 2.320184605077302e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1498422920703888,
|
|
"step": 2305,
|
|
"valid_targets_mean": 6132.4,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 3.537519142419602,
|
|
"grad_norm": 0.4317895472992919,
|
|
"learning_rate": 2.3126426031233714e-05,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20129495859146118,
|
|
"step": 2310,
|
|
"valid_targets_mean": 5208.1,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 3.545176110260337,
|
|
"grad_norm": 0.40601945027019737,
|
|
"learning_rate": 2.3050960411156546e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18385669589042664,
|
|
"step": 2315,
|
|
"valid_targets_mean": 6188.5,
|
|
"valid_targets_min": 3363
|
|
},
|
|
{
|
|
"epoch": 3.552833078101072,
|
|
"grad_norm": 0.4955956305407851,
|
|
"learning_rate": 2.2975450291246536e-05,
|
|
"loss": 0.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16455523669719696,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5401.9,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.560490045941807,
|
|
"grad_norm": 0.4775531036710853,
|
|
"learning_rate": 2.289989677285779e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20387563109397888,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5242.1,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.568147013782542,
|
|
"grad_norm": 0.42783431960706375,
|
|
"learning_rate": 2.282430095797737e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17975592613220215,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5704.2,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 3.5758039816232774,
|
|
"grad_norm": 0.4653094697113535,
|
|
"learning_rate": 2.274866394920927e-05,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19249895215034485,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5539.1,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 3.5834609494640124,
|
|
"grad_norm": 0.42954428223445007,
|
|
"learning_rate": 2.2672986849758316e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17228573560714722,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5570.5,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 3.5911179173047474,
|
|
"grad_norm": 0.8433824524079548,
|
|
"learning_rate": 2.259727076341407e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15344902873039246,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5395.2,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 3.5987748851454824,
|
|
"grad_norm": 0.43204330288459675,
|
|
"learning_rate": 2.252151679453475e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1977773904800415,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4850.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.6064318529862174,
|
|
"grad_norm": 0.3957799588447967,
|
|
"learning_rate": 2.2445726048031104e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18701013922691345,
|
|
"step": 2355,
|
|
"valid_targets_mean": 6515.1,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 3.6140888208269524,
|
|
"grad_norm": 0.46060379254586875,
|
|
"learning_rate": 2.23698996293503e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19811898469924927,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5086.2,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 3.6217457886676874,
|
|
"grad_norm": 0.4761035270932635,
|
|
"learning_rate": 2.2294038644459805e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18033194541931152,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4584.5,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 3.629402756508423,
|
|
"grad_norm": 0.4869396442286812,
|
|
"learning_rate": 2.221814419983125e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20814746618270874,
|
|
"step": 2370,
|
|
"valid_targets_mean": 5545.9,
|
|
"valid_targets_min": 2727
|
|
},
|
|
{
|
|
"epoch": 3.637059724349158,
|
|
"grad_norm": 0.46298467756924694,
|
|
"learning_rate": 2.2142217402424296e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.176169753074646,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4827.2,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 3.644716692189893,
|
|
"grad_norm": 0.4195617237812523,
|
|
"learning_rate": 2.2066259359670485e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17234407365322113,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5661.3,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.652373660030628,
|
|
"grad_norm": 0.43491867592878586,
|
|
"learning_rate": 2.1990271179457082e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17926275730133057,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5154.2,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 3.660030627871363,
|
|
"grad_norm": 0.5808883209817489,
|
|
"learning_rate": 2.1914253970110937e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22726327180862427,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3341.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.6676875957120982,
|
|
"grad_norm": 0.39616224115059184,
|
|
"learning_rate": 2.1838208840382294e-05,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17501763999462128,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5985.6,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 3.6753445635528332,
|
|
"grad_norm": 0.5537720074927155,
|
|
"learning_rate": 2.176213689942863e-05,
|
|
"loss": 0.1896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2179131954908371,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3625.4,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 3.6830015313935682,
|
|
"grad_norm": 0.43256442597937816,
|
|
"learning_rate": 2.168603925679849e-05,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14384448528289795,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4353.9,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 3.6906584992343032,
|
|
"grad_norm": 0.4852843793132942,
|
|
"learning_rate": 2.160991702241527e-05,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20300811529159546,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4493.1,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 3.698315467075038,
|
|
"grad_norm": 0.4046986527750801,
|
|
"learning_rate": 2.1533771306561066e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17245183885097504,
|
|
"step": 2415,
|
|
"valid_targets_mean": 6083.0,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 3.705972434915773,
|
|
"grad_norm": 0.4681618550416894,
|
|
"learning_rate": 2.1457603219860457e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20374765992164612,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4759.5,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 3.713629402756508,
|
|
"grad_norm": 0.35609101937418786,
|
|
"learning_rate": 2.1381413873264315e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16857865452766418,
|
|
"step": 2425,
|
|
"valid_targets_mean": 6080.7,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 3.7212863705972437,
|
|
"grad_norm": 0.39199321408980886,
|
|
"learning_rate": 2.1305204378033598e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17050239443778992,
|
|
"step": 2430,
|
|
"valid_targets_mean": 6238.8,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 3.7289433384379786,
|
|
"grad_norm": 0.5289287346109518,
|
|
"learning_rate": 2.1228975845723137e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18746045231819153,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3991.0,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.7366003062787136,
|
|
"grad_norm": 0.4065373365511912,
|
|
"learning_rate": 2.115272938816544e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14783808588981628,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4898.0,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.7442572741194486,
|
|
"grad_norm": 0.465243722366574,
|
|
"learning_rate": 2.107646611745445e-05,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2061719298362732,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5343.1,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 3.7519142419601836,
|
|
"grad_norm": 0.5540659684720433,
|
|
"learning_rate": 2.1000187145929347e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20314675569534302,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3803.7,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 3.759571209800919,
|
|
"grad_norm": 0.5243985701272287,
|
|
"learning_rate": 2.092389358615832e-05,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511424124240875,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4707.7,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 3.7672281776416536,
|
|
"grad_norm": 0.42664805575678655,
|
|
"learning_rate": 2.0847586550922326e-05,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2017786204814911,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5301.8,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 3.774885145482389,
|
|
"grad_norm": 0.4453346796586349,
|
|
"learning_rate": 2.0771267153198873e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17807403206825256,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5417.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 3.782542113323124,
|
|
"grad_norm": 0.4219997971711044,
|
|
"learning_rate": 2.069493650614578e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1867009550333023,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5047.1,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.790199081163859,
|
|
"grad_norm": 0.3945998573310309,
|
|
"learning_rate": 2.0618595723084938e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16966620087623596,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5595.2,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 3.797856049004594,
|
|
"grad_norm": 0.3896927875127744,
|
|
"learning_rate": 2.054224591748609e-05,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17560520768165588,
|
|
"step": 2480,
|
|
"valid_targets_mean": 5799.6,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 3.805513016845329,
|
|
"grad_norm": 0.4767292234124444,
|
|
"learning_rate": 2.046588820295057e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2139269858598709,
|
|
"step": 2485,
|
|
"valid_targets_mean": 5065.8,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 3.8131699846860645,
|
|
"grad_norm": 0.48305615805465524,
|
|
"learning_rate": 2.038952369319507e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1714230179786682,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5173.9,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.8208269525267995,
|
|
"grad_norm": 0.4591287515515576,
|
|
"learning_rate": 2.031315350203539e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20354583859443665,
|
|
"step": 2495,
|
|
"valid_targets_mean": 5899.7,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 3.8284839203675345,
|
|
"grad_norm": 0.405791559738873,
|
|
"learning_rate": 2.02367787433702e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725158542394638,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5390.0,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 3.8361408882082695,
|
|
"grad_norm": 0.4497822748514485,
|
|
"learning_rate": 2.0160400531164787e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15819357335567474,
|
|
"step": 2505,
|
|
"valid_targets_mean": 5183.4,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 3.8437978560490045,
|
|
"grad_norm": 0.38677614976809455,
|
|
"learning_rate": 2.008401997943481e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19747616350650787,
|
|
"step": 2510,
|
|
"valid_targets_mean": 6532.3,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 3.85145482388974,
|
|
"grad_norm": 0.4278038294940775,
|
|
"learning_rate": 2.0007638202230053e-05,
|
|
"loss": 0.1814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1896567940711975,
|
|
"step": 2515,
|
|
"valid_targets_mean": 5536.9,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 3.8591117917304745,
|
|
"grad_norm": 0.5403737252126551,
|
|
"learning_rate": 1.9931256313618173e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19202588498592377,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4081.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 3.86676875957121,
|
|
"grad_norm": 0.44283695481669993,
|
|
"learning_rate": 1.9854875427668453e-05,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1849876344203949,
|
|
"step": 2525,
|
|
"valid_targets_mean": 5332.9,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 3.874425727411945,
|
|
"grad_norm": 0.45612268357938296,
|
|
"learning_rate": 1.9778496658435552e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2018076777458191,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4947.2,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 3.88208269525268,
|
|
"grad_norm": 0.4511802661455763,
|
|
"learning_rate": 1.970212111994325e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800006926059723,
|
|
"step": 2535,
|
|
"valid_targets_mean": 5933.2,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 3.889739663093415,
|
|
"grad_norm": 0.4550636487612721,
|
|
"learning_rate": 1.9625749926168205e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19659721851348877,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5744.1,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 3.89739663093415,
|
|
"grad_norm": 0.45143924481673986,
|
|
"learning_rate": 1.954938419102372e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17691466212272644,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4654.2,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 3.9050535987748853,
|
|
"grad_norm": 0.4108146672941242,
|
|
"learning_rate": 1.9473025028343464e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1811871975660324,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5904.1,
|
|
"valid_targets_min": 2536
|
|
},
|
|
{
|
|
"epoch": 3.9127105666156203,
|
|
"grad_norm": 0.39297017745981955,
|
|
"learning_rate": 1.9396673551865245e-05,
|
|
"loss": 0.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16155928373336792,
|
|
"step": 2555,
|
|
"valid_targets_mean": 5933.3,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 3.9203675344563553,
|
|
"grad_norm": 0.4647337901010682,
|
|
"learning_rate": 1.932033087521478e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20087887346744537,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5496.6,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 3.9280245022970903,
|
|
"grad_norm": 0.51196014419268,
|
|
"learning_rate": 1.9243998111889422e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18994739651679993,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4321.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 3.9356814701378253,
|
|
"grad_norm": 0.486482225957426,
|
|
"learning_rate": 1.916767637524193e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20536579191684723,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4581.4,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 3.9433384379785608,
|
|
"grad_norm": 0.5397289328340875,
|
|
"learning_rate": 1.9091366778464236e-05,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1987244337797165,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4776.2,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.9509954058192953,
|
|
"grad_norm": 0.39898386260649726,
|
|
"learning_rate": 1.9015070434571214e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15328550338745117,
|
|
"step": 2580,
|
|
"valid_targets_mean": 5600.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 3.9586523736600308,
|
|
"grad_norm": 0.4317711975925466,
|
|
"learning_rate": 1.8938788456384435e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19141104817390442,
|
|
"step": 2585,
|
|
"valid_targets_mean": 5771.4,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.9663093415007658,
|
|
"grad_norm": 0.471345314428317,
|
|
"learning_rate": 1.886252195651593e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18774153292179108,
|
|
"step": 2590,
|
|
"valid_targets_mean": 5299.0,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 3.9739663093415007,
|
|
"grad_norm": 0.48013416700446687,
|
|
"learning_rate": 1.8786272047351974e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20652708411216736,
|
|
"step": 2595,
|
|
"valid_targets_mean": 5300.6,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 3.9816232771822357,
|
|
"grad_norm": 0.4624682372289305,
|
|
"learning_rate": 1.8710039841036868e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17302487790584564,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4120.8,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 3.9892802450229707,
|
|
"grad_norm": 0.4168745532084026,
|
|
"learning_rate": 1.8633826449456694e-05,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17362730205059052,
|
|
"step": 2605,
|
|
"valid_targets_mean": 5236.6,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 3.996937212863706,
|
|
"grad_norm": 0.4932527691180615,
|
|
"learning_rate": 1.8557632984223124e-05,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1852743923664093,
|
|
"step": 2610,
|
|
"valid_targets_mean": 5870.2,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 4.004594180704441,
|
|
"grad_norm": 0.4117858119372678,
|
|
"learning_rate": 1.848146055665718e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16399316489696503,
|
|
"step": 2615,
|
|
"valid_targets_mean": 5134.5,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.012251148545176,
|
|
"grad_norm": 0.4808770384706559,
|
|
"learning_rate": 1.840531027777306e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1545652449131012,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5073.8,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 4.019908116385912,
|
|
"grad_norm": 0.4716619083630413,
|
|
"learning_rate": 1.832918325826188e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1703418791294098,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4764.6,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 4.027565084226646,
|
|
"grad_norm": 0.4838374411041368,
|
|
"learning_rate": 1.825308060847554e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17043305933475494,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4994.8,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 4.035222052067382,
|
|
"grad_norm": 0.4840997051081212,
|
|
"learning_rate": 1.8177003438410468e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16094501316547394,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5405.5,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 4.042879019908116,
|
|
"grad_norm": 0.4485354827030893,
|
|
"learning_rate": 1.8100952857691478e-05,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16980960965156555,
|
|
"step": 2640,
|
|
"valid_targets_mean": 5238.2,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 4.050535987748852,
|
|
"grad_norm": 0.5691308127289896,
|
|
"learning_rate": 1.802492997555554e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18940171599388123,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3804.6,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 4.058192955589586,
|
|
"grad_norm": 0.5431514385919676,
|
|
"learning_rate": 1.7948935900835666e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17891111969947815,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4574.4,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 4.065849923430322,
|
|
"grad_norm": 0.5041080458794216,
|
|
"learning_rate": 1.7872971741944657e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20006835460662842,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4423.0,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 4.073506891271057,
|
|
"grad_norm": 0.48183764813968805,
|
|
"learning_rate": 1.779703860685899e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16695404052734375,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4707.2,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 4.081163859111792,
|
|
"grad_norm": 0.44635472401034965,
|
|
"learning_rate": 1.772113760310265e-05,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16373205184936523,
|
|
"step": 2665,
|
|
"valid_targets_mean": 5316.4,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 4.088820826952527,
|
|
"grad_norm": 0.5341676996658703,
|
|
"learning_rate": 1.7645269837730964e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18219105899333954,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4616.1,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.096477794793262,
|
|
"grad_norm": 0.5082489734934296,
|
|
"learning_rate": 1.7569436417314454e-05,
|
|
"loss": 0.1654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15915320813655853,
|
|
"step": 2675,
|
|
"valid_targets_mean": 5307.9,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 4.104134762633997,
|
|
"grad_norm": 0.48315077118735306,
|
|
"learning_rate": 1.7493638447922724e-05,
|
|
"loss": 0.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17643246054649353,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4735.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 4.111791730474732,
|
|
"grad_norm": 0.4158258538790409,
|
|
"learning_rate": 1.741787703510828e-05,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14290867745876312,
|
|
"step": 2685,
|
|
"valid_targets_mean": 5752.9,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.119448698315467,
|
|
"grad_norm": 0.4464039644440533,
|
|
"learning_rate": 1.7342153283890454e-05,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16524581611156464,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4866.8,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 4.1271056661562024,
|
|
"grad_norm": 0.4276684308904948,
|
|
"learning_rate": 1.7266468298739248e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17335769534111023,
|
|
"step": 2695,
|
|
"valid_targets_mean": 5832.6,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 4.134762633996937,
|
|
"grad_norm": 0.4728772362847781,
|
|
"learning_rate": 1.719082318355924e-05,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1612071692943573,
|
|
"step": 2700,
|
|
"valid_targets_mean": 5342.1,
|
|
"valid_targets_min": 2692
|
|
},
|
|
{
|
|
"epoch": 4.142419601837672,
|
|
"grad_norm": 0.4765083268369304,
|
|
"learning_rate": 1.7115219041673513e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16713187098503113,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4761.3,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 4.150076569678407,
|
|
"grad_norm": 0.4842020502823108,
|
|
"learning_rate": 1.703965697580749e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1749369502067566,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4355.8,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 4.157733537519142,
|
|
"grad_norm": 0.49332601951947624,
|
|
"learning_rate": 1.6964138088072927e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20040933787822723,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4893.5,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 4.165390505359878,
|
|
"grad_norm": 0.4114065993741536,
|
|
"learning_rate": 1.6888663479951787e-05,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15738333761692047,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5901.4,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.173047473200612,
|
|
"grad_norm": 0.49020449855560566,
|
|
"learning_rate": 1.6813234252280198e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17966817319393158,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4505.4,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 4.180704441041348,
|
|
"grad_norm": 0.467609857800558,
|
|
"learning_rate": 1.673785150523239e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1697966456413269,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4822.9,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 4.188361408882082,
|
|
"grad_norm": 0.405164834964215,
|
|
"learning_rate": 1.6662516338304653e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15354806184768677,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5354.1,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.196018376722818,
|
|
"grad_norm": 0.4338536588919433,
|
|
"learning_rate": 1.658722985029928e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17717352509498596,
|
|
"step": 2740,
|
|
"valid_targets_mean": 5779.4,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 4.203675344563552,
|
|
"grad_norm": 0.5850087613394777,
|
|
"learning_rate": 1.6511993139308593e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15216293931007385,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4756.1,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 4.211332312404288,
|
|
"grad_norm": 0.4176714174791368,
|
|
"learning_rate": 1.6436807302698853e-05,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15484420955181122,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5400.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 4.218989280245023,
|
|
"grad_norm": 0.5045586445327894,
|
|
"learning_rate": 1.6361673437094306e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18908411264419556,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4536.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 4.226646248085758,
|
|
"grad_norm": 0.47376908314362176,
|
|
"learning_rate": 1.6286592638361176e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16104529798030853,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5460.4,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 4.234303215926493,
|
|
"grad_norm": 0.4804874523270708,
|
|
"learning_rate": 1.6211566001591673e-05,
|
|
"loss": 0.1719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16267931461334229,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4827.1,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 4.241960183767228,
|
|
"grad_norm": 0.5114521946324453,
|
|
"learning_rate": 1.6136594621088038e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19819220900535583,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4684.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 4.249617151607963,
|
|
"grad_norm": 0.5128617395395278,
|
|
"learning_rate": 1.606167959034656e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1607045829296112,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4665.6,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 4.257274119448699,
|
|
"grad_norm": 0.5017550199119629,
|
|
"learning_rate": 1.5986822002041645e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.167169451713562,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4271.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 4.264931087289433,
|
|
"grad_norm": 0.44395083608432084,
|
|
"learning_rate": 1.5912022948009862e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15831434726715088,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5820.2,
|
|
"valid_targets_min": 2996
|
|
},
|
|
{
|
|
"epoch": 4.272588055130169,
|
|
"grad_norm": 0.4722239671745089,
|
|
"learning_rate": 1.5837283519234038e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1687237173318863,
|
|
"step": 2790,
|
|
"valid_targets_mean": 5822.8,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.280245022970903,
|
|
"grad_norm": 0.45638928620066843,
|
|
"learning_rate": 1.5762604805827323e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702617108821869,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5522.4,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 4.287901990811639,
|
|
"grad_norm": 0.4756632270967452,
|
|
"learning_rate": 1.5687987897017324e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21105210483074188,
|
|
"step": 2800,
|
|
"valid_targets_mean": 5453.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.295558958652373,
|
|
"grad_norm": 0.4018470816825365,
|
|
"learning_rate": 1.561343388113017e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1360223889350891,
|
|
"step": 2805,
|
|
"valid_targets_mean": 6373.5,
|
|
"valid_targets_min": 2733
|
|
},
|
|
{
|
|
"epoch": 4.303215926493109,
|
|
"grad_norm": 0.4931211767025457,
|
|
"learning_rate": 1.5538943845574674e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15920180082321167,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6204.0,
|
|
"valid_targets_min": 2948
|
|
},
|
|
{
|
|
"epoch": 4.310872894333844,
|
|
"grad_norm": 0.5467551374335786,
|
|
"learning_rate": 1.5464518876826474e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2006562054157257,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4931.8,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 4.318529862174579,
|
|
"grad_norm": 0.42757772305840064,
|
|
"learning_rate": 1.5390160060412153e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15964971482753754,
|
|
"step": 2820,
|
|
"valid_targets_mean": 6229.6,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.326186830015314,
|
|
"grad_norm": 0.4781249802666833,
|
|
"learning_rate": 1.531586848089345e-05,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1797013282775879,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5788.6,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 4.333843797856049,
|
|
"grad_norm": 0.4881439374190577,
|
|
"learning_rate": 1.5241645221851405e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18294446170330048,
|
|
"step": 2830,
|
|
"valid_targets_mean": 5210.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 4.341500765696784,
|
|
"grad_norm": 0.40894754528830446,
|
|
"learning_rate": 1.5167491365870573e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14472708106040955,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5870.3,
|
|
"valid_targets_min": 3287
|
|
},
|
|
{
|
|
"epoch": 4.3491577335375196,
|
|
"grad_norm": 0.49768141849572484,
|
|
"learning_rate": 1.5093407994523234e-05,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.178148090839386,
|
|
"step": 2840,
|
|
"valid_targets_mean": 5438.2,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 4.356814701378254,
|
|
"grad_norm": 0.428585898075749,
|
|
"learning_rate": 1.501939618835361e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15074047446250916,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4899.4,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 4.3644716692189895,
|
|
"grad_norm": 0.5203366302958021,
|
|
"learning_rate": 1.4945457026862102e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18068398535251617,
|
|
"step": 2850,
|
|
"valid_targets_mean": 5078.4,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 4.372128637059724,
|
|
"grad_norm": 0.42841342187225284,
|
|
"learning_rate": 1.4871591588489558e-05,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15299955010414124,
|
|
"step": 2855,
|
|
"valid_targets_mean": 5253.9,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 4.3797856049004595,
|
|
"grad_norm": 0.4510836298560107,
|
|
"learning_rate": 1.4797800950601527e-05,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.195342555642128,
|
|
"step": 2860,
|
|
"valid_targets_mean": 6093.7,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 4.387442572741194,
|
|
"grad_norm": 0.5035923423896532,
|
|
"learning_rate": 1.4724086189472573e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18418394029140472,
|
|
"step": 2865,
|
|
"valid_targets_mean": 5024.9,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 4.3950995405819295,
|
|
"grad_norm": 0.511919119688101,
|
|
"learning_rate": 1.4650448380270542e-05,
|
|
"loss": 0.182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18659475445747375,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4247.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 4.402756508422665,
|
|
"grad_norm": 0.49765259928346983,
|
|
"learning_rate": 1.4576888597040897e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1821971833705902,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4381.1,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 4.4104134762633995,
|
|
"grad_norm": 0.4844340089149697,
|
|
"learning_rate": 1.450340791269106e-05,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16185416281223297,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4287.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.418070444104135,
|
|
"grad_norm": 0.4422597910254744,
|
|
"learning_rate": 1.4430007398974751e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15629816055297852,
|
|
"step": 2885,
|
|
"valid_targets_mean": 5202.2,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 4.4257274119448695,
|
|
"grad_norm": 0.448577660568666,
|
|
"learning_rate": 1.4356688126476352e-05,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15885485708713531,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4905.8,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 4.433384379785605,
|
|
"grad_norm": 0.5127193213614976,
|
|
"learning_rate": 1.428345116459532e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1999489665031433,
|
|
"step": 2895,
|
|
"valid_targets_mean": 5543.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 4.44104134762634,
|
|
"grad_norm": 0.3696874292839933,
|
|
"learning_rate": 1.421029758153055e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13624325394630432,
|
|
"step": 2900,
|
|
"valid_targets_mean": 6215.7,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 4.448698315467075,
|
|
"grad_norm": 0.47165957515150697,
|
|
"learning_rate": 1.413722844426482e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17101368308067322,
|
|
"step": 2905,
|
|
"valid_targets_mean": 5829.2,
|
|
"valid_targets_min": 3825
|
|
},
|
|
{
|
|
"epoch": 4.45635528330781,
|
|
"grad_norm": 0.529309619643836,
|
|
"learning_rate": 1.4064244818549227e-05,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20200702548027039,
|
|
"step": 2910,
|
|
"valid_targets_mean": 5083.9,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 4.464012251148545,
|
|
"grad_norm": 0.4583986271288342,
|
|
"learning_rate": 1.3991347768887629e-05,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15790319442749023,
|
|
"step": 2915,
|
|
"valid_targets_mean": 5490.7,
|
|
"valid_targets_min": 2776
|
|
},
|
|
{
|
|
"epoch": 4.47166921898928,
|
|
"grad_norm": 0.4672454081699034,
|
|
"learning_rate": 1.3918538358521136e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702156662940979,
|
|
"step": 2920,
|
|
"valid_targets_mean": 6327.2,
|
|
"valid_targets_min": 3255
|
|
},
|
|
{
|
|
"epoch": 4.479326186830015,
|
|
"grad_norm": 0.5413334749004304,
|
|
"learning_rate": 1.384581764941259e-05,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18624061346054077,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3659.4,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 4.48698315467075,
|
|
"grad_norm": 0.4203208806439039,
|
|
"learning_rate": 1.3773186702231076e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18235370516777039,
|
|
"step": 2930,
|
|
"valid_targets_mean": 6541.8,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 4.494640122511486,
|
|
"grad_norm": 0.49042923690230983,
|
|
"learning_rate": 1.3700646576336469e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16061678528785706,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4792.9,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 4.50229709035222,
|
|
"grad_norm": 0.4834191846877714,
|
|
"learning_rate": 1.362819832976395e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.187465101480484,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4979.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.509954058192956,
|
|
"grad_norm": 0.5358280569932836,
|
|
"learning_rate": 1.3555843019208604e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16426986455917358,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5311.7,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 4.51761102603369,
|
|
"grad_norm": 0.5228285623940019,
|
|
"learning_rate": 1.3483581700009988e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2028902769088745,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4474.3,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.525267993874426,
|
|
"grad_norm": 0.4362550606192867,
|
|
"learning_rate": 1.3411415426136754e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18145951628684998,
|
|
"step": 2955,
|
|
"valid_targets_mean": 5741.4,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 4.53292496171516,
|
|
"grad_norm": 0.5334592365964695,
|
|
"learning_rate": 1.333934525017127e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19370566308498383,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4627.0,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 4.540581929555896,
|
|
"grad_norm": 0.5166440369660882,
|
|
"learning_rate": 1.3267372223294258e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1994536966085434,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4718.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.548238897396631,
|
|
"grad_norm": 0.5598640848233768,
|
|
"learning_rate": 1.319549739526948e-05,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22695991396903992,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4743.5,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 4.555895865237366,
|
|
"grad_norm": 0.4771347776369329,
|
|
"learning_rate": 1.3123721814428408e-05,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18281838297843933,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4889.7,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 4.563552833078101,
|
|
"grad_norm": 0.42341232828878017,
|
|
"learning_rate": 1.3052046527654948e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16951890289783478,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5872.5,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 4.571209800918836,
|
|
"grad_norm": 0.4384091653374271,
|
|
"learning_rate": 1.2980472580370162e-05,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16993127763271332,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5593.4,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 4.578866768759571,
|
|
"grad_norm": 0.4736958546897791,
|
|
"learning_rate": 1.2909001016517031e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15475991368293762,
|
|
"step": 2990,
|
|
"valid_targets_mean": 5100.8,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 4.586523736600307,
|
|
"grad_norm": 0.4500490682468214,
|
|
"learning_rate": 1.2837632878545212e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17424660921096802,
|
|
"step": 2995,
|
|
"valid_targets_mean": 5034.8,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 4.594180704441041,
|
|
"grad_norm": 0.4982990021173331,
|
|
"learning_rate": 1.2766369207395845e-05,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15128692984580994,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4580.6,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 4.601837672281777,
|
|
"grad_norm": 0.5411255865457929,
|
|
"learning_rate": 1.269521104248637e-05,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19686409831047058,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3894.5,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 4.609494640122511,
|
|
"grad_norm": 0.46544029566434875,
|
|
"learning_rate": 1.2624159421695354e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17594610154628754,
|
|
"step": 3010,
|
|
"valid_targets_mean": 5448.4,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 4.617151607963247,
|
|
"grad_norm": 0.5041573873911557,
|
|
"learning_rate": 1.2553215381347377e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17713363468647003,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3941.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 4.624808575803982,
|
|
"grad_norm": 0.4460422876457042,
|
|
"learning_rate": 1.2482379956197898e-05,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1611134260892868,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5502.9,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.632465543644717,
|
|
"grad_norm": 0.5123541360019072,
|
|
"learning_rate": 1.2411654179418162e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15525534749031067,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4013.7,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 4.640122511485452,
|
|
"grad_norm": 0.5056279153295342,
|
|
"learning_rate": 1.2341039082580143e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20131169259548187,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4715.8,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 4.647779479326187,
|
|
"grad_norm": 0.4963953600906092,
|
|
"learning_rate": 1.2270535695641488e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1591898500919342,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4588.5,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 4.655436447166922,
|
|
"grad_norm": 0.4787452476331541,
|
|
"learning_rate": 1.2200145046930494e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18031710386276245,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4756.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 4.663093415007657,
|
|
"grad_norm": 0.4353032563440157,
|
|
"learning_rate": 1.2129868163131115e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15741820633411407,
|
|
"step": 3045,
|
|
"valid_targets_mean": 5642.2,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 4.670750382848392,
|
|
"grad_norm": 0.4956709263367504,
|
|
"learning_rate": 1.2059706069267985e-05,
|
|
"loss": 0.1987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20289403200149536,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4478.3,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 4.6784073506891275,
|
|
"grad_norm": 0.4374701574959408,
|
|
"learning_rate": 1.1989659788691472e-05,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15316550433635712,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5378.1,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 4.686064318529862,
|
|
"grad_norm": 0.4312228148138217,
|
|
"learning_rate": 1.1919730343062742e-05,
|
|
"loss": 0.182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1685815006494522,
|
|
"step": 3060,
|
|
"valid_targets_mean": 5913.8,
|
|
"valid_targets_min": 2858
|
|
},
|
|
{
|
|
"epoch": 4.6937212863705975,
|
|
"grad_norm": 0.6788655954584126,
|
|
"learning_rate": 1.1849918752338864e-05,
|
|
"loss": 0.1591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1469895839691162,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5040.0,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 4.701378254211332,
|
|
"grad_norm": 0.5027169074416509,
|
|
"learning_rate": 1.1780226034757938e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15269100666046143,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3950.4,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.7090352220520675,
|
|
"grad_norm": 0.45849599624176834,
|
|
"learning_rate": 1.1710653206824225e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1836073398590088,
|
|
"step": 3075,
|
|
"valid_targets_mean": 5161.0,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 4.716692189892802,
|
|
"grad_norm": 0.463224080205975,
|
|
"learning_rate": 1.164120128329334e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1551927775144577,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4956.4,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 4.7243491577335375,
|
|
"grad_norm": 0.6749074626071137,
|
|
"learning_rate": 1.1571871277157458e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819312572479248,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3737.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.732006125574273,
|
|
"grad_norm": 0.4507533807504539,
|
|
"learning_rate": 1.15026641996305e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480093002319336,
|
|
"step": 3090,
|
|
"valid_targets_mean": 5161.5,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.7396630934150075,
|
|
"grad_norm": 0.5183685281293393,
|
|
"learning_rate": 1.1433581060133432e-05,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1799740195274353,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5317.7,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.747320061255743,
|
|
"grad_norm": 0.5028716072176976,
|
|
"learning_rate": 1.136462286627952e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17748679220676422,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4244.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.7549770290964775,
|
|
"grad_norm": 0.7306366733577314,
|
|
"learning_rate": 1.1295790623859605e-05,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16387039422988892,
|
|
"step": 3105,
|
|
"valid_targets_mean": 6011.9,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 4.762633996937213,
|
|
"grad_norm": 0.4663498360213423,
|
|
"learning_rate": 1.1227085336827492e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18103697896003723,
|
|
"step": 3110,
|
|
"valid_targets_mean": 5026.4,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 4.7702909647779475,
|
|
"grad_norm": 0.483563480045064,
|
|
"learning_rate": 1.1158508007285266e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16695986688137054,
|
|
"step": 3115,
|
|
"valid_targets_mean": 5319.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.777947932618683,
|
|
"grad_norm": 0.4827606579725246,
|
|
"learning_rate": 1.1090059635468693e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.167944997549057,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4909.1,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.785604900459418,
|
|
"grad_norm": 0.43356790112848625,
|
|
"learning_rate": 1.1021741219732602e-05,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1488049477338791,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4725.1,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 4.793261868300153,
|
|
"grad_norm": 0.4798237312494164,
|
|
"learning_rate": 1.0953553756536363e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.202002614736557,
|
|
"step": 3130,
|
|
"valid_targets_mean": 5125.2,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 4.800918836140888,
|
|
"grad_norm": 0.4223262568134376,
|
|
"learning_rate": 1.0885498240429344e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16876980662345886,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5708.7,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 4.808575803981624,
|
|
"grad_norm": 0.4239980387727623,
|
|
"learning_rate": 1.0817575664036371e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1675986349582672,
|
|
"step": 3140,
|
|
"valid_targets_mean": 4858.1,
|
|
"valid_targets_min": 2378
|
|
},
|
|
{
|
|
"epoch": 4.816232771822358,
|
|
"grad_norm": 0.4847147778344571,
|
|
"learning_rate": 1.07497870180433e-05,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16506324708461761,
|
|
"step": 3145,
|
|
"valid_targets_mean": 5388.0,
|
|
"valid_targets_min": 2902
|
|
},
|
|
{
|
|
"epoch": 4.823889739663094,
|
|
"grad_norm": 0.5548302078937414,
|
|
"learning_rate": 1.0682133291182522e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14651057124137878,
|
|
"step": 3150,
|
|
"valid_targets_mean": 5324.9,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 4.831546707503828,
|
|
"grad_norm": 0.5265431034812131,
|
|
"learning_rate": 1.0614615470218585e-05,
|
|
"loss": 0.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19283831119537354,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4184.3,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 4.839203675344564,
|
|
"grad_norm": 0.4752269724565345,
|
|
"learning_rate": 1.0547234539933755e-05,
|
|
"loss": 0.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17586413025856018,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4714.7,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 4.846860643185298,
|
|
"grad_norm": 1.169559990539335,
|
|
"learning_rate": 1.0479991483113697e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17083494365215302,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4708.2,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 4.854517611026034,
|
|
"grad_norm": 0.4368959854231952,
|
|
"learning_rate": 1.0412887280533117e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1704709231853485,
|
|
"step": 3170,
|
|
"valid_targets_mean": 5273.7,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 4.862174578866769,
|
|
"grad_norm": 0.4944289144103065,
|
|
"learning_rate": 1.0345922910941448e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14916256070137024,
|
|
"step": 3175,
|
|
"valid_targets_mean": 6193.1,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 4.869831546707504,
|
|
"grad_norm": 0.46502943737171365,
|
|
"learning_rate": 1.0279099351048602e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17156018316745758,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5356.7,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 4.877488514548239,
|
|
"grad_norm": 0.5097200882162136,
|
|
"learning_rate": 1.0212417575510694e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1614779531955719,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5069.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 4.885145482388974,
|
|
"grad_norm": 0.47471570009753655,
|
|
"learning_rate": 1.0145878556915849e-05,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1738322228193283,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4969.6,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 4.892802450229709,
|
|
"grad_norm": 0.4998838240469851,
|
|
"learning_rate": 1.0079483265770019e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17251190543174744,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4337.6,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.900459418070444,
|
|
"grad_norm": 0.531925874666518,
|
|
"learning_rate": 1.001323267048278e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1674518585205078,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4905.7,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 4.908116385911179,
|
|
"grad_norm": 0.5453180840806279,
|
|
"learning_rate": 9.947127737353306e-06,
|
|
"loss": 0.1853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17539682984352112,
|
|
"step": 3205,
|
|
"valid_targets_mean": 5963.2,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 4.915773353751915,
|
|
"grad_norm": 0.4250839312770307,
|
|
"learning_rate": 9.88116943055615e-06,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16129732131958008,
|
|
"step": 3210,
|
|
"valid_targets_mean": 5374.8,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 4.923430321592649,
|
|
"grad_norm": 0.5076367049894257,
|
|
"learning_rate": 9.81535871212729e-06,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18412069976329803,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4940.6,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 4.931087289433385,
|
|
"grad_norm": 0.46983192162441934,
|
|
"learning_rate": 9.749696541950013e-06,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18911156058311462,
|
|
"step": 3220,
|
|
"valid_targets_mean": 5087.8,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 4.938744257274119,
|
|
"grad_norm": 0.43659187427488777,
|
|
"learning_rate": 9.684183877740985e-06,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17905791103839874,
|
|
"step": 3225,
|
|
"valid_targets_mean": 6008.7,
|
|
"valid_targets_min": 3603
|
|
},
|
|
{
|
|
"epoch": 4.946401225114855,
|
|
"grad_norm": 0.4841288841367283,
|
|
"learning_rate": 9.61882167503624e-06,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20017869770526886,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5367.9,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 4.954058192955589,
|
|
"grad_norm": 0.512592936573579,
|
|
"learning_rate": 9.553610887177246e-06,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19410035014152527,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4450.0,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 4.961715160796325,
|
|
"grad_norm": 0.5188515954114221,
|
|
"learning_rate": 9.488552465297015e-06,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1623811423778534,
|
|
"step": 3240,
|
|
"valid_targets_mean": 4658.7,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 4.96937212863706,
|
|
"grad_norm": 0.476731013903736,
|
|
"learning_rate": 9.423647358306218e-06,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1714991182088852,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4713.1,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 4.977029096477795,
|
|
"grad_norm": 0.5546365002101505,
|
|
"learning_rate": 9.358896512879358e-06,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15670126676559448,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5500.0,
|
|
"valid_targets_min": 2235
|
|
},
|
|
{
|
|
"epoch": 4.98468606431853,
|
|
"grad_norm": 0.4853561972328554,
|
|
"learning_rate": 9.294300873440936e-06,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17436271905899048,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4673.0,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 4.992343032159265,
|
|
"grad_norm": 0.5222923370657017,
|
|
"learning_rate": 9.22986138215171e-06,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2152424305677414,
|
|
"step": 3260,
|
|
"valid_targets_mean": 5085.4,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6441979554730815,
|
|
"learning_rate": 9.165578978894937e-06,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17200767993927002,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3869.9,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 5.007656967840735,
|
|
"grad_norm": 0.4220356269014037,
|
|
"learning_rate": 9.10145460126265e-06,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17021849751472473,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5500.6,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 5.01531393568147,
|
|
"grad_norm": 0.4289204570010935,
|
|
"learning_rate": 9.03748918454201e-06,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16679969429969788,
|
|
"step": 3275,
|
|
"valid_targets_mean": 6136.5,
|
|
"valid_targets_min": 3099
|
|
},
|
|
{
|
|
"epoch": 5.022970903522205,
|
|
"grad_norm": 0.4712426003028533,
|
|
"learning_rate": 8.973683661701637e-06,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1566212773323059,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5120.7,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 5.03062787136294,
|
|
"grad_norm": 0.4518245512255529,
|
|
"learning_rate": 8.910038963378032e-06,
|
|
"loss": 0.1627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14622384309768677,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4985.9,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 5.038284839203675,
|
|
"grad_norm": 0.4982253074593932,
|
|
"learning_rate": 8.846556017861987e-06,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15284357964992523,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4108.5,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.04594180704441,
|
|
"grad_norm": 0.5111382953009579,
|
|
"learning_rate": 8.783235751085016e-06,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16670887172222137,
|
|
"step": 3295,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 5.053598774885145,
|
|
"grad_norm": 0.508139696790504,
|
|
"learning_rate": 8.72007908660593e-06,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17335477471351624,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4433.8,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 5.061255742725881,
|
|
"grad_norm": 0.4971355581613195,
|
|
"learning_rate": 8.657086945597273e-06,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1560318022966385,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4618.1,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 5.068912710566615,
|
|
"grad_norm": 0.519951349237222,
|
|
"learning_rate": 8.594260246831954e-06,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17906685173511505,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4824.6,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.076569678407351,
|
|
"grad_norm": 0.48195723569060445,
|
|
"learning_rate": 8.531599906669802e-06,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17248311638832092,
|
|
"step": 3315,
|
|
"valid_targets_mean": 5084.2,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 5.084226646248085,
|
|
"grad_norm": 0.5032723828958181,
|
|
"learning_rate": 8.469106839044232e-06,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18109896779060364,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4405.3,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 5.091883614088821,
|
|
"grad_norm": 0.45559814554047867,
|
|
"learning_rate": 8.406781955448913e-06,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16298320889472961,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5751.0,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 5.099540581929556,
|
|
"grad_norm": 0.4939004046341742,
|
|
"learning_rate": 8.344626164924436e-06,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718837022781372,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5382.8,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 5.107197549770291,
|
|
"grad_norm": 0.5085695929621461,
|
|
"learning_rate": 8.28264037404511e-06,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1606028974056244,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4314.8,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 5.114854517611026,
|
|
"grad_norm": 0.5096351984468025,
|
|
"learning_rate": 8.220825486905686e-06,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14969605207443237,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4719.4,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 5.122511485451761,
|
|
"grad_norm": 0.47515607105527047,
|
|
"learning_rate": 8.159182405108222e-06,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1551935225725174,
|
|
"step": 3345,
|
|
"valid_targets_mean": 5535.8,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 5.130168453292496,
|
|
"grad_norm": 0.4465656432414435,
|
|
"learning_rate": 8.097712027748879e-06,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1423056423664093,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4984.5,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 5.137825421133231,
|
|
"grad_norm": 0.5427524948981346,
|
|
"learning_rate": 8.036415251404855e-06,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1889774203300476,
|
|
"step": 3355,
|
|
"valid_targets_mean": 5009.1,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 5.145482388973966,
|
|
"grad_norm": 0.4892462664411317,
|
|
"learning_rate": 7.975292970121286e-06,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15726116299629211,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5218.2,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 5.153139356814702,
|
|
"grad_norm": 0.4770823262528399,
|
|
"learning_rate": 7.914346075398191e-06,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16496963798999786,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4660.6,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 5.160796324655436,
|
|
"grad_norm": 0.8056092258122578,
|
|
"learning_rate": 7.85357545617751e-06,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13913197815418243,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4270.4,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 5.168453292496172,
|
|
"grad_norm": 0.5061408770101286,
|
|
"learning_rate": 7.792981998830092e-06,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16775824129581451,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5022.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 5.176110260336906,
|
|
"grad_norm": 0.6037385627128377,
|
|
"learning_rate": 7.732566587142793e-06,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17123952507972717,
|
|
"step": 3380,
|
|
"valid_targets_mean": 4231.1,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.183767228177642,
|
|
"grad_norm": 0.5311926260698461,
|
|
"learning_rate": 7.672330102305596e-06,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19350102543830872,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4315.9,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 5.191424196018377,
|
|
"grad_norm": 0.5044523176604377,
|
|
"learning_rate": 7.612273422898726e-06,
|
|
"loss": 0.1562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19003711640834808,
|
|
"step": 3390,
|
|
"valid_targets_mean": 4566.2,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 5.199081163859112,
|
|
"grad_norm": 0.6176433774855741,
|
|
"learning_rate": 7.5523974248798714e-06,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16127054393291473,
|
|
"step": 3395,
|
|
"valid_targets_mean": 5243.8,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 5.206738131699847,
|
|
"grad_norm": 0.5162091315973347,
|
|
"learning_rate": 7.492702981571363e-06,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1597318947315216,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4938.6,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 5.214395099540582,
|
|
"grad_norm": 0.8676299780329471,
|
|
"learning_rate": 7.433190963647488e-06,
|
|
"loss": 0.1611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14614874124526978,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4358.2,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 5.222052067381317,
|
|
"grad_norm": 0.4774915723911,
|
|
"learning_rate": 7.373862239121743e-06,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16033905744552612,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4894.8,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 5.229709035222052,
|
|
"grad_norm": 0.5040954116535451,
|
|
"learning_rate": 7.314717673334213e-06,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16919949650764465,
|
|
"step": 3415,
|
|
"valid_targets_mean": 6000.2,
|
|
"valid_targets_min": 3105
|
|
},
|
|
{
|
|
"epoch": 5.237366003062787,
|
|
"grad_norm": 0.4912501104493769,
|
|
"learning_rate": 7.255758128938934e-06,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20043662190437317,
|
|
"step": 3420,
|
|
"valid_targets_mean": 6128.0,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 5.2450229709035225,
|
|
"grad_norm": 0.530597468241474,
|
|
"learning_rate": 7.196984465891288e-06,
|
|
"loss": 0.1666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16545242071151733,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4632.1,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 5.252679938744257,
|
|
"grad_norm": 0.4704553688188211,
|
|
"learning_rate": 7.138397541435513e-06,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16121527552604675,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5139.7,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 5.2603369065849925,
|
|
"grad_norm": 0.5036246695889622,
|
|
"learning_rate": 7.079998210092132e-06,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18528419733047485,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4868.1,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.267993874425727,
|
|
"grad_norm": 0.5946952911939519,
|
|
"learning_rate": 7.021787323645557e-06,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1626713126897812,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5682.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.2756508422664625,
|
|
"grad_norm": 0.44380451085617817,
|
|
"learning_rate": 6.963765731131622e-06,
|
|
"loss": 0.1619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13598023355007172,
|
|
"step": 3445,
|
|
"valid_targets_mean": 5084.1,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 5.283307810107198,
|
|
"grad_norm": 0.44937678766204686,
|
|
"learning_rate": 6.9059342788252035e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15446916222572327,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5280.4,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.2909647779479325,
|
|
"grad_norm": 0.5423325722642703,
|
|
"learning_rate": 6.848293810227901e-06,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16436201333999634,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3904.1,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 5.298621745788668,
|
|
"grad_norm": 0.5230875510151383,
|
|
"learning_rate": 6.790845166055699e-06,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1833193153142929,
|
|
"step": 3460,
|
|
"valid_targets_mean": 4362.5,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 5.3062787136294025,
|
|
"grad_norm": 0.47630676748649625,
|
|
"learning_rate": 6.733589184226747e-06,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14603200554847717,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5092.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.313935681470138,
|
|
"grad_norm": 0.47996042244501036,
|
|
"learning_rate": 6.676526699849086e-06,
|
|
"loss": 0.1627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17878413200378418,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5376.2,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 5.3215926493108725,
|
|
"grad_norm": 0.49793974395131096,
|
|
"learning_rate": 6.619658545208523e-06,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17316502332687378,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4947.2,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 5.329249617151608,
|
|
"grad_norm": 0.5080242803851914,
|
|
"learning_rate": 6.562985549756448e-06,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14953093230724335,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4898.6,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 5.336906584992343,
|
|
"grad_norm": 0.514443547063424,
|
|
"learning_rate": 6.506508540097769e-06,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16423040628433228,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5819.7,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 5.344563552833078,
|
|
"grad_norm": 0.4584620928236497,
|
|
"learning_rate": 6.450228339978832e-06,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17829495668411255,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5519.6,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 5.352220520673813,
|
|
"grad_norm": 0.47394337082009697,
|
|
"learning_rate": 6.394145770275402e-06,
|
|
"loss": 0.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514066755771637,
|
|
"step": 3495,
|
|
"valid_targets_mean": 5396.5,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 5.359877488514548,
|
|
"grad_norm": 0.4729834195916555,
|
|
"learning_rate": 6.338261648980728e-06,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16408702731132507,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4929.8,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.367534456355283,
|
|
"grad_norm": 0.4695111134489663,
|
|
"learning_rate": 6.282576791193557e-06,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14791680872440338,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4815.1,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 5.375191424196018,
|
|
"grad_norm": 0.6948673104574568,
|
|
"learning_rate": 6.227092009106301e-06,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1767755150794983,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3131.2,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 5.382848392036753,
|
|
"grad_norm": 0.5735437955530691,
|
|
"learning_rate": 6.171808111993158e-06,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17738190293312073,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4039.9,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.390505359877489,
|
|
"grad_norm": 0.4761051088696013,
|
|
"learning_rate": 6.116725906198297e-06,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1486128270626068,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4567.4,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 5.398162327718223,
|
|
"grad_norm": 0.4579204274900233,
|
|
"learning_rate": 6.061846195124144e-06,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18317697942256927,
|
|
"step": 3525,
|
|
"valid_targets_mean": 5270.5,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 5.405819295558959,
|
|
"grad_norm": 0.41159114268724806,
|
|
"learning_rate": 6.007169779219606e-06,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442817747592926,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5373.3,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 5.413476263399693,
|
|
"grad_norm": 0.665150948244762,
|
|
"learning_rate": 5.952697455968444e-06,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17882975935935974,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3959.0,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 5.421133231240429,
|
|
"grad_norm": 0.5112194729196444,
|
|
"learning_rate": 5.898430019877626e-06,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1770792305469513,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4908.1,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 5.428790199081164,
|
|
"grad_norm": 0.49899364818543357,
|
|
"learning_rate": 5.8443682624657095e-06,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14375297725200653,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4505.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.436447166921899,
|
|
"grad_norm": 0.5057554070553661,
|
|
"learning_rate": 5.790512972251356e-06,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14775539934635162,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5433.1,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 5.444104134762634,
|
|
"grad_norm": 0.4379349439343321,
|
|
"learning_rate": 5.736864934741764e-06,
|
|
"loss": 0.1621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14294332265853882,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5075.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 5.451761102603369,
|
|
"grad_norm": 0.46265699177561026,
|
|
"learning_rate": 5.683424932421273e-06,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15184825658798218,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5332.7,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 5.459418070444104,
|
|
"grad_norm": 0.5952621899721654,
|
|
"learning_rate": 5.630193744739896e-06,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19496630132198334,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4201.9,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 5.46707503828484,
|
|
"grad_norm": 0.48070697420874536,
|
|
"learning_rate": 5.577172148101993e-06,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16540558636188507,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4890.1,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 5.474732006125574,
|
|
"grad_norm": 0.6848512357907565,
|
|
"learning_rate": 5.52436091585493e-06,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17848747968673706,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4479.4,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.48238897396631,
|
|
"grad_norm": 0.47722149531129565,
|
|
"learning_rate": 5.471760818277792e-06,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17572689056396484,
|
|
"step": 3580,
|
|
"valid_targets_mean": 5303.5,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 5.490045941807044,
|
|
"grad_norm": 0.5271976003353499,
|
|
"learning_rate": 5.419372622570169e-06,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15122967958450317,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4763.6,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 5.49770290964778,
|
|
"grad_norm": 0.7659869620019119,
|
|
"learning_rate": 5.367197092840932e-06,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17659571766853333,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4276.9,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 5.505359877488514,
|
|
"grad_norm": 0.5759762429875664,
|
|
"learning_rate": 5.315234990097131e-06,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14629271626472473,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5444.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 5.51301684532925,
|
|
"grad_norm": 0.4702162740174821,
|
|
"learning_rate": 5.263487072232851e-06,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15715420246124268,
|
|
"step": 3600,
|
|
"valid_targets_mean": 5552.1,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 5.520673813169985,
|
|
"grad_norm": 0.5664693454943723,
|
|
"learning_rate": 5.211954094018201e-06,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129855751991272,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4471.0,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 5.52833078101072,
|
|
"grad_norm": 0.439009368809084,
|
|
"learning_rate": 5.160636807088277e-06,
|
|
"loss": 0.1594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1415124386548996,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4938.8,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 5.535987748851455,
|
|
"grad_norm": 0.4784953299934146,
|
|
"learning_rate": 5.109535959932195e-06,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16233739256858826,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4896.0,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 5.54364471669219,
|
|
"grad_norm": 0.48393748501482625,
|
|
"learning_rate": 5.058652297882205e-06,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16299815475940704,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4792.2,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 5.551301684532925,
|
|
"grad_norm": 0.4788193457811368,
|
|
"learning_rate": 5.007986563102778e-06,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13831590116024017,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5167.0,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 5.55895865237366,
|
|
"grad_norm": 0.5713684082322842,
|
|
"learning_rate": 4.9575394945798236e-06,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15097324550151825,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4927.1,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 5.566615620214395,
|
|
"grad_norm": 0.5133104989832143,
|
|
"learning_rate": 4.9073118281098845e-06,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19625042378902435,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5513.1,
|
|
"valid_targets_min": 2604
|
|
},
|
|
{
|
|
"epoch": 5.5742725880551305,
|
|
"grad_norm": 0.454081092007902,
|
|
"learning_rate": 4.857304296289398e-06,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15363352000713348,
|
|
"step": 3640,
|
|
"valid_targets_mean": 5523.7,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 5.581929555895865,
|
|
"grad_norm": 0.46884469793283995,
|
|
"learning_rate": 4.807517628504048e-06,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17339780926704407,
|
|
"step": 3645,
|
|
"valid_targets_mean": 5304.4,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 5.5895865237366005,
|
|
"grad_norm": 0.45801637501032694,
|
|
"learning_rate": 4.757952550918077e-06,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14689268171787262,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5640.2,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 5.597243491577335,
|
|
"grad_norm": 0.5120675322489687,
|
|
"learning_rate": 4.7086097864637444e-06,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1544780433177948,
|
|
"step": 3655,
|
|
"valid_targets_mean": 5030.8,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.6049004594180705,
|
|
"grad_norm": 0.567671223880722,
|
|
"learning_rate": 4.659490054830729e-06,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1720244139432907,
|
|
"step": 3660,
|
|
"valid_targets_mean": 4914.8,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 5.612557427258805,
|
|
"grad_norm": 0.43509274496020417,
|
|
"learning_rate": 4.6105940724557e-06,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15081757307052612,
|
|
"step": 3665,
|
|
"valid_targets_mean": 6018.0,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 5.6202143950995405,
|
|
"grad_norm": 0.5912507132310114,
|
|
"learning_rate": 4.561922552511788e-06,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17240439355373383,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3949.9,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 5.627871362940276,
|
|
"grad_norm": 0.4793859887692694,
|
|
"learning_rate": 4.5134762048982485e-06,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1666552871465683,
|
|
"step": 3675,
|
|
"valid_targets_mean": 5170.1,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 5.6355283307810105,
|
|
"grad_norm": 0.45725888831198136,
|
|
"learning_rate": 4.465255736230076e-06,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15449725091457367,
|
|
"step": 3680,
|
|
"valid_targets_mean": 5772.3,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.643185298621746,
|
|
"grad_norm": 0.4785523703342554,
|
|
"learning_rate": 4.417261849827696e-06,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19263219833374023,
|
|
"step": 3685,
|
|
"valid_targets_mean": 5215.2,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 5.650842266462481,
|
|
"grad_norm": 0.4757831273839956,
|
|
"learning_rate": 4.369495245706729e-06,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494811326265335,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5826.9,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 5.658499234303216,
|
|
"grad_norm": 0.4702461000587345,
|
|
"learning_rate": 4.321956620567751e-06,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1776936650276184,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5043.8,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 5.666156202143951,
|
|
"grad_norm": 0.46987683467332103,
|
|
"learning_rate": 4.274646667786157e-06,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15853236615657806,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4678.7,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 5.673813169984686,
|
|
"grad_norm": 0.4680178929081938,
|
|
"learning_rate": 4.227566077402041e-06,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1535928100347519,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4963.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.681470137825421,
|
|
"grad_norm": 0.44046042021206055,
|
|
"learning_rate": 4.180715536110112e-06,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13984829187393188,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5743.1,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 5.689127105666156,
|
|
"grad_norm": 0.5050717822406969,
|
|
"learning_rate": 4.1340957272497115e-06,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14051397144794464,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4413.8,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 5.696784073506891,
|
|
"grad_norm": 0.609576054055646,
|
|
"learning_rate": 4.087707330794814e-06,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14713960886001587,
|
|
"step": 3720,
|
|
"valid_targets_mean": 5758.8,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 5.704441041347627,
|
|
"grad_norm": 0.5071420592448429,
|
|
"learning_rate": 4.041551023344139e-06,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17213720083236694,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5253.4,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 5.712098009188361,
|
|
"grad_norm": 0.46046580007849597,
|
|
"learning_rate": 3.995627478111264e-06,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1543341875076294,
|
|
"step": 3730,
|
|
"valid_targets_mean": 5808.6,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 5.719754977029097,
|
|
"grad_norm": 0.4717948081992276,
|
|
"learning_rate": 3.949937364914798e-06,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14789888262748718,
|
|
"step": 3735,
|
|
"valid_targets_mean": 5842.7,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 5.727411944869831,
|
|
"grad_norm": 0.472692526766977,
|
|
"learning_rate": 3.904481350168641e-06,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18231379985809326,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5480.3,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.735068912710567,
|
|
"grad_norm": 0.4774164157350961,
|
|
"learning_rate": 3.8592600968722285e-06,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17668023705482483,
|
|
"step": 3745,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 5.742725880551301,
|
|
"grad_norm": 0.5089014992120109,
|
|
"learning_rate": 3.814274264600899e-06,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15314318239688873,
|
|
"step": 3750,
|
|
"valid_targets_mean": 4953.4,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 5.750382848392037,
|
|
"grad_norm": 0.5179707303157852,
|
|
"learning_rate": 3.7695245094962228e-06,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16455323994159698,
|
|
"step": 3755,
|
|
"valid_targets_mean": 5168.4,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 5.758039816232772,
|
|
"grad_norm": 0.49543638175235816,
|
|
"learning_rate": 3.7250114842565087e-06,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15587033331394196,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4512.6,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.765696784073507,
|
|
"grad_norm": 0.5025738435987257,
|
|
"learning_rate": 3.6807358381271963e-06,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2389712631702423,
|
|
"step": 3765,
|
|
"valid_targets_mean": 5495.2,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 5.773353751914242,
|
|
"grad_norm": 0.44563197876271343,
|
|
"learning_rate": 3.6366982168914456e-06,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14085888862609863,
|
|
"step": 3770,
|
|
"valid_targets_mean": 5305.1,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 5.781010719754977,
|
|
"grad_norm": 0.4711699929533897,
|
|
"learning_rate": 3.5928992628607075e-06,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14801645278930664,
|
|
"step": 3775,
|
|
"valid_targets_mean": 5146.2,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 5.788667687595712,
|
|
"grad_norm": 0.45863808724665833,
|
|
"learning_rate": 3.549339614865328e-06,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17908339202404022,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5437.6,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 5.796324655436447,
|
|
"grad_norm": 0.5602836811382915,
|
|
"learning_rate": 3.506019908245275e-06,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15862153470516205,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4928.1,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.803981623277182,
|
|
"grad_norm": 0.5625845577017787,
|
|
"learning_rate": 3.462940774840826e-06,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18470574915409088,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4665.9,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 5.811638591117918,
|
|
"grad_norm": 0.5156477028052144,
|
|
"learning_rate": 3.4201028429833883e-06,
|
|
"loss": 0.1529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14838847517967224,
|
|
"step": 3795,
|
|
"valid_targets_mean": 5494.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 5.819295558958652,
|
|
"grad_norm": 0.4397002728342675,
|
|
"learning_rate": 3.37750673748632e-06,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14019045233726501,
|
|
"step": 3800,
|
|
"valid_targets_mean": 5286.6,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 5.826952526799388,
|
|
"grad_norm": 0.48750838244321903,
|
|
"learning_rate": 3.3351530796358024e-06,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1837264895439148,
|
|
"step": 3805,
|
|
"valid_targets_mean": 5369.9,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 5.834609494640122,
|
|
"grad_norm": 0.575464842240813,
|
|
"learning_rate": 3.2930424871818145e-06,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1724276840686798,
|
|
"step": 3810,
|
|
"valid_targets_mean": 5045.8,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 5.842266462480858,
|
|
"grad_norm": 0.5014265385817863,
|
|
"learning_rate": 3.2511755743290774e-06,
|
|
"loss": 0.1676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16347980499267578,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 5.849923430321593,
|
|
"grad_norm": 0.4971195883757721,
|
|
"learning_rate": 3.2095529517281365e-06,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15201014280319214,
|
|
"step": 3820,
|
|
"valid_targets_mean": 4555.1,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 5.857580398162328,
|
|
"grad_norm": 0.5008524466215926,
|
|
"learning_rate": 3.1681752264664387e-06,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15063107013702393,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4804.3,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 5.865237366003063,
|
|
"grad_norm": 0.5064320357447261,
|
|
"learning_rate": 3.12704300205946e-06,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16848312318325043,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4334.7,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 5.8728943338437976,
|
|
"grad_norm": 0.5582372456603737,
|
|
"learning_rate": 3.0861568784419393e-06,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15561410784721375,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4067.6,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 5.880551301684533,
|
|
"grad_norm": 0.48015271820719657,
|
|
"learning_rate": 3.0455174519590926e-06,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17181405425071716,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5108.1,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 5.888208269525268,
|
|
"grad_norm": 0.5032917025542417,
|
|
"learning_rate": 3.0051253153579373e-06,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15245842933654785,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4856.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.895865237366003,
|
|
"grad_norm": 0.4568927865552627,
|
|
"learning_rate": 2.964981057778644e-06,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15734651684761047,
|
|
"step": 3850,
|
|
"valid_targets_mean": 5474.4,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 5.903522205206738,
|
|
"grad_norm": 0.5333563811950146,
|
|
"learning_rate": 2.9250852647459418e-06,
|
|
"loss": 0.1591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14090844988822937,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4721.5,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 5.911179173047473,
|
|
"grad_norm": 0.4809515679578391,
|
|
"learning_rate": 2.8854385181605594e-06,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1540873646736145,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5109.4,
|
|
"valid_targets_min": 1901
|
|
},
|
|
{
|
|
"epoch": 5.918836140888208,
|
|
"grad_norm": 0.48239544970207976,
|
|
"learning_rate": 2.8460413962907705e-06,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17352712154388428,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4727.1,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 5.926493108728943,
|
|
"grad_norm": 0.5337046272163664,
|
|
"learning_rate": 2.8068944737639436e-06,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16555720567703247,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4828.6,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 5.934150076569678,
|
|
"grad_norm": 0.475103452331955,
|
|
"learning_rate": 2.7679983215581474e-06,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19165559113025665,
|
|
"step": 3875,
|
|
"valid_targets_mean": 6190.2,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 5.941807044410414,
|
|
"grad_norm": 0.5134784747632062,
|
|
"learning_rate": 2.72935350699385e-06,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15154170989990234,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4244.9,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 5.949464012251148,
|
|
"grad_norm": 0.43408459090260726,
|
|
"learning_rate": 2.69096059372562e-06,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1547510176897049,
|
|
"step": 3885,
|
|
"valid_targets_mean": 5395.6,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 5.957120980091884,
|
|
"grad_norm": 0.6503354175258022,
|
|
"learning_rate": 2.6528201417339205e-06,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1949981451034546,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3938.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 5.964777947932618,
|
|
"grad_norm": 0.5246115940102106,
|
|
"learning_rate": 2.614932707316942e-06,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17228254675865173,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4690.4,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 5.972434915773354,
|
|
"grad_norm": 0.4986410635561786,
|
|
"learning_rate": 2.5772988430824697e-06,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16765034198760986,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5091.7,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 5.980091883614088,
|
|
"grad_norm": 0.41080740486480766,
|
|
"learning_rate": 2.5399190979398493e-06,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336369514465332,
|
|
"step": 3905,
|
|
"valid_targets_mean": 6159.9,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 5.987748851454824,
|
|
"grad_norm": 0.42306492813111624,
|
|
"learning_rate": 2.5027940170919583e-06,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14831745624542236,
|
|
"step": 3910,
|
|
"valid_targets_mean": 6090.9,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 5.995405819295559,
|
|
"grad_norm": 0.4748890363586371,
|
|
"learning_rate": 2.4659241420272716e-06,
|
|
"loss": 0.16,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1577981561422348,
|
|
"step": 3915,
|
|
"valid_targets_mean": 5881.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 6.003062787136294,
|
|
"grad_norm": 0.46055683667700925,
|
|
"learning_rate": 2.429310010511956e-06,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16992008686065674,
|
|
"step": 3920,
|
|
"valid_targets_mean": 5102.2,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 6.010719754977029,
|
|
"grad_norm": 0.42406910050920277,
|
|
"learning_rate": 2.392952156582018e-06,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13615508377552032,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5358.2,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.018376722817764,
|
|
"grad_norm": 0.4799597914576974,
|
|
"learning_rate": 2.3568511105355363e-06,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1690567135810852,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4816.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.026033690658499,
|
|
"grad_norm": 0.4808874892486829,
|
|
"learning_rate": 2.321007398924897e-06,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14551788568496704,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4578.4,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 6.033690658499235,
|
|
"grad_norm": 0.44957556088245915,
|
|
"learning_rate": 2.2854215445491467e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15582698583602905,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5508.3,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 6.041347626339969,
|
|
"grad_norm": 0.5135673979652564,
|
|
"learning_rate": 2.250094066446342e-06,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1671498417854309,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4560.2,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 6.049004594180705,
|
|
"grad_norm": 0.5547826648523004,
|
|
"learning_rate": 2.215025479885999e-06,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16672340035438538,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4601.8,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 6.056661562021439,
|
|
"grad_norm": 0.4009473872690044,
|
|
"learning_rate": 2.180216296361548e-06,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12265469133853912,
|
|
"step": 3955,
|
|
"valid_targets_mean": 6534.8,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 6.064318529862175,
|
|
"grad_norm": 0.4564705546836802,
|
|
"learning_rate": 2.145667023582907e-06,
|
|
"loss": 0.1611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144794762134552,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5498.0,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 6.071975497702909,
|
|
"grad_norm": 0.5051561406739382,
|
|
"learning_rate": 2.1113781654690624e-06,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356779932975769,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4497.1,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 6.079632465543645,
|
|
"grad_norm": 0.47564995155516737,
|
|
"learning_rate": 2.077350222140704e-06,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437039077281952,
|
|
"step": 3970,
|
|
"valid_targets_mean": 5306.3,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.08728943338438,
|
|
"grad_norm": 0.5940128337403686,
|
|
"learning_rate": 2.0435836899129624e-06,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19864995777606964,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4722.4,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 6.094946401225115,
|
|
"grad_norm": 0.49070285695934485,
|
|
"learning_rate": 2.0100790612881392e-06,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19582757353782654,
|
|
"step": 3980,
|
|
"valid_targets_mean": 5257.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 6.10260336906585,
|
|
"grad_norm": 0.46266996157161816,
|
|
"learning_rate": 1.9768368249485427e-06,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16311945021152496,
|
|
"step": 3985,
|
|
"valid_targets_mean": 5434.1,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 6.110260336906585,
|
|
"grad_norm": 0.46875844903787695,
|
|
"learning_rate": 1.9438574657493547e-06,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.148982971906662,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5669.6,
|
|
"valid_targets_min": 3120
|
|
},
|
|
{
|
|
"epoch": 6.11791730474732,
|
|
"grad_norm": 0.4856031191051758,
|
|
"learning_rate": 1.9111414647115545e-06,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14689423143863678,
|
|
"step": 3995,
|
|
"valid_targets_mean": 5054.9,
|
|
"valid_targets_min": 2586
|
|
},
|
|
{
|
|
"epoch": 6.1255742725880555,
|
|
"grad_norm": 0.6163929918826091,
|
|
"learning_rate": 1.878689299014913e-06,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15235333144664764,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4759.5,
|
|
"valid_targets_min": 1983
|
|
},
|
|
{
|
|
"epoch": 6.13323124042879,
|
|
"grad_norm": 0.429934054934571,
|
|
"learning_rate": 1.8465014419910155e-06,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1379457414150238,
|
|
"step": 4005,
|
|
"valid_targets_mean": 6366.9,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 6.1408882082695255,
|
|
"grad_norm": 0.7233086750971975,
|
|
"learning_rate": 1.8145783631163772e-06,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18254977464675903,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3828.1,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 6.14854517611026,
|
|
"grad_norm": 0.5027392672006016,
|
|
"learning_rate": 1.7829205280055938e-06,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1422107219696045,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4783.3,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.1562021439509955,
|
|
"grad_norm": 0.5617038740562822,
|
|
"learning_rate": 1.7515283984045228e-06,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1379905641078949,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3973.9,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 6.16385911179173,
|
|
"grad_norm": 0.48304445431238197,
|
|
"learning_rate": 1.7204024321835944e-06,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14369803667068481,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5558.6,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 6.1715160796324655,
|
|
"grad_norm": 0.5855970422459711,
|
|
"learning_rate": 1.6895430833310844e-06,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1536564975976944,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3549.4,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 6.179173047473201,
|
|
"grad_norm": 0.48239528326391967,
|
|
"learning_rate": 1.6589508019465395e-06,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16883978247642517,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5138.6,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 6.1868300153139355,
|
|
"grad_norm": 0.5343661046550146,
|
|
"learning_rate": 1.628626034234173e-06,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16661593317985535,
|
|
"step": 4040,
|
|
"valid_targets_mean": 5237.5,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 6.194486983154671,
|
|
"grad_norm": 0.42129658187000146,
|
|
"learning_rate": 1.5985692224963844e-06,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12375140190124512,
|
|
"step": 4045,
|
|
"valid_targets_mean": 6090.9,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 6.2021439509954055,
|
|
"grad_norm": 0.4842019759636806,
|
|
"learning_rate": 1.5687808051272835e-06,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18923643231391907,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5465.9,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 6.209800918836141,
|
|
"grad_norm": 1.005454460289286,
|
|
"learning_rate": 1.5392612166063203e-06,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18108555674552917,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3432.4,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 6.217457886676876,
|
|
"grad_norm": 0.5016868489631979,
|
|
"learning_rate": 1.5100108874919395e-06,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12715457379817963,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4946.0,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 6.225114854517611,
|
|
"grad_norm": 0.49871078521671197,
|
|
"learning_rate": 1.4810302444152868e-06,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17101529240608215,
|
|
"step": 4065,
|
|
"valid_targets_mean": 5428.0,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 6.232771822358346,
|
|
"grad_norm": 0.44896200586577506,
|
|
"learning_rate": 1.4523197100740127e-06,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14551226794719696,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5273.6,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 6.240428790199081,
|
|
"grad_norm": 0.5369466730400164,
|
|
"learning_rate": 1.423879703226072e-06,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18292181193828583,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4546.9,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 6.248085758039816,
|
|
"grad_norm": 0.4534004010866802,
|
|
"learning_rate": 1.3957106386836584e-06,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14585313200950623,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5585.4,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 6.255742725880551,
|
|
"grad_norm": 0.4562447826282773,
|
|
"learning_rate": 1.3678129273071194e-06,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18082283437252045,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5402.2,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 6.263399693721286,
|
|
"grad_norm": 0.5446884739075963,
|
|
"learning_rate": 1.340186975998976e-06,
|
|
"loss": 0.1621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15701700747013092,
|
|
"step": 4090,
|
|
"valid_targets_mean": 4313.9,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 6.271056661562022,
|
|
"grad_norm": 0.5562186277826435,
|
|
"learning_rate": 1.3128331876979994e-06,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1641978621482849,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4410.6,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 6.278713629402756,
|
|
"grad_norm": 0.4458800063927126,
|
|
"learning_rate": 1.285751961373305e-06,
|
|
"loss": 0.1594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14536157250404358,
|
|
"step": 4100,
|
|
"valid_targets_mean": 5645.1,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 6.286370597243492,
|
|
"grad_norm": 0.5283911789373491,
|
|
"learning_rate": 1.2589436920185661e-06,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16219042241573334,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4158.7,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.294027565084226,
|
|
"grad_norm": 0.5094779670955533,
|
|
"learning_rate": 1.232408770646234e-06,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17320656776428223,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4912.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 6.301684532924962,
|
|
"grad_norm": 0.5719763141764206,
|
|
"learning_rate": 1.2061475842818337e-06,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17003297805786133,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4195.1,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 6.309341500765697,
|
|
"grad_norm": 0.5256383923265986,
|
|
"learning_rate": 1.1801605159583307e-06,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1623782217502594,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4425.1,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 6.316998468606432,
|
|
"grad_norm": 0.48481180094784476,
|
|
"learning_rate": 1.1544479447105261e-06,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16785955429077148,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5730.3,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 6.324655436447167,
|
|
"grad_norm": 0.4572254396324727,
|
|
"learning_rate": 1.1290102455695595e-06,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16064852476119995,
|
|
"step": 4130,
|
|
"valid_targets_mean": 5513.2,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 6.332312404287902,
|
|
"grad_norm": 0.4530850293667914,
|
|
"learning_rate": 1.1038477895573974e-06,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16299015283584595,
|
|
"step": 4135,
|
|
"valid_targets_mean": 5793.6,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 6.339969372128637,
|
|
"grad_norm": 0.48028598319389537,
|
|
"learning_rate": 1.0789609436814552e-06,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14780756831169128,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5281.0,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.347626339969372,
|
|
"grad_norm": 0.4496051344582939,
|
|
"learning_rate": 1.0543500709292309e-06,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17510706186294556,
|
|
"step": 4145,
|
|
"valid_targets_mean": 6073.4,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 6.355283307810107,
|
|
"grad_norm": 0.509916750158182,
|
|
"learning_rate": 1.0300155302630045e-06,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18384206295013428,
|
|
"step": 4150,
|
|
"valid_targets_mean": 4543.0,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 6.362940275650843,
|
|
"grad_norm": 0.4914861553380316,
|
|
"learning_rate": 1.005957676614624e-06,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15742814540863037,
|
|
"step": 4155,
|
|
"valid_targets_mean": 5673.4,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 6.370597243491577,
|
|
"grad_norm": 0.44389672499265986,
|
|
"learning_rate": 9.821768608802995e-07,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12984803318977356,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5247.4,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 6.378254211332313,
|
|
"grad_norm": 0.5068179023268576,
|
|
"learning_rate": 9.58673429915511e-07,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18095433712005615,
|
|
"step": 4165,
|
|
"valid_targets_mean": 4936.8,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 6.385911179173047,
|
|
"grad_norm": 0.6110721344279758,
|
|
"learning_rate": 9.354477265299277e-07,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14109715819358826,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4727.2,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 6.393568147013783,
|
|
"grad_norm": 0.4593471805913175,
|
|
"learning_rate": 9.125000894824332e-07,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15586645901203156,
|
|
"step": 4175,
|
|
"valid_targets_mean": 5135.5,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 6.401225114854517,
|
|
"grad_norm": 0.5144834066021605,
|
|
"learning_rate": 8.898308534761591e-07,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15124112367630005,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4934.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 6.408882082695253,
|
|
"grad_norm": 0.4240329535706579,
|
|
"learning_rate": 8.674403491536121e-07,
|
|
"loss": 0.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1325221210718155,
|
|
"step": 4185,
|
|
"valid_targets_mean": 6155.0,
|
|
"valid_targets_min": 2846
|
|
},
|
|
{
|
|
"epoch": 6.416539050535988,
|
|
"grad_norm": 0.4349870239932218,
|
|
"learning_rate": 8.453289030918643e-07,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13124766945838928,
|
|
"step": 4190,
|
|
"valid_targets_mean": 5847.3,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 6.424196018376723,
|
|
"grad_norm": 0.4258108290720063,
|
|
"learning_rate": 8.234968377977704e-07,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13588905334472656,
|
|
"step": 4195,
|
|
"valid_targets_mean": 5795.6,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 6.431852986217458,
|
|
"grad_norm": 0.4823382335960822,
|
|
"learning_rate": 8.019444717032732e-07,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14344608783721924,
|
|
"step": 4200,
|
|
"valid_targets_mean": 5538.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 6.439509954058193,
|
|
"grad_norm": 0.5440916742867541,
|
|
"learning_rate": 7.806721191607658e-07,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14522123336791992,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4628.5,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 6.447166921898928,
|
|
"grad_norm": 0.47576315988744305,
|
|
"learning_rate": 7.596800904384838e-07,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14390070736408234,
|
|
"step": 4210,
|
|
"valid_targets_mean": 5652.9,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 6.4548238897396635,
|
|
"grad_norm": 0.5218061277679277,
|
|
"learning_rate": 7.38968691716011e-07,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18774065375328064,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4977.1,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 6.462480857580398,
|
|
"grad_norm": 0.4286228931036304,
|
|
"learning_rate": 7.185382250797901e-07,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377241015434265,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5731.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.4701378254211335,
|
|
"grad_norm": 0.47837224891886565,
|
|
"learning_rate": 6.983889885187279e-07,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15443077683448792,
|
|
"step": 4225,
|
|
"valid_targets_mean": 5042.2,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 6.477794793261868,
|
|
"grad_norm": 0.49422631910990883,
|
|
"learning_rate": 6.785212759198345e-07,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15702563524246216,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4542.4,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 6.4854517611026035,
|
|
"grad_norm": 0.477290567718433,
|
|
"learning_rate": 6.58935377063965e-07,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609748899936676,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5713.4,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 6.493108728943339,
|
|
"grad_norm": 0.4756474679960268,
|
|
"learning_rate": 6.396315776215645e-07,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16133789718151093,
|
|
"step": 4240,
|
|
"valid_targets_mean": 5269.2,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.5007656967840735,
|
|
"grad_norm": 0.4929632772608458,
|
|
"learning_rate": 6.206101591485092e-07,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14727333188056946,
|
|
"step": 4245,
|
|
"valid_targets_mean": 5250.6,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 6.508422664624809,
|
|
"grad_norm": 0.5014543137245218,
|
|
"learning_rate": 6.018713990820168e-07,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18269553780555725,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4937.0,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 6.5160796324655434,
|
|
"grad_norm": 0.5773327888472211,
|
|
"learning_rate": 5.834155707365696e-07,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16463427245616913,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4428.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 6.523736600306279,
|
|
"grad_norm": 0.5158369739919062,
|
|
"learning_rate": 5.652429432999596e-07,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20093436539173126,
|
|
"step": 4260,
|
|
"valid_targets_mean": 5806.6,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 6.531393568147013,
|
|
"grad_norm": 0.5089399556927194,
|
|
"learning_rate": 5.47353781829334e-07,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14460662007331848,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4637.4,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 6.539050535987749,
|
|
"grad_norm": 0.5388069418287857,
|
|
"learning_rate": 5.297483472473541e-07,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15606024861335754,
|
|
"step": 4270,
|
|
"valid_targets_mean": 4018.7,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 6.546707503828484,
|
|
"grad_norm": 0.47769896995919425,
|
|
"learning_rate": 5.12426896338376e-07,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12930753827095032,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5469.5,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 6.554364471669219,
|
|
"grad_norm": 0.5133848406051136,
|
|
"learning_rate": 4.953896817446957e-07,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14288434386253357,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4725.6,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 6.562021439509954,
|
|
"grad_norm": 0.5203682808767163,
|
|
"learning_rate": 4.78636951962892e-07,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18543046712875366,
|
|
"step": 4285,
|
|
"valid_targets_mean": 5126.2,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.569678407350689,
|
|
"grad_norm": 0.46911153856516075,
|
|
"learning_rate": 4.621689513401739e-07,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14522890746593475,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5616.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 6.577335375191424,
|
|
"grad_norm": 0.49903648422517666,
|
|
"learning_rate": 4.4598592007083277e-07,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15592968463897705,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4984.1,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 6.584992343032159,
|
|
"grad_norm": 0.562087312070281,
|
|
"learning_rate": 4.300880941927399e-07,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15358403325080872,
|
|
"step": 4300,
|
|
"valid_targets_mean": 5085.4,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 6.592649310872894,
|
|
"grad_norm": 0.47791617240764883,
|
|
"learning_rate": 4.1447570558388774e-07,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1530168354511261,
|
|
"step": 4305,
|
|
"valid_targets_mean": 5281.8,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 6.60030627871363,
|
|
"grad_norm": 0.45343705198477624,
|
|
"learning_rate": 3.991489819590322e-07,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15143750607967377,
|
|
"step": 4310,
|
|
"valid_targets_mean": 5688.8,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.607963246554364,
|
|
"grad_norm": 0.6088682794383126,
|
|
"learning_rate": 3.8410814686634214e-07,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1674603819847107,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4535.4,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 6.6156202143951,
|
|
"grad_norm": 0.5417614540657483,
|
|
"learning_rate": 3.6935341968417305e-07,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1633644849061966,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4694.1,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 6.623277182235834,
|
|
"grad_norm": 0.5680715181021335,
|
|
"learning_rate": 3.548850156178274e-07,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15728434920310974,
|
|
"step": 4325,
|
|
"valid_targets_mean": 5259.0,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 6.63093415007657,
|
|
"grad_norm": 0.4664775223352316,
|
|
"learning_rate": 3.407031456964571e-07,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1763657182455063,
|
|
"step": 4330,
|
|
"valid_targets_mean": 5762.2,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 6.638591117917304,
|
|
"grad_norm": 0.6365218167161506,
|
|
"learning_rate": 3.2680801676995724e-07,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19776296615600586,
|
|
"step": 4335,
|
|
"valid_targets_mean": 4274.9,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 6.64624808575804,
|
|
"grad_norm": 0.4910720869258167,
|
|
"learning_rate": 3.1319983150595035e-07,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406899392604828,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5006.8,
|
|
"valid_targets_min": 1618
|
|
},
|
|
{
|
|
"epoch": 6.653905053598775,
|
|
"grad_norm": 0.569329802811622,
|
|
"learning_rate": 2.998787883868537e-07,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16043078899383545,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3629.7,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 6.66156202143951,
|
|
"grad_norm": 0.5169327909748895,
|
|
"learning_rate": 2.868450817069501e-07,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18156372010707855,
|
|
"step": 4350,
|
|
"valid_targets_mean": 5023.1,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 6.669218989280245,
|
|
"grad_norm": 0.5620209579007543,
|
|
"learning_rate": 2.7409890156958607e-07,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16176734864711761,
|
|
"step": 4355,
|
|
"valid_targets_mean": 4832.9,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 6.676875957120981,
|
|
"grad_norm": 0.4949524076974463,
|
|
"learning_rate": 2.616404338843803e-07,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16081801056861877,
|
|
"step": 4360,
|
|
"valid_targets_mean": 5138.5,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 6.684532924961715,
|
|
"grad_norm": 0.4562359420788241,
|
|
"learning_rate": 2.4946986036451294e-07,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16743341088294983,
|
|
"step": 4365,
|
|
"valid_targets_mean": 5915.8,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 6.692189892802451,
|
|
"grad_norm": 0.4911008520965554,
|
|
"learning_rate": 2.375873585240851e-07,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14914241433143616,
|
|
"step": 4370,
|
|
"valid_targets_mean": 4897.8,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 6.699846860643185,
|
|
"grad_norm": 0.5140272838780794,
|
|
"learning_rate": 2.2599310167551902e-07,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15968918800354004,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4938.5,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 6.707503828483921,
|
|
"grad_norm": 0.5219624600556243,
|
|
"learning_rate": 2.1468725892704212e-07,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15002906322479248,
|
|
"step": 4380,
|
|
"valid_targets_mean": 5464.5,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 6.715160796324655,
|
|
"grad_norm": 0.5319553307201998,
|
|
"learning_rate": 2.0366999518020015e-07,
|
|
"loss": 0.1516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15449857711791992,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4223.0,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 6.722817764165391,
|
|
"grad_norm": 0.6335557414758894,
|
|
"learning_rate": 1.9294147112748129e-07,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18669471144676208,
|
|
"step": 4390,
|
|
"valid_targets_mean": 3495.2,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 6.730474732006126,
|
|
"grad_norm": 0.5264194401944684,
|
|
"learning_rate": 1.8250184324994258e-07,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15428876876831055,
|
|
"step": 4395,
|
|
"valid_targets_mean": 4849.3,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.738131699846861,
|
|
"grad_norm": 0.5483060983513385,
|
|
"learning_rate": 1.7235126381494716e-07,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15400370955467224,
|
|
"step": 4400,
|
|
"valid_targets_mean": 3996.1,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 6.745788667687596,
|
|
"grad_norm": 0.484798407756023,
|
|
"learning_rate": 1.6248988087393946e-07,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18481765687465668,
|
|
"step": 4405,
|
|
"valid_targets_mean": 5657.8,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 6.7534456355283305,
|
|
"grad_norm": 0.4730938326668684,
|
|
"learning_rate": 1.529178382602803e-07,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15649360418319702,
|
|
"step": 4410,
|
|
"valid_targets_mean": 5236.0,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 6.761102603369066,
|
|
"grad_norm": 0.5092523070882127,
|
|
"learning_rate": 1.4363527558715286e-07,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1409115046262741,
|
|
"step": 4415,
|
|
"valid_targets_mean": 5457.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 6.7687595712098005,
|
|
"grad_norm": 0.524901721297739,
|
|
"learning_rate": 1.346423282455267e-07,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1688336730003357,
|
|
"step": 4420,
|
|
"valid_targets_mean": 4615.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 6.776416539050536,
|
|
"grad_norm": 0.5105117143096871,
|
|
"learning_rate": 1.259391274021815e-07,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1753007471561432,
|
|
"step": 4425,
|
|
"valid_targets_mean": 4862.4,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 6.784073506891271,
|
|
"grad_norm": 0.5143595147260921,
|
|
"learning_rate": 1.1752579999779523e-07,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18723537027835846,
|
|
"step": 4430,
|
|
"valid_targets_mean": 5300.6,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 6.791730474732006,
|
|
"grad_norm": 0.47454601556028797,
|
|
"learning_rate": 1.094024687450923e-07,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13127397000789642,
|
|
"step": 4435,
|
|
"valid_targets_mean": 4709.1,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 6.799387442572741,
|
|
"grad_norm": 0.520781711890744,
|
|
"learning_rate": 1.0156925212705171e-07,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1573190838098526,
|
|
"step": 4440,
|
|
"valid_targets_mean": 4981.8,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 6.807044410413476,
|
|
"grad_norm": 0.4496519457965003,
|
|
"learning_rate": 9.402626439518393e-08,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14165663719177246,
|
|
"step": 4445,
|
|
"valid_targets_mean": 5795.9,
|
|
"valid_targets_min": 3105
|
|
},
|
|
{
|
|
"epoch": 6.814701378254211,
|
|
"grad_norm": 0.6215392778464087,
|
|
"learning_rate": 8.677361556786113e-08,
|
|
"loss": 0.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18268686532974243,
|
|
"step": 4450,
|
|
"valid_targets_mean": 6543.2,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 6.822358346094946,
|
|
"grad_norm": 0.49471966346976204,
|
|
"learning_rate": 7.98114114287052e-08,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15616750717163086,
|
|
"step": 4455,
|
|
"valid_targets_mean": 4753.4,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.830015313935681,
|
|
"grad_norm": 0.4930676116391729,
|
|
"learning_rate": 7.313975352506442e-08,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18801480531692505,
|
|
"step": 4460,
|
|
"valid_targets_mean": 4775.8,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 6.837672281776417,
|
|
"grad_norm": 0.52961697008991,
|
|
"learning_rate": 6.675873916651032e-08,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17023101449012756,
|
|
"step": 4465,
|
|
"valid_targets_mean": 4687.8,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 6.845329249617151,
|
|
"grad_norm": 0.49808821468991554,
|
|
"learning_rate": 6.066846142343208e-08,
|
|
"loss": 0.1562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15167677402496338,
|
|
"step": 4470,
|
|
"valid_targets_mean": 4647.4,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.852986217457887,
|
|
"grad_norm": 0.5002402449999928,
|
|
"learning_rate": 5.4869009125677606e-08,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16921664774417877,
|
|
"step": 4475,
|
|
"valid_targets_mean": 5446.4,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 6.860643185298621,
|
|
"grad_norm": 0.49397088265179795,
|
|
"learning_rate": 4.936046686125018e-08,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18386386334896088,
|
|
"step": 4480,
|
|
"valid_targets_mean": 5190.6,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 6.868300153139357,
|
|
"grad_norm": 0.4374868435323064,
|
|
"learning_rate": 4.414291497508494e-08,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12061470746994019,
|
|
"step": 4485,
|
|
"valid_targets_mean": 5660.0,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 6.875957120980092,
|
|
"grad_norm": 0.46344991151869086,
|
|
"learning_rate": 3.921642956786764e-08,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15344613790512085,
|
|
"step": 4490,
|
|
"valid_targets_mean": 5533.7,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 6.883614088820827,
|
|
"grad_norm": 0.5499673381962632,
|
|
"learning_rate": 3.4581082494933306e-08,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1682521402835846,
|
|
"step": 4495,
|
|
"valid_targets_mean": 5424.2,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 6.891271056661562,
|
|
"grad_norm": 0.5661611699091585,
|
|
"learning_rate": 3.023694136521149e-08,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19145449995994568,
|
|
"step": 4500,
|
|
"valid_targets_mean": 5180.9,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 6.898928024502297,
|
|
"grad_norm": 0.5138739009042879,
|
|
"learning_rate": 2.6184069540244883e-08,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396559178829193,
|
|
"step": 4505,
|
|
"valid_targets_mean": 4953.2,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 6.906584992343032,
|
|
"grad_norm": 0.4734940775508067,
|
|
"learning_rate": 2.2422526133258905e-08,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1580895334482193,
|
|
"step": 4510,
|
|
"valid_targets_mean": 5021.9,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 6.914241960183768,
|
|
"grad_norm": 0.4706534435530353,
|
|
"learning_rate": 1.8952366008309076e-08,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342153698205948,
|
|
"step": 4515,
|
|
"valid_targets_mean": 5286.7,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 6.921898928024502,
|
|
"grad_norm": 0.48739925177110227,
|
|
"learning_rate": 1.5773639779470552e-08,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1816117912530899,
|
|
"step": 4520,
|
|
"valid_targets_mean": 5406.6,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 6.929555895865238,
|
|
"grad_norm": 0.5208874030422277,
|
|
"learning_rate": 1.288639381010759e-08,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15851663053035736,
|
|
"step": 4525,
|
|
"valid_targets_mean": 4442.0,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 6.937212863705972,
|
|
"grad_norm": 0.48457417601240316,
|
|
"learning_rate": 1.0290670212191878e-08,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.165914848446846,
|
|
"step": 4530,
|
|
"valid_targets_mean": 4708.8,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 6.944869831546708,
|
|
"grad_norm": 0.4583395498556372,
|
|
"learning_rate": 7.986506845696351e-09,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13576406240463257,
|
|
"step": 4535,
|
|
"valid_targets_mean": 4882.4,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 6.952526799387442,
|
|
"grad_norm": 0.5546301713936433,
|
|
"learning_rate": 5.973937318028977e-09,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17114219069480896,
|
|
"step": 4540,
|
|
"valid_targets_mean": 4222.1,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 6.960183767228178,
|
|
"grad_norm": 0.4983059823209655,
|
|
"learning_rate": 4.2529909835553604e-09,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13904789090156555,
|
|
"step": 4545,
|
|
"valid_targets_mean": 4797.2,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.967840735068913,
|
|
"grad_norm": 0.45715360287978724,
|
|
"learning_rate": 2.8236929431701975e-09,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15358659625053406,
|
|
"step": 4550,
|
|
"valid_targets_mean": 6263.0,
|
|
"valid_targets_min": 3534
|
|
},
|
|
{
|
|
"epoch": 6.975497702909648,
|
|
"grad_norm": 0.48704332473921397,
|
|
"learning_rate": 1.6860640439197995e-09,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14753298461437225,
|
|
"step": 4555,
|
|
"valid_targets_mean": 4750.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 6.983154670750383,
|
|
"grad_norm": 0.6282723983499863,
|
|
"learning_rate": 8.401208787112147e-10,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16771043837070465,
|
|
"step": 4560,
|
|
"valid_targets_mean": 4766.7,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 6.990811638591118,
|
|
"grad_norm": 0.49667792484401724,
|
|
"learning_rate": 2.858757860590977e-10,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1469840109348297,
|
|
"step": 4565,
|
|
"valid_targets_mean": 4585.7,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 6.998468606431853,
|
|
"grad_norm": 0.5186330018791039,
|
|
"learning_rate": 2.3336849919175508e-11,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18415594100952148,
|
|
"step": 4570,
|
|
"valid_targets_mean": 5133.5,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15374836325645447,
|
|
"step": 4571,
|
|
"total_flos": 2281086641373184.0,
|
|
"train_loss": 0.20197117474362877,
|
|
"train_runtime": 35007.247,
|
|
"train_samples_per_second": 2.086,
|
|
"train_steps_per_second": 0.131,
|
|
"valid_targets_mean": 5963.9,
|
|
"valid_targets_min": 640
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4571,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2281086641373184.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|