Files
Qwen3-8B_exp_tas_temp_0.5_t…/trainer_state.json
ModelHub XC f27eb42337 初始化项目,由ModelHub XC社区提供模型
Model: laion/Qwen3-8B_exp_tas_temp_0.5_traces_save-strategy_steps
Source: Original Platform
2026-06-23 07:46:18 +08:00

5592 lines
155 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.0,
"eval_steps": 500,
"global_step": 2520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015873015873015872,
"grad_norm": 1.8274213812789266,
"learning_rate": 3.0769230769230774e-05,
"loss": 0.7577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6461434364318848,
"step": 5,
"valid_targets_mean": 3240.4,
"valid_targets_min": 567
},
{
"epoch": 0.031746031746031744,
"grad_norm": 0.8909279252952478,
"learning_rate": 6.923076923076924e-05,
"loss": 0.5599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5154889822006226,
"step": 10,
"valid_targets_mean": 2951.5,
"valid_targets_min": 728
},
{
"epoch": 0.047619047619047616,
"grad_norm": 0.6539578705231934,
"learning_rate": 9.99999607417416e-05,
"loss": 0.4714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.44698208570480347,
"step": 15,
"valid_targets_mean": 3411.7,
"valid_targets_min": 858
},
{
"epoch": 0.06349206349206349,
"grad_norm": 0.5869088454096476,
"learning_rate": 9.999858670917045e-05,
"loss": 0.4533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4649924039840698,
"step": 20,
"valid_targets_mean": 3109.9,
"valid_targets_min": 615
},
{
"epoch": 0.07936507936507936,
"grad_norm": 0.509457679261325,
"learning_rate": 9.999524982532699e-05,
"loss": 0.4412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.41344499588012695,
"step": 25,
"valid_targets_mean": 3829.7,
"valid_targets_min": 1140
},
{
"epoch": 0.09523809523809523,
"grad_norm": 0.7062119296057406,
"learning_rate": 9.998995022121104e-05,
"loss": 0.4127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4501193165779114,
"step": 30,
"valid_targets_mean": 2775.0,
"valid_targets_min": 1050
},
{
"epoch": 0.1111111111111111,
"grad_norm": 0.499780548134097,
"learning_rate": 9.998268810487518e-05,
"loss": 0.4262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42958372831344604,
"step": 35,
"valid_targets_mean": 3573.8,
"valid_targets_min": 731
},
{
"epoch": 0.12698412698412698,
"grad_norm": 0.5092649924433299,
"learning_rate": 9.997346376141656e-05,
"loss": 0.4256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.46939408779144287,
"step": 40,
"valid_targets_mean": 3167.1,
"valid_targets_min": 692
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.5017667346997713,
"learning_rate": 9.99622775529657e-05,
"loss": 0.4034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3935692012310028,
"step": 45,
"valid_targets_mean": 3086.5,
"valid_targets_min": 656
},
{
"epoch": 0.15873015873015872,
"grad_norm": 0.4948092196535038,
"learning_rate": 9.994912991867228e-05,
"loss": 0.3939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3957802653312683,
"step": 50,
"valid_targets_mean": 3103.2,
"valid_targets_min": 586
},
{
"epoch": 0.1746031746031746,
"grad_norm": 0.4506415984779397,
"learning_rate": 9.99340213746879e-05,
"loss": 0.3878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3558445870876312,
"step": 55,
"valid_targets_mean": 3752.7,
"valid_targets_min": 748
},
{
"epoch": 0.19047619047619047,
"grad_norm": 0.47637495247663253,
"learning_rate": 9.991695251414583e-05,
"loss": 0.3804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3717506229877472,
"step": 60,
"valid_targets_mean": 2804.0,
"valid_targets_min": 838
},
{
"epoch": 0.20634920634920634,
"grad_norm": 0.4697442701659542,
"learning_rate": 9.989792400713771e-05,
"loss": 0.3763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3844413161277771,
"step": 65,
"valid_targets_mean": 3328.9,
"valid_targets_min": 947
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.4029985555816782,
"learning_rate": 9.987693660068722e-05,
"loss": 0.3624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.361744225025177,
"step": 70,
"valid_targets_mean": 3788.7,
"valid_targets_min": 1518
},
{
"epoch": 0.23809523809523808,
"grad_norm": 0.6171443115375874,
"learning_rate": 9.985399111872081e-05,
"loss": 0.3893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40640202164649963,
"step": 75,
"valid_targets_mean": 3092.8,
"valid_targets_min": 1045
},
{
"epoch": 0.25396825396825395,
"grad_norm": 0.4749873069576808,
"learning_rate": 9.982908846203529e-05,
"loss": 0.3697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39057451486587524,
"step": 80,
"valid_targets_mean": 3140.4,
"valid_targets_min": 723
},
{
"epoch": 0.2698412698412698,
"grad_norm": 0.48658927757571463,
"learning_rate": 9.980222960826254e-05,
"loss": 0.3755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3794889450073242,
"step": 85,
"valid_targets_mean": 3091.6,
"valid_targets_min": 849
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.5038556894604157,
"learning_rate": 9.977341561183109e-05,
"loss": 0.3701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3605089783668518,
"step": 90,
"valid_targets_mean": 2912.1,
"valid_targets_min": 741
},
{
"epoch": 0.30158730158730157,
"grad_norm": 0.4102412169121251,
"learning_rate": 9.974264760392466e-05,
"loss": 0.3525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3408468961715698,
"step": 95,
"valid_targets_mean": 3674.1,
"valid_targets_min": 694
},
{
"epoch": 0.31746031746031744,
"grad_norm": 0.5215429312356035,
"learning_rate": 9.97099267924379e-05,
"loss": 0.3809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.366327166557312,
"step": 100,
"valid_targets_mean": 3165.3,
"valid_targets_min": 731
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.506147006488813,
"learning_rate": 9.967525446192882e-05,
"loss": 0.3725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4026761054992676,
"step": 105,
"valid_targets_mean": 3033.7,
"valid_targets_min": 786
},
{
"epoch": 0.3492063492063492,
"grad_norm": 0.4449016182522291,
"learning_rate": 9.963863197356849e-05,
"loss": 0.3646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3816118538379669,
"step": 110,
"valid_targets_mean": 2767.7,
"valid_targets_min": 907
},
{
"epoch": 0.36507936507936506,
"grad_norm": 0.4493995369276615,
"learning_rate": 9.960006076508747e-05,
"loss": 0.3507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3474277853965759,
"step": 115,
"valid_targets_mean": 4193.3,
"valid_targets_min": 1138
},
{
"epoch": 0.38095238095238093,
"grad_norm": 0.3995938765741062,
"learning_rate": 9.95595423507195e-05,
"loss": 0.3607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34774190187454224,
"step": 120,
"valid_targets_mean": 3404.6,
"valid_targets_min": 563
},
{
"epoch": 0.3968253968253968,
"grad_norm": 0.4078617144812389,
"learning_rate": 9.951707832114193e-05,
"loss": 0.3543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3508348762989044,
"step": 125,
"valid_targets_mean": 3827.8,
"valid_targets_min": 971
},
{
"epoch": 0.4126984126984127,
"grad_norm": 0.40638750589791245,
"learning_rate": 9.947267034341341e-05,
"loss": 0.3533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35496804118156433,
"step": 130,
"valid_targets_mean": 3386.2,
"valid_targets_min": 641
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.4234767970543296,
"learning_rate": 9.942632016090832e-05,
"loss": 0.3476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33436018228530884,
"step": 135,
"valid_targets_mean": 3398.4,
"valid_targets_min": 759
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.41265753362471513,
"learning_rate": 9.937802959324838e-05,
"loss": 0.3583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3562977910041809,
"step": 140,
"valid_targets_mean": 3618.7,
"valid_targets_min": 628
},
{
"epoch": 0.4603174603174603,
"grad_norm": 0.4757133092662119,
"learning_rate": 9.932780053623121e-05,
"loss": 0.3466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37288549542427063,
"step": 145,
"valid_targets_mean": 2592.0,
"valid_targets_min": 781
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.4239429827849855,
"learning_rate": 9.927563496175593e-05,
"loss": 0.3467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35352587699890137,
"step": 150,
"valid_targets_mean": 3382.8,
"valid_targets_min": 878
},
{
"epoch": 0.49206349206349204,
"grad_norm": 0.383979032896486,
"learning_rate": 9.922153491774572e-05,
"loss": 0.3495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3456829786300659,
"step": 155,
"valid_targets_mean": 3461.6,
"valid_targets_min": 908
},
{
"epoch": 0.5079365079365079,
"grad_norm": 0.424759371721176,
"learning_rate": 9.91655025280674e-05,
"loss": 0.3656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3748525381088257,
"step": 160,
"valid_targets_mean": 3654.6,
"valid_targets_min": 849
},
{
"epoch": 0.5238095238095238,
"grad_norm": 0.43022997649230643,
"learning_rate": 9.910753999244811e-05,
"loss": 0.3365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3463315963745117,
"step": 165,
"valid_targets_mean": 3035.5,
"valid_targets_min": 881
},
{
"epoch": 0.5396825396825397,
"grad_norm": 0.42157899649504554,
"learning_rate": 9.904764958638889e-05,
"loss": 0.3395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38232117891311646,
"step": 170,
"valid_targets_mean": 2938.3,
"valid_targets_min": 777
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.4128966508525992,
"learning_rate": 9.898583366107538e-05,
"loss": 0.343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34359288215637207,
"step": 175,
"valid_targets_mean": 3082.7,
"valid_targets_min": 762
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.3945850409315049,
"learning_rate": 9.892209464328556e-05,
"loss": 0.3625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3760848045349121,
"step": 180,
"valid_targets_mean": 3291.0,
"valid_targets_min": 955
},
{
"epoch": 0.5873015873015873,
"grad_norm": 0.46514818271074837,
"learning_rate": 9.885643503529439e-05,
"loss": 0.3498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3739466667175293,
"step": 185,
"valid_targets_mean": 2763.4,
"valid_targets_min": 677
},
{
"epoch": 0.6031746031746031,
"grad_norm": 0.5471372921378007,
"learning_rate": 9.878885741477563e-05,
"loss": 0.346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.349778950214386,
"step": 190,
"valid_targets_mean": 3169.9,
"valid_targets_min": 852
},
{
"epoch": 0.6190476190476191,
"grad_norm": 0.37640397987573115,
"learning_rate": 9.871936443470063e-05,
"loss": 0.3365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3495105504989624,
"step": 195,
"valid_targets_mean": 3473.1,
"valid_targets_min": 802
},
{
"epoch": 0.6349206349206349,
"grad_norm": 0.34529912636962173,
"learning_rate": 9.864795882323421e-05,
"loss": 0.3307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32995861768722534,
"step": 200,
"valid_targets_mean": 3907.2,
"valid_targets_min": 718
},
{
"epoch": 0.6507936507936508,
"grad_norm": 0.33903980298913694,
"learning_rate": 9.857464338362748e-05,
"loss": 0.3373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3237103521823883,
"step": 205,
"valid_targets_mean": 3551.8,
"valid_targets_min": 1138
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.3816336636716071,
"learning_rate": 9.849942099410792e-05,
"loss": 0.3379,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34175223112106323,
"step": 210,
"valid_targets_mean": 3279.8,
"valid_targets_min": 1022
},
{
"epoch": 0.6825396825396826,
"grad_norm": 0.3691401701105125,
"learning_rate": 9.842229460776622e-05,
"loss": 0.3428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3277595043182373,
"step": 215,
"valid_targets_mean": 3653.1,
"valid_targets_min": 1357
},
{
"epoch": 0.6984126984126984,
"grad_norm": 0.3920809302509506,
"learning_rate": 9.834326725244049e-05,
"loss": 0.3406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3460606038570404,
"step": 220,
"valid_targets_mean": 3346.0,
"valid_targets_min": 293
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.386844574098974,
"learning_rate": 9.826234203059731e-05,
"loss": 0.3462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34869250655174255,
"step": 225,
"valid_targets_mean": 3427.1,
"valid_targets_min": 702
},
{
"epoch": 0.7301587301587301,
"grad_norm": 0.41899529934934654,
"learning_rate": 9.817952211921e-05,
"loss": 0.3449,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37060266733169556,
"step": 230,
"valid_targets_mean": 2814.3,
"valid_targets_min": 791
},
{
"epoch": 0.746031746031746,
"grad_norm": 0.34433423051134976,
"learning_rate": 9.809481076963383e-05,
"loss": 0.3313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34181442856788635,
"step": 235,
"valid_targets_mean": 4166.8,
"valid_targets_min": 971
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.4031506293847615,
"learning_rate": 9.800821130747837e-05,
"loss": 0.3196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32658693194389343,
"step": 240,
"valid_targets_mean": 2723.3,
"valid_targets_min": 270
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.34126070155625965,
"learning_rate": 9.791972713247704e-05,
"loss": 0.3231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3253839910030365,
"step": 245,
"valid_targets_mean": 3959.7,
"valid_targets_min": 1158
},
{
"epoch": 0.7936507936507936,
"grad_norm": 0.4139092354029092,
"learning_rate": 9.782936171835353e-05,
"loss": 0.3393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35836488008499146,
"step": 250,
"valid_targets_mean": 3191.8,
"valid_targets_min": 755
},
{
"epoch": 0.8095238095238095,
"grad_norm": 0.3545532506414492,
"learning_rate": 9.773711861268549e-05,
"loss": 0.3352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3009134531021118,
"step": 255,
"valid_targets_mean": 3466.7,
"valid_targets_min": 997
},
{
"epoch": 0.8253968253968254,
"grad_norm": 0.36975136926244245,
"learning_rate": 9.764300143676518e-05,
"loss": 0.337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3257961869239807,
"step": 260,
"valid_targets_mean": 3094.9,
"valid_targets_min": 782
},
{
"epoch": 0.8412698412698413,
"grad_norm": 0.37844454589688126,
"learning_rate": 9.754701388545745e-05,
"loss": 0.3403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3239666223526001,
"step": 265,
"valid_targets_mean": 3849.2,
"valid_targets_min": 741
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.428420791431183,
"learning_rate": 9.744915972705453e-05,
"loss": 0.3308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.335654616355896,
"step": 270,
"valid_targets_mean": 2919.1,
"valid_targets_min": 700
},
{
"epoch": 0.873015873015873,
"grad_norm": 0.34170393330211485,
"learning_rate": 9.734944280312824e-05,
"loss": 0.3361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3174281120300293,
"step": 275,
"valid_targets_mean": 3749.4,
"valid_targets_min": 808
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.38182638697449783,
"learning_rate": 9.7247867028379e-05,
"loss": 0.3278,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35490620136260986,
"step": 280,
"valid_targets_mean": 3069.8,
"valid_targets_min": 781
},
{
"epoch": 0.9047619047619048,
"grad_norm": 0.37442096963482363,
"learning_rate": 9.714443639048232e-05,
"loss": 0.34,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32899394631385803,
"step": 285,
"valid_targets_mean": 3468.4,
"valid_targets_min": 693
},
{
"epoch": 0.9206349206349206,
"grad_norm": 0.3582639519502781,
"learning_rate": 9.703915494993215e-05,
"loss": 0.321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33591094613075256,
"step": 290,
"valid_targets_mean": 3191.9,
"valid_targets_min": 567
},
{
"epoch": 0.9365079365079365,
"grad_norm": 0.34417265042249817,
"learning_rate": 9.693202683988151e-05,
"loss": 0.3087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3005759119987488,
"step": 295,
"valid_targets_mean": 3653.5,
"valid_targets_min": 648
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.369249163786665,
"learning_rate": 9.682305626598023e-05,
"loss": 0.3366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34282851219177246,
"step": 300,
"valid_targets_mean": 3090.8,
"valid_targets_min": 920
},
{
"epoch": 0.9682539682539683,
"grad_norm": 0.31346439307676455,
"learning_rate": 9.671224750620981e-05,
"loss": 0.3188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29791080951690674,
"step": 305,
"valid_targets_mean": 3806.2,
"valid_targets_min": 851
},
{
"epoch": 0.9841269841269841,
"grad_norm": 0.3823674576068241,
"learning_rate": 9.659960491071554e-05,
"loss": 0.3349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33889368176460266,
"step": 310,
"valid_targets_mean": 3091.2,
"valid_targets_min": 948
},
{
"epoch": 1.0,
"grad_norm": 0.346262826029887,
"learning_rate": 9.64851329016356e-05,
"loss": 0.3238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3250298500061035,
"step": 315,
"valid_targets_mean": 3626.7,
"valid_targets_min": 879
},
{
"epoch": 1.0158730158730158,
"grad_norm": 0.43787454332305814,
"learning_rate": 9.636883597292762e-05,
"loss": 0.2469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23794296383857727,
"step": 320,
"valid_targets_mean": 3565.3,
"valid_targets_min": 485
},
{
"epoch": 1.0317460317460316,
"grad_norm": 0.3338170186100263,
"learning_rate": 9.625071869019215e-05,
"loss": 0.249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2461809515953064,
"step": 325,
"valid_targets_mean": 3799.6,
"valid_targets_min": 879
},
{
"epoch": 1.0476190476190477,
"grad_norm": 0.35543378491397704,
"learning_rate": 9.613078569049344e-05,
"loss": 0.2497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544521689414978,
"step": 330,
"valid_targets_mean": 3516.8,
"valid_targets_min": 853
},
{
"epoch": 1.0634920634920635,
"grad_norm": 0.37748650521262617,
"learning_rate": 9.600904168217734e-05,
"loss": 0.2431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23706725239753723,
"step": 335,
"valid_targets_mean": 3417.8,
"valid_targets_min": 703
},
{
"epoch": 1.0793650793650793,
"grad_norm": 0.3435381967499918,
"learning_rate": 9.588549144468664e-05,
"loss": 0.2519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24922573566436768,
"step": 340,
"valid_targets_mean": 3267.8,
"valid_targets_min": 886
},
{
"epoch": 1.0952380952380953,
"grad_norm": 0.34815667356425745,
"learning_rate": 9.576013982837324e-05,
"loss": 0.2448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2301144003868103,
"step": 345,
"valid_targets_mean": 3309.3,
"valid_targets_min": 823
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.3412007000641619,
"learning_rate": 9.563299175430782e-05,
"loss": 0.2413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23151162266731262,
"step": 350,
"valid_targets_mean": 3243.3,
"valid_targets_min": 356
},
{
"epoch": 1.126984126984127,
"grad_norm": 0.35215519695863917,
"learning_rate": 9.550405221408664e-05,
"loss": 0.2523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2685403823852539,
"step": 355,
"valid_targets_mean": 3297.7,
"valid_targets_min": 700
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.31526351914206535,
"learning_rate": 9.537332626963561e-05,
"loss": 0.2449,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22305209934711456,
"step": 360,
"valid_targets_mean": 4274.9,
"valid_targets_min": 742
},
{
"epoch": 1.1587301587301586,
"grad_norm": 0.3450806364783924,
"learning_rate": 9.524081905301152e-05,
"loss": 0.2521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24462619423866272,
"step": 365,
"valid_targets_mean": 3424.6,
"valid_targets_min": 663
},
{
"epoch": 1.1746031746031746,
"grad_norm": 0.33405132130300835,
"learning_rate": 9.510653576620056e-05,
"loss": 0.2443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23065048456192017,
"step": 370,
"valid_targets_mean": 3583.2,
"valid_targets_min": 1016
},
{
"epoch": 1.1904761904761905,
"grad_norm": 0.3992743258279796,
"learning_rate": 9.497048168091418e-05,
"loss": 0.2541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2545672059059143,
"step": 375,
"valid_targets_mean": 2822.1,
"valid_targets_min": 567
},
{
"epoch": 1.2063492063492063,
"grad_norm": 0.3786390758502515,
"learning_rate": 9.483266213838202e-05,
"loss": 0.2542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26987892389297485,
"step": 380,
"valid_targets_mean": 3785.4,
"valid_targets_min": 1214
},
{
"epoch": 1.2222222222222223,
"grad_norm": 0.32546227871241973,
"learning_rate": 9.469308254914231e-05,
"loss": 0.2446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23422034084796906,
"step": 385,
"valid_targets_mean": 3780.2,
"valid_targets_min": 1152
},
{
"epoch": 1.2380952380952381,
"grad_norm": 0.3065912912687044,
"learning_rate": 9.455174839282941e-05,
"loss": 0.2455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22501501441001892,
"step": 390,
"valid_targets_mean": 3669.4,
"valid_targets_min": 707
},
{
"epoch": 1.253968253968254,
"grad_norm": 0.37271041897458185,
"learning_rate": 9.440866521795874e-05,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2661711275577545,
"step": 395,
"valid_targets_mean": 3099.3,
"valid_targets_min": 720
},
{
"epoch": 1.2698412698412698,
"grad_norm": 0.369136406791236,
"learning_rate": 9.426383864170891e-05,
"loss": 0.2561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2433825135231018,
"step": 400,
"valid_targets_mean": 2825.9,
"valid_targets_min": 848
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.37357502650745017,
"learning_rate": 9.411727434970121e-05,
"loss": 0.2566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2653917372226715,
"step": 405,
"valid_targets_mean": 3194.1,
"valid_targets_min": 486
},
{
"epoch": 1.3015873015873016,
"grad_norm": 0.3294343894160417,
"learning_rate": 9.396897809577643e-05,
"loss": 0.257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23809459805488586,
"step": 410,
"valid_targets_mean": 3412.5,
"valid_targets_min": 878
},
{
"epoch": 1.3174603174603174,
"grad_norm": 0.40670887974632824,
"learning_rate": 9.381895570176893e-05,
"loss": 0.2601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2684550881385803,
"step": 415,
"valid_targets_mean": 2609.8,
"valid_targets_min": 692
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.3848066802547911,
"learning_rate": 9.366721305727813e-05,
"loss": 0.2502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.270733505487442,
"step": 420,
"valid_targets_mean": 2846.2,
"valid_targets_min": 794
},
{
"epoch": 1.3492063492063493,
"grad_norm": 0.31019131919407833,
"learning_rate": 9.351375611943724e-05,
"loss": 0.2382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21716828644275665,
"step": 425,
"valid_targets_mean": 3436.6,
"valid_targets_min": 1281
},
{
"epoch": 1.3650793650793651,
"grad_norm": 0.33361242153190557,
"learning_rate": 9.335859091267952e-05,
"loss": 0.2423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25320854783058167,
"step": 430,
"valid_targets_mean": 3695.6,
"valid_targets_min": 972
},
{
"epoch": 1.380952380952381,
"grad_norm": 0.36539490370574296,
"learning_rate": 9.320172352850156e-05,
"loss": 0.2546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25512421131134033,
"step": 435,
"valid_targets_mean": 2833.2,
"valid_targets_min": 655
},
{
"epoch": 1.3968253968253967,
"grad_norm": 0.3457757597947298,
"learning_rate": 9.304316012522437e-05,
"loss": 0.2486,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24607813358306885,
"step": 440,
"valid_targets_mean": 3310.0,
"valid_targets_min": 891
},
{
"epoch": 1.4126984126984126,
"grad_norm": 0.348654246853821,
"learning_rate": 9.288290692775143e-05,
"loss": 0.2501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25384521484375,
"step": 445,
"valid_targets_mean": 3137.3,
"valid_targets_min": 974
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.3454233001057256,
"learning_rate": 9.272097022732443e-05,
"loss": 0.2518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26066356897354126,
"step": 450,
"valid_targets_mean": 3637.1,
"valid_targets_min": 677
},
{
"epoch": 1.4444444444444444,
"grad_norm": 0.33143501884931204,
"learning_rate": 9.255735638127623e-05,
"loss": 0.2505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25016534328460693,
"step": 455,
"valid_targets_mean": 3601.1,
"valid_targets_min": 854
},
{
"epoch": 1.4603174603174602,
"grad_norm": 0.35241836784306196,
"learning_rate": 9.239207181278131e-05,
"loss": 0.2615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25485676527023315,
"step": 460,
"valid_targets_mean": 3419.8,
"valid_targets_min": 1077
},
{
"epoch": 1.4761904761904763,
"grad_norm": 0.3604417859438151,
"learning_rate": 9.222512301060358e-05,
"loss": 0.2589,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2657771110534668,
"step": 465,
"valid_targets_mean": 3546.2,
"valid_targets_min": 316
},
{
"epoch": 1.492063492063492,
"grad_norm": 0.37365401606945614,
"learning_rate": 9.205651652884169e-05,
"loss": 0.2419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26828211545944214,
"step": 470,
"valid_targets_mean": 3005.4,
"valid_targets_min": 859
},
{
"epoch": 1.507936507936508,
"grad_norm": 0.34227061343365994,
"learning_rate": 9.188625898667165e-05,
"loss": 0.2503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2581418752670288,
"step": 475,
"valid_targets_mean": 3869.3,
"valid_targets_min": 889
},
{
"epoch": 1.5238095238095237,
"grad_norm": 0.39954769934278406,
"learning_rate": 9.171435706808709e-05,
"loss": 0.2618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27356404066085815,
"step": 480,
"valid_targets_mean": 2616.6,
"valid_targets_min": 567
},
{
"epoch": 1.5396825396825395,
"grad_norm": 0.3779648783322647,
"learning_rate": 9.154081752163675e-05,
"loss": 0.2584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2693216800689697,
"step": 485,
"valid_targets_mean": 2724.9,
"valid_targets_min": 752
},
{
"epoch": 1.5555555555555556,
"grad_norm": 0.3409265230520423,
"learning_rate": 9.136564716015956e-05,
"loss": 0.2599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2872137427330017,
"step": 490,
"valid_targets_mean": 3691.8,
"valid_targets_min": 881
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.3335281180583056,
"learning_rate": 9.118885286051726e-05,
"loss": 0.2582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2598532438278198,
"step": 495,
"valid_targets_mean": 3319.2,
"valid_targets_min": 747
},
{
"epoch": 1.5873015873015874,
"grad_norm": 0.3302567574084486,
"learning_rate": 9.101044156332437e-05,
"loss": 0.2444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2369944453239441,
"step": 500,
"valid_targets_mean": 3715.5,
"valid_targets_min": 1155
},
{
"epoch": 1.6031746031746033,
"grad_norm": 0.37703989593478526,
"learning_rate": 9.083042027267567e-05,
"loss": 0.2615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.286998450756073,
"step": 505,
"valid_targets_mean": 2952.9,
"valid_targets_min": 574
},
{
"epoch": 1.619047619047619,
"grad_norm": 0.35401957704413434,
"learning_rate": 9.064879605587132e-05,
"loss": 0.2537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26032260060310364,
"step": 510,
"valid_targets_mean": 3504.2,
"valid_targets_min": 773
},
{
"epoch": 1.6349206349206349,
"grad_norm": 0.3641434739283903,
"learning_rate": 9.046557604313937e-05,
"loss": 0.2506,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25402820110321045,
"step": 515,
"valid_targets_mean": 3138.6,
"valid_targets_min": 726
},
{
"epoch": 1.6507936507936507,
"grad_norm": 0.3059033037354985,
"learning_rate": 9.028076742735583e-05,
"loss": 0.2503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23979029059410095,
"step": 520,
"valid_targets_mean": 3641.1,
"valid_targets_min": 900
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.3497297327249346,
"learning_rate": 9.009437746376231e-05,
"loss": 0.2483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25244393944740295,
"step": 525,
"valid_targets_mean": 3473.1,
"valid_targets_min": 319
},
{
"epoch": 1.6825396825396826,
"grad_norm": 0.3378221717798311,
"learning_rate": 8.990641346968117e-05,
"loss": 0.267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27777156233787537,
"step": 530,
"valid_targets_mean": 3499.6,
"valid_targets_min": 734
},
{
"epoch": 1.6984126984126984,
"grad_norm": 0.33873773306272287,
"learning_rate": 8.97168828242283e-05,
"loss": 0.2485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2586287558078766,
"step": 535,
"valid_targets_mean": 3036.5,
"valid_targets_min": 725
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.34047790836727093,
"learning_rate": 8.952579296802339e-05,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2612344026565552,
"step": 540,
"valid_targets_mean": 3229.4,
"valid_targets_min": 825
},
{
"epoch": 1.7301587301587302,
"grad_norm": 0.30335154929897096,
"learning_rate": 8.933315140289782e-05,
"loss": 0.2426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22845228016376495,
"step": 545,
"valid_targets_mean": 3568.5,
"valid_targets_min": 702
},
{
"epoch": 1.746031746031746,
"grad_norm": 0.35688828900002534,
"learning_rate": 8.91389656916002e-05,
"loss": 0.2565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2728371322154999,
"step": 550,
"valid_targets_mean": 3387.7,
"valid_targets_min": 1036
},
{
"epoch": 1.7619047619047619,
"grad_norm": 0.35026171013621277,
"learning_rate": 8.894324345749939e-05,
"loss": 0.257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2628636956214905,
"step": 555,
"valid_targets_mean": 2987.4,
"valid_targets_min": 1389
},
{
"epoch": 1.7777777777777777,
"grad_norm": 0.364303792145663,
"learning_rate": 8.874599238428533e-05,
"loss": 0.252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25474119186401367,
"step": 560,
"valid_targets_mean": 2906.6,
"valid_targets_min": 332
},
{
"epoch": 1.7936507936507935,
"grad_norm": 0.3297047549410631,
"learning_rate": 8.85472202156673e-05,
"loss": 0.2416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2573590874671936,
"step": 565,
"valid_targets_mean": 3229.9,
"valid_targets_min": 719
},
{
"epoch": 1.8095238095238095,
"grad_norm": 0.35086501481207644,
"learning_rate": 8.834693475506992e-05,
"loss": 0.2465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25695112347602844,
"step": 570,
"valid_targets_mean": 3061.9,
"valid_targets_min": 586
},
{
"epoch": 1.8253968253968254,
"grad_norm": 0.3317762868716903,
"learning_rate": 8.814514386532691e-05,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24048689007759094,
"step": 575,
"valid_targets_mean": 2892.9,
"valid_targets_min": 662
},
{
"epoch": 1.8412698412698414,
"grad_norm": 0.3474363106947119,
"learning_rate": 8.794185546837224e-05,
"loss": 0.2583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26812222599983215,
"step": 580,
"valid_targets_mean": 2800.1,
"valid_targets_min": 741
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.30940634479021384,
"learning_rate": 8.773707754492928e-05,
"loss": 0.2613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25138765573501587,
"step": 585,
"valid_targets_mean": 3781.5,
"valid_targets_min": 759
},
{
"epoch": 1.873015873015873,
"grad_norm": 0.34038821122242374,
"learning_rate": 8.753081813419743e-05,
"loss": 0.2572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2681252360343933,
"step": 590,
"valid_targets_mean": 3191.2,
"valid_targets_min": 881
},
{
"epoch": 1.8888888888888888,
"grad_norm": 0.3166643196960125,
"learning_rate": 8.73230853335365e-05,
"loss": 0.2548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2620237469673157,
"step": 595,
"valid_targets_mean": 3481.8,
"valid_targets_min": 493
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.30108357485678805,
"learning_rate": 8.711388729814882e-05,
"loss": 0.249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24603253602981567,
"step": 600,
"valid_targets_mean": 3596.4,
"valid_targets_min": 1012
},
{
"epoch": 1.9206349206349205,
"grad_norm": 0.3334463754433709,
"learning_rate": 8.690323224075917e-05,
"loss": 0.255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2664140462875366,
"step": 605,
"valid_targets_mean": 3660.5,
"valid_targets_min": 821
},
{
"epoch": 1.9365079365079365,
"grad_norm": 0.29345444440656204,
"learning_rate": 8.669112843129221e-05,
"loss": 0.2404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23653852939605713,
"step": 610,
"valid_targets_mean": 3828.9,
"valid_targets_min": 872
},
{
"epoch": 1.9523809523809523,
"grad_norm": 0.3545591460034767,
"learning_rate": 8.647758419654798e-05,
"loss": 0.2632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28314149379730225,
"step": 615,
"valid_targets_mean": 3005.8,
"valid_targets_min": 950
},
{
"epoch": 1.9682539682539684,
"grad_norm": 0.37518499037902897,
"learning_rate": 8.626260791987488e-05,
"loss": 0.264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27982187271118164,
"step": 620,
"valid_targets_mean": 2737.7,
"valid_targets_min": 655
},
{
"epoch": 1.9841269841269842,
"grad_norm": 0.3141946190884809,
"learning_rate": 8.604620804084065e-05,
"loss": 0.2604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24237555265426636,
"step": 625,
"valid_targets_mean": 3285.4,
"valid_targets_min": 782
},
{
"epoch": 2.0,
"grad_norm": 0.31685211804889135,
"learning_rate": 8.582839305490094e-05,
"loss": 0.246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24083860218524933,
"step": 630,
"valid_targets_mean": 3718.9,
"valid_targets_min": 628
},
{
"epoch": 2.015873015873016,
"grad_norm": 0.5759647548742526,
"learning_rate": 8.560917151306593e-05,
"loss": 0.1678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17167878150939941,
"step": 635,
"valid_targets_mean": 3780.4,
"valid_targets_min": 1167
},
{
"epoch": 2.0317460317460316,
"grad_norm": 0.3727656111117338,
"learning_rate": 8.538855202156453e-05,
"loss": 0.1622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17625080049037933,
"step": 640,
"valid_targets_mean": 2937.9,
"valid_targets_min": 798
},
{
"epoch": 2.0476190476190474,
"grad_norm": 0.3884320540436871,
"learning_rate": 8.516654324150652e-05,
"loss": 0.1567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17556791007518768,
"step": 645,
"valid_targets_mean": 3192.7,
"valid_targets_min": 538
},
{
"epoch": 2.0634920634920633,
"grad_norm": 0.3289674113366658,
"learning_rate": 8.49431538885426e-05,
"loss": 0.1614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16414867341518402,
"step": 650,
"valid_targets_mean": 3249.7,
"valid_targets_min": 844
},
{
"epoch": 2.0793650793650795,
"grad_norm": 0.3766522928896842,
"learning_rate": 8.471839273252217e-05,
"loss": 0.1619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18375205993652344,
"step": 655,
"valid_targets_mean": 3164.0,
"valid_targets_min": 947
},
{
"epoch": 2.0952380952380953,
"grad_norm": 0.3507144392463362,
"learning_rate": 8.449226859714907e-05,
"loss": 0.1519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1520249843597412,
"step": 660,
"valid_targets_mean": 3328.9,
"valid_targets_min": 674
},
{
"epoch": 2.111111111111111,
"grad_norm": 0.37178157182206373,
"learning_rate": 8.426479035963513e-05,
"loss": 0.1598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17126235365867615,
"step": 665,
"valid_targets_mean": 3293.8,
"valid_targets_min": 649
},
{
"epoch": 2.126984126984127,
"grad_norm": 0.3300159891010412,
"learning_rate": 8.403596695035174e-05,
"loss": 0.166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16688695549964905,
"step": 670,
"valid_targets_mean": 3549.7,
"valid_targets_min": 1096
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.31856865543305996,
"learning_rate": 8.380580735247925e-05,
"loss": 0.1657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16608603298664093,
"step": 675,
"valid_targets_mean": 3707.8,
"valid_targets_min": 621
},
{
"epoch": 2.1587301587301586,
"grad_norm": 0.3256115736923054,
"learning_rate": 8.35743206016542e-05,
"loss": 0.1565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17094911634922028,
"step": 680,
"valid_targets_mean": 3957.3,
"valid_targets_min": 1029
},
{
"epoch": 2.1746031746031744,
"grad_norm": 0.34931477561977403,
"learning_rate": 8.334151578561478e-05,
"loss": 0.1535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15973442792892456,
"step": 685,
"valid_targets_mean": 3375.2,
"valid_targets_min": 787
},
{
"epoch": 2.1904761904761907,
"grad_norm": 0.32279390014201975,
"learning_rate": 8.310740204384387e-05,
"loss": 0.1596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16139554977416992,
"step": 690,
"valid_targets_mean": 3563.2,
"valid_targets_min": 698
},
{
"epoch": 2.2063492063492065,
"grad_norm": 0.35609221581271633,
"learning_rate": 8.287198856721042e-05,
"loss": 0.1637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1595011055469513,
"step": 695,
"valid_targets_mean": 2980.4,
"valid_targets_min": 1007
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.37271121800564905,
"learning_rate": 8.263528459760844e-05,
"loss": 0.168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17944392561912537,
"step": 700,
"valid_targets_mean": 3019.2,
"valid_targets_min": 551
},
{
"epoch": 2.238095238095238,
"grad_norm": 0.3492345769276772,
"learning_rate": 8.23972994275944e-05,
"loss": 0.1595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15329068899154663,
"step": 705,
"valid_targets_mean": 2981.5,
"valid_targets_min": 705
},
{
"epoch": 2.253968253968254,
"grad_norm": 0.37130802424546083,
"learning_rate": 8.215804240002225e-05,
"loss": 0.1623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16565588116645813,
"step": 710,
"valid_targets_mean": 3265.3,
"valid_targets_min": 601
},
{
"epoch": 2.2698412698412698,
"grad_norm": 0.3248478814141257,
"learning_rate": 8.191752290767671e-05,
"loss": 0.1628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1539839506149292,
"step": 715,
"valid_targets_mean": 3446.7,
"valid_targets_min": 858
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.34141572197660325,
"learning_rate": 8.167575039290448e-05,
"loss": 0.1632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16221433877944946,
"step": 720,
"valid_targets_mean": 3580.3,
"valid_targets_min": 903
},
{
"epoch": 2.3015873015873014,
"grad_norm": 0.3457143748578184,
"learning_rate": 8.143273434724363e-05,
"loss": 0.17,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16455700993537903,
"step": 725,
"valid_targets_mean": 3009.4,
"valid_targets_min": 721
},
{
"epoch": 2.317460317460317,
"grad_norm": 0.4063997244980026,
"learning_rate": 8.118848431105091e-05,
"loss": 0.1687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17688608169555664,
"step": 730,
"valid_targets_mean": 2648.5,
"valid_targets_min": 655
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.36420318625821063,
"learning_rate": 8.094300987312725e-05,
"loss": 0.1658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1653500646352768,
"step": 735,
"valid_targets_mean": 3228.5,
"valid_targets_min": 671
},
{
"epoch": 2.3492063492063493,
"grad_norm": 0.3426356909940338,
"learning_rate": 8.069632067034129e-05,
"loss": 0.1593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16358144581317902,
"step": 740,
"valid_targets_mean": 3252.1,
"valid_targets_min": 318
},
{
"epoch": 2.365079365079365,
"grad_norm": 0.3362094215393869,
"learning_rate": 8.044842638725107e-05,
"loss": 0.162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16318479180335999,
"step": 745,
"valid_targets_mean": 3503.7,
"valid_targets_min": 1345
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.35115213542541646,
"learning_rate": 8.019933675572389e-05,
"loss": 0.1688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17881888151168823,
"step": 750,
"valid_targets_mean": 3436.2,
"valid_targets_min": 833
},
{
"epoch": 2.3968253968253967,
"grad_norm": 0.33653063427971813,
"learning_rate": 7.994906155455411e-05,
"loss": 0.1597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15470348298549652,
"step": 755,
"valid_targets_mean": 3408.1,
"valid_targets_min": 662
},
{
"epoch": 2.4126984126984126,
"grad_norm": 0.41474724537380153,
"learning_rate": 7.969761060907943e-05,
"loss": 0.1757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18444520235061646,
"step": 760,
"valid_targets_mean": 2565.2,
"valid_targets_min": 799
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.3250725810449299,
"learning_rate": 7.944499379079502e-05,
"loss": 0.1713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16023294627666473,
"step": 765,
"valid_targets_mean": 3494.1,
"valid_targets_min": 673
},
{
"epoch": 2.4444444444444446,
"grad_norm": 0.33217697389608564,
"learning_rate": 7.919122101696606e-05,
"loss": 0.1691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1604093611240387,
"step": 770,
"valid_targets_mean": 3532.6,
"valid_targets_min": 1386
},
{
"epoch": 2.4603174603174605,
"grad_norm": 0.3158728058296167,
"learning_rate": 7.893630225023842e-05,
"loss": 0.1677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17412611842155457,
"step": 775,
"valid_targets_mean": 3643.2,
"valid_targets_min": 715
},
{
"epoch": 2.4761904761904763,
"grad_norm": 0.3777164615324642,
"learning_rate": 7.868024749824745e-05,
"loss": 0.1689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17800076305866241,
"step": 780,
"valid_targets_mean": 2654.0,
"valid_targets_min": 645
},
{
"epoch": 2.492063492063492,
"grad_norm": 0.3446838338319628,
"learning_rate": 7.842306681322522e-05,
"loss": 0.1655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.152607262134552,
"step": 785,
"valid_targets_mean": 2785.3,
"valid_targets_min": 626
},
{
"epoch": 2.507936507936508,
"grad_norm": 0.3413803336284991,
"learning_rate": 7.816477029160582e-05,
"loss": 0.169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.169495090842247,
"step": 790,
"valid_targets_mean": 3203.3,
"valid_targets_min": 680
},
{
"epoch": 2.5238095238095237,
"grad_norm": 0.3422251012343508,
"learning_rate": 7.790536807362896e-05,
"loss": 0.1668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16888554394245148,
"step": 795,
"valid_targets_mean": 3368.9,
"valid_targets_min": 762
},
{
"epoch": 2.5396825396825395,
"grad_norm": 0.35255729056148943,
"learning_rate": 7.7644870342942e-05,
"loss": 0.1767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17574498057365417,
"step": 800,
"valid_targets_mean": 3200.3,
"valid_targets_min": 744
},
{
"epoch": 2.5555555555555554,
"grad_norm": 0.3675191971330026,
"learning_rate": 7.738328732620001e-05,
"loss": 0.169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18634290993213654,
"step": 805,
"valid_targets_mean": 3374.5,
"valid_targets_min": 733
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.37403677830549353,
"learning_rate": 7.712062929266444e-05,
"loss": 0.1721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16618913412094116,
"step": 810,
"valid_targets_mean": 2706.5,
"valid_targets_min": 924
},
{
"epoch": 2.5873015873015874,
"grad_norm": 0.3727371847270236,
"learning_rate": 7.685690655379984e-05,
"loss": 0.1736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17995798587799072,
"step": 815,
"valid_targets_mean": 3017.0,
"valid_targets_min": 748
},
{
"epoch": 2.6031746031746033,
"grad_norm": 0.30751639885441906,
"learning_rate": 7.659212946286912e-05,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15746265649795532,
"step": 820,
"valid_targets_mean": 4256.9,
"valid_targets_min": 374
},
{
"epoch": 2.619047619047619,
"grad_norm": 0.3310233627973631,
"learning_rate": 7.632630841452709e-05,
"loss": 0.1688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17239421606063843,
"step": 825,
"valid_targets_mean": 3438.6,
"valid_targets_min": 1014
},
{
"epoch": 2.634920634920635,
"grad_norm": 0.3796448669599942,
"learning_rate": 7.605945384441238e-05,
"loss": 0.1742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17648935317993164,
"step": 830,
"valid_targets_mean": 2657.2,
"valid_targets_min": 678
},
{
"epoch": 2.6507936507936507,
"grad_norm": 0.31336443027803734,
"learning_rate": 7.579157622873779e-05,
"loss": 0.1763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18123339116573334,
"step": 835,
"valid_targets_mean": 3867.7,
"valid_targets_min": 799
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.32297938325759856,
"learning_rate": 7.552268608387889e-05,
"loss": 0.1754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17806392908096313,
"step": 840,
"valid_targets_mean": 3531.5,
"valid_targets_min": 994
},
{
"epoch": 2.682539682539683,
"grad_norm": 0.3818920541656617,
"learning_rate": 7.525279396596137e-05,
"loss": 0.1668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17371223866939545,
"step": 845,
"valid_targets_mean": 2356.9,
"valid_targets_min": 463
},
{
"epoch": 2.6984126984126986,
"grad_norm": 0.3317772595112286,
"learning_rate": 7.498191047044641e-05,
"loss": 0.1727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18115673959255219,
"step": 850,
"valid_targets_mean": 3477.0,
"valid_targets_min": 982
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.31555206274921216,
"learning_rate": 7.471004623171493e-05,
"loss": 0.167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16410109400749207,
"step": 855,
"valid_targets_mean": 3581.8,
"valid_targets_min": 694
},
{
"epoch": 2.7301587301587302,
"grad_norm": 0.3339727492896053,
"learning_rate": 7.443721192264991e-05,
"loss": 0.1685,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1775779128074646,
"step": 860,
"valid_targets_mean": 3339.6,
"valid_targets_min": 899
},
{
"epoch": 2.746031746031746,
"grad_norm": 0.3741712364122302,
"learning_rate": 7.416341825421754e-05,
"loss": 0.1747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.181113600730896,
"step": 865,
"valid_targets_mean": 2898.9,
"valid_targets_min": 807
},
{
"epoch": 2.761904761904762,
"grad_norm": 0.3450601435101273,
"learning_rate": 7.388867597504664e-05,
"loss": 0.1755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16140630841255188,
"step": 870,
"valid_targets_mean": 2982.1,
"valid_targets_min": 777
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.30300100600429875,
"learning_rate": 7.361299587100671e-05,
"loss": 0.1664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14763054251670837,
"step": 875,
"valid_targets_mean": 3546.3,
"valid_targets_min": 331
},
{
"epoch": 2.7936507936507935,
"grad_norm": 0.33822865794587587,
"learning_rate": 7.333638876478453e-05,
"loss": 0.1663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15383568406105042,
"step": 880,
"valid_targets_mean": 3383.7,
"valid_targets_min": 965
},
{
"epoch": 2.8095238095238093,
"grad_norm": 0.33412569714189794,
"learning_rate": 7.305886551545926e-05,
"loss": 0.1742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16151924431324005,
"step": 885,
"valid_targets_mean": 3373.7,
"valid_targets_min": 735
},
{
"epoch": 2.825396825396825,
"grad_norm": 0.33412534025670526,
"learning_rate": 7.27804370180761e-05,
"loss": 0.1781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1706407070159912,
"step": 890,
"valid_targets_mean": 3179.3,
"valid_targets_min": 906
},
{
"epoch": 2.8412698412698414,
"grad_norm": 0.3339179547455438,
"learning_rate": 7.250111420321863e-05,
"loss": 0.1711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16379770636558533,
"step": 895,
"valid_targets_mean": 3285.1,
"valid_targets_min": 369
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.34023627441151805,
"learning_rate": 7.222090803657965e-05,
"loss": 0.1647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16814827919006348,
"step": 900,
"valid_targets_mean": 3231.8,
"valid_targets_min": 1139
},
{
"epoch": 2.873015873015873,
"grad_norm": 0.3340218754644537,
"learning_rate": 7.193982951853072e-05,
"loss": 0.1661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16534548997879028,
"step": 905,
"valid_targets_mean": 3287.0,
"valid_targets_min": 1229
},
{
"epoch": 2.888888888888889,
"grad_norm": 0.3567976236306708,
"learning_rate": 7.165788968369027e-05,
"loss": 0.1671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16220903396606445,
"step": 910,
"valid_targets_mean": 2799.1,
"valid_targets_min": 645
},
{
"epoch": 2.9047619047619047,
"grad_norm": 0.323267924884269,
"learning_rate": 7.137509960049043e-05,
"loss": 0.1699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1666952222585678,
"step": 915,
"valid_targets_mean": 3537.4,
"valid_targets_min": 743
},
{
"epoch": 2.9206349206349205,
"grad_norm": 0.3344600534927337,
"learning_rate": 7.109147037074249e-05,
"loss": 0.1743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16353312134742737,
"step": 920,
"valid_targets_mean": 3538.2,
"valid_targets_min": 586
},
{
"epoch": 2.9365079365079367,
"grad_norm": 0.3519409005679982,
"learning_rate": 7.080701312920106e-05,
"loss": 0.1709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17425547540187836,
"step": 925,
"valid_targets_mean": 2968.8,
"valid_targets_min": 734
},
{
"epoch": 2.9523809523809526,
"grad_norm": 0.31842438352891295,
"learning_rate": 7.052173904312699e-05,
"loss": 0.1739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1763269454240799,
"step": 930,
"valid_targets_mean": 3808.2,
"valid_targets_min": 926
},
{
"epoch": 2.9682539682539684,
"grad_norm": 0.3597461145782737,
"learning_rate": 7.023565931184888e-05,
"loss": 0.1687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17197969555854797,
"step": 935,
"valid_targets_mean": 2821.0,
"valid_targets_min": 381
},
{
"epoch": 2.984126984126984,
"grad_norm": 0.3773619909688809,
"learning_rate": 6.994878516632347e-05,
"loss": 0.1722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18970924615859985,
"step": 940,
"valid_targets_mean": 3117.7,
"valid_targets_min": 332
},
{
"epoch": 3.0,
"grad_norm": 0.35904186673915295,
"learning_rate": 6.966112786869471e-05,
"loss": 0.169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1610109508037567,
"step": 945,
"valid_targets_mean": 2840.3,
"valid_targets_min": 1321
},
{
"epoch": 3.015873015873016,
"grad_norm": 0.51988600717683,
"learning_rate": 6.937269871185171e-05,
"loss": 0.0984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09736981987953186,
"step": 950,
"valid_targets_mean": 3713.8,
"valid_targets_min": 728
},
{
"epoch": 3.0317460317460316,
"grad_norm": 0.3566692041425496,
"learning_rate": 6.908350901898522e-05,
"loss": 0.0948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09821034967899323,
"step": 955,
"valid_targets_mean": 3109.2,
"valid_targets_min": 855
},
{
"epoch": 3.0476190476190474,
"grad_norm": 0.34988594260353406,
"learning_rate": 6.87935701431433e-05,
"loss": 0.0908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08849053084850311,
"step": 960,
"valid_targets_mean": 3486.1,
"valid_targets_min": 759
},
{
"epoch": 3.0634920634920633,
"grad_norm": 0.3684968880517492,
"learning_rate": 6.850289346678552e-05,
"loss": 0.0918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0889333039522171,
"step": 965,
"valid_targets_mean": 3089.4,
"valid_targets_min": 767
},
{
"epoch": 3.0793650793650795,
"grad_norm": 0.3406031062455158,
"learning_rate": 6.821149040133608e-05,
"loss": 0.0932,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08600161969661713,
"step": 970,
"valid_targets_mean": 2967.3,
"valid_targets_min": 846
},
{
"epoch": 3.0952380952380953,
"grad_norm": 0.39035591213321374,
"learning_rate": 6.791937238673592e-05,
"loss": 0.0985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10481473803520203,
"step": 975,
"valid_targets_mean": 2889.2,
"valid_targets_min": 1068
},
{
"epoch": 3.111111111111111,
"grad_norm": 0.341213524572102,
"learning_rate": 6.762655089099353e-05,
"loss": 0.0927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09173455089330673,
"step": 980,
"valid_targets_mean": 3260.5,
"valid_targets_min": 889
},
{
"epoch": 3.126984126984127,
"grad_norm": 0.3104515295578226,
"learning_rate": 6.733303740973476e-05,
"loss": 0.0945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09036482125520706,
"step": 985,
"valid_targets_mean": 3952.8,
"valid_targets_min": 1409
},
{
"epoch": 3.142857142857143,
"grad_norm": 0.3551799137750828,
"learning_rate": 6.703884346575147e-05,
"loss": 0.0912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08662883937358856,
"step": 990,
"valid_targets_mean": 3036.0,
"valid_targets_min": 680
},
{
"epoch": 3.1587301587301586,
"grad_norm": 0.3713835912690338,
"learning_rate": 6.674398060854931e-05,
"loss": 0.1005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1003386452794075,
"step": 995,
"valid_targets_mean": 2793.7,
"valid_targets_min": 588
},
{
"epoch": 3.1746031746031744,
"grad_norm": 0.3471639600992559,
"learning_rate": 6.644846041389414e-05,
"loss": 0.0988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09873468428850174,
"step": 1000,
"valid_targets_mean": 3826.8,
"valid_targets_min": 563
},
{
"epoch": 3.1904761904761907,
"grad_norm": 0.3628044132628041,
"learning_rate": 6.615229448335769e-05,
"loss": 0.0931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09047640860080719,
"step": 1005,
"valid_targets_mean": 3412.8,
"valid_targets_min": 737
},
{
"epoch": 3.2063492063492065,
"grad_norm": 0.392140458241405,
"learning_rate": 6.58554944438621e-05,
"loss": 0.0944,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10625366866588593,
"step": 1010,
"valid_targets_mean": 2965.2,
"valid_targets_min": 677
},
{
"epoch": 3.2222222222222223,
"grad_norm": 0.3768633246359088,
"learning_rate": 6.555807194722339e-05,
"loss": 0.0945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09283259510993958,
"step": 1015,
"valid_targets_mean": 2806.7,
"valid_targets_min": 551
},
{
"epoch": 3.238095238095238,
"grad_norm": 0.3398289365530458,
"learning_rate": 6.526003866969412e-05,
"loss": 0.096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09495319426059723,
"step": 1020,
"valid_targets_mean": 3490.8,
"valid_targets_min": 636
},
{
"epoch": 3.253968253968254,
"grad_norm": 0.3233141200068704,
"learning_rate": 6.4961406311505e-05,
"loss": 0.0984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09404659271240234,
"step": 1025,
"valid_targets_mean": 3667.8,
"valid_targets_min": 935
},
{
"epoch": 3.2698412698412698,
"grad_norm": 0.3320120444353232,
"learning_rate": 6.466218659640545e-05,
"loss": 0.0976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09830652177333832,
"step": 1030,
"valid_targets_mean": 3334.6,
"valid_targets_min": 821
},
{
"epoch": 3.2857142857142856,
"grad_norm": 0.34959529865828043,
"learning_rate": 6.436239127120351e-05,
"loss": 0.0988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0943569615483284,
"step": 1035,
"valid_targets_mean": 3270.2,
"valid_targets_min": 1175
},
{
"epoch": 3.3015873015873014,
"grad_norm": 0.3737089477134585,
"learning_rate": 6.406203210530455e-05,
"loss": 0.098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10708297789096832,
"step": 1040,
"valid_targets_mean": 3081.0,
"valid_targets_min": 318
},
{
"epoch": 3.317460317460317,
"grad_norm": 0.36981526277827687,
"learning_rate": 6.376112089024928e-05,
"loss": 0.0981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10486898571252823,
"step": 1045,
"valid_targets_mean": 3050.3,
"valid_targets_min": 781
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.3520581333414948,
"learning_rate": 6.345966943925085e-05,
"loss": 0.0967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09494173526763916,
"step": 1050,
"valid_targets_mean": 3372.5,
"valid_targets_min": 331
},
{
"epoch": 3.3492063492063493,
"grad_norm": 0.3044367023749068,
"learning_rate": 6.315768958673103e-05,
"loss": 0.0993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09523281455039978,
"step": 1055,
"valid_targets_mean": 3607.8,
"valid_targets_min": 671
},
{
"epoch": 3.365079365079365,
"grad_norm": 0.31184254040567083,
"learning_rate": 6.285519318785568e-05,
"loss": 0.0957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08346079289913177,
"step": 1060,
"valid_targets_mean": 3521.0,
"valid_targets_min": 971
},
{
"epoch": 3.380952380952381,
"grad_norm": 0.34386459703071554,
"learning_rate": 6.25521921180693e-05,
"loss": 0.0986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09842342138290405,
"step": 1065,
"valid_targets_mean": 3383.3,
"valid_targets_min": 789
},
{
"epoch": 3.3968253968253967,
"grad_norm": 0.3863719804907234,
"learning_rate": 6.224869827262885e-05,
"loss": 0.1017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10565370321273804,
"step": 1070,
"valid_targets_mean": 2921.4,
"valid_targets_min": 645
},
{
"epoch": 3.4126984126984126,
"grad_norm": 0.3420872873529232,
"learning_rate": 6.194472356613667e-05,
"loss": 0.1045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10064145177602768,
"step": 1075,
"valid_targets_mean": 3480.2,
"valid_targets_min": 750
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.3603530774863466,
"learning_rate": 6.16402799320729e-05,
"loss": 0.0996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09136994183063507,
"step": 1080,
"valid_targets_mean": 3059.1,
"valid_targets_min": 693
},
{
"epoch": 3.4444444444444446,
"grad_norm": 0.33018926023794154,
"learning_rate": 6.133537932232684e-05,
"loss": 0.0952,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0876457691192627,
"step": 1085,
"valid_targets_mean": 3516.0,
"valid_targets_min": 964
},
{
"epoch": 3.4603174603174605,
"grad_norm": 0.36301475353411755,
"learning_rate": 6.1030033706727815e-05,
"loss": 0.1003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10114194452762604,
"step": 1090,
"valid_targets_mean": 3181.5,
"valid_targets_min": 905
},
{
"epoch": 3.4761904761904763,
"grad_norm": 0.36961623868161125,
"learning_rate": 6.0724255072575275e-05,
"loss": 0.1007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10001328587532043,
"step": 1095,
"valid_targets_mean": 2927.5,
"valid_targets_min": 1054
},
{
"epoch": 3.492063492063492,
"grad_norm": 0.3354884843924621,
"learning_rate": 6.0418055424168154e-05,
"loss": 0.0944,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0889013260602951,
"step": 1100,
"valid_targets_mean": 3379.9,
"valid_targets_min": 770
},
{
"epoch": 3.507936507936508,
"grad_norm": 0.3471892332651075,
"learning_rate": 6.011144678233359e-05,
"loss": 0.097,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10150094330310822,
"step": 1105,
"valid_targets_mean": 3311.4,
"valid_targets_min": 787
},
{
"epoch": 3.5238095238095237,
"grad_norm": 0.31078709832767126,
"learning_rate": 5.9804441183955104e-05,
"loss": 0.0987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0964626893401146,
"step": 1110,
"valid_targets_mean": 3926.9,
"valid_targets_min": 1073
},
{
"epoch": 3.5396825396825395,
"grad_norm": 0.3546542889462149,
"learning_rate": 5.9497050681499955e-05,
"loss": 0.106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10847651958465576,
"step": 1115,
"valid_targets_mean": 3566.3,
"valid_targets_min": 1081
},
{
"epoch": 3.5555555555555554,
"grad_norm": 0.3423493309583132,
"learning_rate": 5.9189287342545996e-05,
"loss": 0.0972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09609606862068176,
"step": 1120,
"valid_targets_mean": 3299.1,
"valid_targets_min": 574
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.3621388313545609,
"learning_rate": 5.888116324930798e-05,
"loss": 0.1014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09804192185401917,
"step": 1125,
"valid_targets_mean": 3195.3,
"valid_targets_min": 806
},
{
"epoch": 3.5873015873015874,
"grad_norm": 0.34845940578989215,
"learning_rate": 5.8572690498163205e-05,
"loss": 0.1007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10415421426296234,
"step": 1130,
"valid_targets_mean": 3283.8,
"valid_targets_min": 849
},
{
"epoch": 3.6031746031746033,
"grad_norm": 0.3511248351881585,
"learning_rate": 5.826388119917658e-05,
"loss": 0.1001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10305723547935486,
"step": 1135,
"valid_targets_mean": 3360.9,
"valid_targets_min": 318
},
{
"epoch": 3.619047619047619,
"grad_norm": 0.3582642943749132,
"learning_rate": 5.795474747562533e-05,
"loss": 0.0981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10681834071874619,
"step": 1140,
"valid_targets_mean": 3691.9,
"valid_targets_min": 877
},
{
"epoch": 3.634920634920635,
"grad_norm": 0.38039584848017866,
"learning_rate": 5.7645301463522895e-05,
"loss": 0.101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1044946163892746,
"step": 1145,
"valid_targets_mean": 3045.8,
"valid_targets_min": 548
},
{
"epoch": 3.6507936507936507,
"grad_norm": 0.3843884369341132,
"learning_rate": 5.7335555311142677e-05,
"loss": 0.0957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10071367025375366,
"step": 1150,
"valid_targets_mean": 2920.6,
"valid_targets_min": 955
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.3766895411083596,
"learning_rate": 5.702552117854093e-05,
"loss": 0.1063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10362906754016876,
"step": 1155,
"valid_targets_mean": 3138.1,
"valid_targets_min": 950
},
{
"epoch": 3.682539682539683,
"grad_norm": 0.3562449704876102,
"learning_rate": 5.671521123707955e-05,
"loss": 0.0966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10384504497051239,
"step": 1160,
"valid_targets_mean": 3039.8,
"valid_targets_min": 958
},
{
"epoch": 3.6984126984126986,
"grad_norm": 0.38804734901154214,
"learning_rate": 5.640463766894813e-05,
"loss": 0.0996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10218723118305206,
"step": 1165,
"valid_targets_mean": 2734.8,
"valid_targets_min": 1405
},
{
"epoch": 3.7142857142857144,
"grad_norm": 0.3381745400442977,
"learning_rate": 5.609381266668578e-05,
"loss": 0.0986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09848766773939133,
"step": 1170,
"valid_targets_mean": 3304.8,
"valid_targets_min": 881
},
{
"epoch": 3.7301587301587302,
"grad_norm": 0.3509546095547907,
"learning_rate": 5.5782748432702426e-05,
"loss": 0.1037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10381171107292175,
"step": 1175,
"valid_targets_mean": 3211.6,
"valid_targets_min": 611
},
{
"epoch": 3.746031746031746,
"grad_norm": 0.37045066245950364,
"learning_rate": 5.54714571787998e-05,
"loss": 0.0967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09512515366077423,
"step": 1180,
"valid_targets_mean": 3034.8,
"valid_targets_min": 1448
},
{
"epoch": 3.761904761904762,
"grad_norm": 0.3392682530297764,
"learning_rate": 5.5159951125692e-05,
"loss": 0.1031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09948498010635376,
"step": 1185,
"valid_targets_mean": 3234.0,
"valid_targets_min": 733
},
{
"epoch": 3.7777777777777777,
"grad_norm": 0.36472000804314153,
"learning_rate": 5.484824250252574e-05,
"loss": 0.1056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10936148464679718,
"step": 1190,
"valid_targets_mean": 3208.0,
"valid_targets_min": 1007
},
{
"epoch": 3.7936507936507935,
"grad_norm": 0.3286424664187973,
"learning_rate": 5.453634354640028e-05,
"loss": 0.0956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10229085385799408,
"step": 1195,
"valid_targets_mean": 3895.4,
"valid_targets_min": 901
},
{
"epoch": 3.8095238095238093,
"grad_norm": 0.33558484161998225,
"learning_rate": 5.422426650188698e-05,
"loss": 0.1038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10568304359912872,
"step": 1200,
"valid_targets_mean": 3686.3,
"valid_targets_min": 811
},
{
"epoch": 3.825396825396825,
"grad_norm": 0.331818482885697,
"learning_rate": 5.391202362054859e-05,
"loss": 0.1039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09697206318378448,
"step": 1205,
"valid_targets_mean": 3690.9,
"valid_targets_min": 1105
},
{
"epoch": 3.8412698412698414,
"grad_norm": 0.34884233353948313,
"learning_rate": 5.359962716045835e-05,
"loss": 0.1012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10613285005092621,
"step": 1210,
"valid_targets_mean": 3277.8,
"valid_targets_min": 1366
},
{
"epoch": 3.857142857142857,
"grad_norm": 0.3377093966535472,
"learning_rate": 5.328708938571872e-05,
"loss": 0.0997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09620672464370728,
"step": 1215,
"valid_targets_mean": 3225.2,
"valid_targets_min": 586
},
{
"epoch": 3.873015873015873,
"grad_norm": 0.34963611135918393,
"learning_rate": 5.2974422565979866e-05,
"loss": 0.094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09651202708482742,
"step": 1220,
"valid_targets_mean": 3314.6,
"valid_targets_min": 615
},
{
"epoch": 3.888888888888889,
"grad_norm": 0.312982373502565,
"learning_rate": 5.266163897595804e-05,
"loss": 0.1005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10331282019615173,
"step": 1225,
"valid_targets_mean": 4324.3,
"valid_targets_min": 1346
},
{
"epoch": 3.9047619047619047,
"grad_norm": 0.3084921760199167,
"learning_rate": 5.234875089495368e-05,
"loss": 0.0957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08868332207202911,
"step": 1230,
"valid_targets_mean": 4299.4,
"valid_targets_min": 1377
},
{
"epoch": 3.9206349206349205,
"grad_norm": 0.31812977348828253,
"learning_rate": 5.203577060636935e-05,
"loss": 0.0985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0956139788031578,
"step": 1235,
"valid_targets_mean": 3409.8,
"valid_targets_min": 1189
},
{
"epoch": 3.9365079365079367,
"grad_norm": 0.36283301664873513,
"learning_rate": 5.172271039722749e-05,
"loss": 0.1054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10029184818267822,
"step": 1240,
"valid_targets_mean": 2871.2,
"valid_targets_min": 1119
},
{
"epoch": 3.9523809523809526,
"grad_norm": 0.3747003910929305,
"learning_rate": 5.140958255768812e-05,
"loss": 0.1009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10342127829790115,
"step": 1245,
"valid_targets_mean": 2885.4,
"valid_targets_min": 762
},
{
"epoch": 3.9682539682539684,
"grad_norm": 0.3158439672350005,
"learning_rate": 5.109639938056625e-05,
"loss": 0.0931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08681650459766388,
"step": 1250,
"valid_targets_mean": 3408.3,
"valid_targets_min": 586
},
{
"epoch": 3.984126984126984,
"grad_norm": 0.37808062071501214,
"learning_rate": 5.078317316084935e-05,
"loss": 0.1016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10480676591396332,
"step": 1255,
"valid_targets_mean": 2720.9,
"valid_targets_min": 373
},
{
"epoch": 4.0,
"grad_norm": 0.3684537274368403,
"learning_rate": 5.0469916195214694e-05,
"loss": 0.1014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09857641160488129,
"step": 1260,
"valid_targets_mean": 2967.4,
"valid_targets_min": 825
},
{
"epoch": 4.015873015873016,
"grad_norm": 0.4153943983019351,
"learning_rate": 5.015664078154655e-05,
"loss": 0.0531,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.060983940958976746,
"step": 1265,
"valid_targets_mean": 3012.4,
"valid_targets_min": 1052
},
{
"epoch": 4.031746031746032,
"grad_norm": 0.3581444583068178,
"learning_rate": 4.9843359218453455e-05,
"loss": 0.0541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052729278802871704,
"step": 1270,
"valid_targets_mean": 2914.1,
"valid_targets_min": 1004
},
{
"epoch": 4.0476190476190474,
"grad_norm": 0.31437918787892,
"learning_rate": 4.953008380478532e-05,
"loss": 0.0514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.054422155022621155,
"step": 1275,
"valid_targets_mean": 3480.0,
"valid_targets_min": 1017
},
{
"epoch": 4.063492063492063,
"grad_norm": 0.351025328877443,
"learning_rate": 4.921682683915066e-05,
"loss": 0.0528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0532863512635231,
"step": 1280,
"valid_targets_mean": 3265.0,
"valid_targets_min": 684
},
{
"epoch": 4.079365079365079,
"grad_norm": 0.33069084731208065,
"learning_rate": 4.8903600619433775e-05,
"loss": 0.0503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050467319786548615,
"step": 1285,
"valid_targets_mean": 3606.2,
"valid_targets_min": 1179
},
{
"epoch": 4.095238095238095,
"grad_norm": 0.3380162738505722,
"learning_rate": 4.85904174423119e-05,
"loss": 0.0503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0538782961666584,
"step": 1290,
"valid_targets_mean": 3495.3,
"valid_targets_min": 777
},
{
"epoch": 4.111111111111111,
"grad_norm": 0.3275848802978787,
"learning_rate": 4.8277289602772514e-05,
"loss": 0.0501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05028657615184784,
"step": 1295,
"valid_targets_mean": 3340.8,
"valid_targets_min": 726
},
{
"epoch": 4.1269841269841265,
"grad_norm": 0.30731385336857986,
"learning_rate": 4.796422939363066e-05,
"loss": 0.0518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05034211650490761,
"step": 1300,
"valid_targets_mean": 3559.4,
"valid_targets_min": 994
},
{
"epoch": 4.142857142857143,
"grad_norm": 0.2886226480947433,
"learning_rate": 4.7651249105046325e-05,
"loss": 0.0514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047623828053474426,
"step": 1305,
"valid_targets_mean": 3644.8,
"valid_targets_min": 1056
},
{
"epoch": 4.158730158730159,
"grad_norm": 0.3292470966545804,
"learning_rate": 4.733836102404197e-05,
"loss": 0.053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0555991530418396,
"step": 1310,
"valid_targets_mean": 3310.0,
"valid_targets_min": 1148
},
{
"epoch": 4.174603174603175,
"grad_norm": 0.31671105253869936,
"learning_rate": 4.7025577434020146e-05,
"loss": 0.0515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050965093076229095,
"step": 1315,
"valid_targets_mean": 3481.4,
"valid_targets_min": 1080
},
{
"epoch": 4.190476190476191,
"grad_norm": 0.2691814268392951,
"learning_rate": 4.671291061428129e-05,
"loss": 0.0519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048076000064611435,
"step": 1320,
"valid_targets_mean": 4136.2,
"valid_targets_min": 563
},
{
"epoch": 4.2063492063492065,
"grad_norm": 0.3193716065061294,
"learning_rate": 4.640037283954165e-05,
"loss": 0.0488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0491257943212986,
"step": 1325,
"valid_targets_mean": 3621.5,
"valid_targets_min": 886
},
{
"epoch": 4.222222222222222,
"grad_norm": 0.3321338547756679,
"learning_rate": 4.608797637945142e-05,
"loss": 0.0503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05201344937086105,
"step": 1330,
"valid_targets_mean": 3407.8,
"valid_targets_min": 939
},
{
"epoch": 4.238095238095238,
"grad_norm": 0.3245396663647784,
"learning_rate": 4.577573349811304e-05,
"loss": 0.0551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052470143884420395,
"step": 1335,
"valid_targets_mean": 3235.7,
"valid_targets_min": 1073
},
{
"epoch": 4.253968253968254,
"grad_norm": 0.3395641998038192,
"learning_rate": 4.5463656453599726e-05,
"loss": 0.0528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05411034822463989,
"step": 1340,
"valid_targets_mean": 3194.9,
"valid_targets_min": 318
},
{
"epoch": 4.26984126984127,
"grad_norm": 0.29449840651246995,
"learning_rate": 4.515175749747426e-05,
"loss": 0.0511,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04776880145072937,
"step": 1345,
"valid_targets_mean": 3797.7,
"valid_targets_min": 702
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.34455860119114856,
"learning_rate": 4.484004887430803e-05,
"loss": 0.0544,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05883609876036644,
"step": 1350,
"valid_targets_mean": 3001.2,
"valid_targets_min": 825
},
{
"epoch": 4.301587301587301,
"grad_norm": 0.3137045381914164,
"learning_rate": 4.452854282120022e-05,
"loss": 0.0539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050319183617830276,
"step": 1355,
"valid_targets_mean": 3514.4,
"valid_targets_min": 293
},
{
"epoch": 4.317460317460317,
"grad_norm": 0.3133895802149954,
"learning_rate": 4.4217251567297586e-05,
"loss": 0.0537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0509931854903698,
"step": 1360,
"valid_targets_mean": 3800.1,
"valid_targets_min": 1003
},
{
"epoch": 4.333333333333333,
"grad_norm": 0.29357417262063296,
"learning_rate": 4.390618733331423e-05,
"loss": 0.0543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04734072834253311,
"step": 1365,
"valid_targets_mean": 3653.2,
"valid_targets_min": 728
},
{
"epoch": 4.349206349206349,
"grad_norm": 0.3150075563320328,
"learning_rate": 4.359536233105187e-05,
"loss": 0.0541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05183495208621025,
"step": 1370,
"valid_targets_mean": 3766.0,
"valid_targets_min": 965
},
{
"epoch": 4.365079365079365,
"grad_norm": 0.3552891281644966,
"learning_rate": 4.328478876292045e-05,
"loss": 0.0516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055399805307388306,
"step": 1375,
"valid_targets_mean": 2724.2,
"valid_targets_min": 586
},
{
"epoch": 4.380952380952381,
"grad_norm": 0.33304071178653466,
"learning_rate": 4.297447882145907e-05,
"loss": 0.0555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053134672343730927,
"step": 1380,
"valid_targets_mean": 2952.0,
"valid_targets_min": 916
},
{
"epoch": 4.396825396825397,
"grad_norm": 0.34771402929173456,
"learning_rate": 4.266444468885735e-05,
"loss": 0.0534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05659467354416847,
"step": 1385,
"valid_targets_mean": 2962.9,
"valid_targets_min": 742
},
{
"epoch": 4.412698412698413,
"grad_norm": 0.30995705929679834,
"learning_rate": 4.235469853647711e-05,
"loss": 0.054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0572165809571743,
"step": 1390,
"valid_targets_mean": 4091.7,
"valid_targets_min": 984
},
{
"epoch": 4.428571428571429,
"grad_norm": 0.3413898378795505,
"learning_rate": 4.204525252437469e-05,
"loss": 0.0535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05606987327337265,
"step": 1395,
"valid_targets_mean": 3297.2,
"valid_targets_min": 916
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.2938410889592773,
"learning_rate": 4.173611880082342e-05,
"loss": 0.0539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0523369163274765,
"step": 1400,
"valid_targets_mean": 3989.2,
"valid_targets_min": 615
},
{
"epoch": 4.4603174603174605,
"grad_norm": 0.3058932143351616,
"learning_rate": 4.1427309501836806e-05,
"loss": 0.0551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048057764768600464,
"step": 1405,
"valid_targets_mean": 3375.5,
"valid_targets_min": 942
},
{
"epoch": 4.476190476190476,
"grad_norm": 0.3475308691671556,
"learning_rate": 4.111883675069202e-05,
"loss": 0.0545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05427522212266922,
"step": 1410,
"valid_targets_mean": 3109.4,
"valid_targets_min": 1195
},
{
"epoch": 4.492063492063492,
"grad_norm": 0.3546866810389638,
"learning_rate": 4.081071265745402e-05,
"loss": 0.0546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06188282370567322,
"step": 1415,
"valid_targets_mean": 2553.4,
"valid_targets_min": 1181
},
{
"epoch": 4.507936507936508,
"grad_norm": 0.3549135864226603,
"learning_rate": 4.0502949318500064e-05,
"loss": 0.055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05323749780654907,
"step": 1420,
"valid_targets_mean": 2530.2,
"valid_targets_min": 846
},
{
"epoch": 4.523809523809524,
"grad_norm": 0.34447294739101914,
"learning_rate": 4.01955588160449e-05,
"loss": 0.0561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.057265084236860275,
"step": 1425,
"valid_targets_mean": 3020.2,
"valid_targets_min": 873
},
{
"epoch": 4.5396825396825395,
"grad_norm": 0.32321348267609445,
"learning_rate": 3.9888553217666415e-05,
"loss": 0.0549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05526804178953171,
"step": 1430,
"valid_targets_mean": 3486.1,
"valid_targets_min": 1013
},
{
"epoch": 4.555555555555555,
"grad_norm": 0.33228527568112487,
"learning_rate": 3.9581944575831864e-05,
"loss": 0.0532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04903782904148102,
"step": 1435,
"valid_targets_mean": 2660.8,
"valid_targets_min": 601
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.36386571786343064,
"learning_rate": 3.927574492742473e-05,
"loss": 0.0583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06255079060792923,
"step": 1440,
"valid_targets_mean": 2925.6,
"valid_targets_min": 862
},
{
"epoch": 4.587301587301587,
"grad_norm": 0.30325305282368664,
"learning_rate": 3.896996629327219e-05,
"loss": 0.0535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050758376717567444,
"step": 1445,
"valid_targets_mean": 3914.9,
"valid_targets_min": 936
},
{
"epoch": 4.603174603174603,
"grad_norm": 0.2877745700763886,
"learning_rate": 3.8664620677673186e-05,
"loss": 0.0501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0491793192923069,
"step": 1450,
"valid_targets_mean": 4105.0,
"valid_targets_min": 932
},
{
"epoch": 4.619047619047619,
"grad_norm": 0.33246313679919126,
"learning_rate": 3.8359720067927115e-05,
"loss": 0.053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05500481277704239,
"step": 1455,
"valid_targets_mean": 3288.3,
"valid_targets_min": 816
},
{
"epoch": 4.634920634920634,
"grad_norm": 0.3255731224642127,
"learning_rate": 3.805527643386334e-05,
"loss": 0.052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05280488729476929,
"step": 1460,
"valid_targets_mean": 3135.0,
"valid_targets_min": 1126
},
{
"epoch": 4.650793650793651,
"grad_norm": 0.30648161691148584,
"learning_rate": 3.775130172737117e-05,
"loss": 0.0523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05356438457965851,
"step": 1465,
"valid_targets_mean": 3381.0,
"valid_targets_min": 586
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.31556097843679115,
"learning_rate": 3.7447807881930694e-05,
"loss": 0.0527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.049721553921699524,
"step": 1470,
"valid_targets_mean": 3391.8,
"valid_targets_min": 956
},
{
"epoch": 4.682539682539683,
"grad_norm": 0.337509339024026,
"learning_rate": 3.7144806812144324e-05,
"loss": 0.0545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.056368205696344376,
"step": 1475,
"valid_targets_mean": 3378.8,
"valid_targets_min": 821
},
{
"epoch": 4.698412698412699,
"grad_norm": 0.31522673917083804,
"learning_rate": 3.6842310413269e-05,
"loss": 0.0557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05478835850954056,
"step": 1480,
"valid_targets_mean": 3541.2,
"valid_targets_min": 1464
},
{
"epoch": 4.714285714285714,
"grad_norm": 0.31921752559672634,
"learning_rate": 3.654033056074918e-05,
"loss": 0.0525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055470626801252365,
"step": 1485,
"valid_targets_mean": 3424.5,
"valid_targets_min": 741
},
{
"epoch": 4.73015873015873,
"grad_norm": 0.2808688151765656,
"learning_rate": 3.6238879109750735e-05,
"loss": 0.053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.045721858739852905,
"step": 1490,
"valid_targets_mean": 3944.5,
"valid_targets_min": 374
},
{
"epoch": 4.746031746031746,
"grad_norm": 0.3300324859616459,
"learning_rate": 3.593796789469546e-05,
"loss": 0.052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05356018245220184,
"step": 1495,
"valid_targets_mean": 3003.5,
"valid_targets_min": 844
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.3032124702664025,
"learning_rate": 3.563760872879649e-05,
"loss": 0.0514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047460850328207016,
"step": 1500,
"valid_targets_mean": 3525.9,
"valid_targets_min": 1121
},
{
"epoch": 4.777777777777778,
"grad_norm": 0.3374039231803274,
"learning_rate": 3.5337813403594545e-05,
"loss": 0.0523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05037646368145943,
"step": 1505,
"valid_targets_mean": 2918.6,
"valid_targets_min": 1081
},
{
"epoch": 4.7936507936507935,
"grad_norm": 0.31170176986389236,
"learning_rate": 3.5038593688495005e-05,
"loss": 0.0541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04973987489938736,
"step": 1510,
"valid_targets_mean": 3366.8,
"valid_targets_min": 285
},
{
"epoch": 4.809523809523809,
"grad_norm": 0.3393101055826999,
"learning_rate": 3.4739961330305894e-05,
"loss": 0.0556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05811101943254471,
"step": 1515,
"valid_targets_mean": 3200.0,
"valid_targets_min": 1096
},
{
"epoch": 4.825396825396825,
"grad_norm": 0.33852940213510874,
"learning_rate": 3.444192805277663e-05,
"loss": 0.0506,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05156991630792618,
"step": 1520,
"valid_targets_mean": 2913.8,
"valid_targets_min": 645
},
{
"epoch": 4.841269841269841,
"grad_norm": 0.3312924011892462,
"learning_rate": 3.414450555613792e-05,
"loss": 0.0518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05578611418604851,
"step": 1525,
"valid_targets_mean": 3475.9,
"valid_targets_min": 851
},
{
"epoch": 4.857142857142857,
"grad_norm": 0.2906890421902625,
"learning_rate": 3.3847705516642317e-05,
"loss": 0.0509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04764970391988754,
"step": 1530,
"valid_targets_mean": 3311.6,
"valid_targets_min": 1073
},
{
"epoch": 4.8730158730158735,
"grad_norm": 0.3058135467638653,
"learning_rate": 3.355153958610587e-05,
"loss": 0.0508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04871264845132828,
"step": 1535,
"valid_targets_mean": 3466.6,
"valid_targets_min": 858
},
{
"epoch": 4.888888888888889,
"grad_norm": 0.33436172037636874,
"learning_rate": 3.325601939145069e-05,
"loss": 0.0541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05800117552280426,
"step": 1540,
"valid_targets_mean": 3362.7,
"valid_targets_min": 1061
},
{
"epoch": 4.904761904761905,
"grad_norm": 0.3688653693932819,
"learning_rate": 3.296115653424854e-05,
"loss": 0.0586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07181097567081451,
"step": 1545,
"valid_targets_mean": 2898.6,
"valid_targets_min": 845
},
{
"epoch": 4.920634920634921,
"grad_norm": 0.3062268300108016,
"learning_rate": 3.266696259026526e-05,
"loss": 0.0538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05916294455528259,
"step": 1550,
"valid_targets_mean": 3883.2,
"valid_targets_min": 567
},
{
"epoch": 4.936507936507937,
"grad_norm": 0.36817501253465373,
"learning_rate": 3.237344910900648e-05,
"loss": 0.0532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05916590616106987,
"step": 1555,
"valid_targets_mean": 2581.8,
"valid_targets_min": 1036
},
{
"epoch": 4.9523809523809526,
"grad_norm": 0.3281489035503953,
"learning_rate": 3.208062761326408e-05,
"loss": 0.0508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05242155119776726,
"step": 1560,
"valid_targets_mean": 3023.0,
"valid_targets_min": 796
},
{
"epoch": 4.968253968253968,
"grad_norm": 0.33908599199518963,
"learning_rate": 3.178850959866393e-05,
"loss": 0.0525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050644807517528534,
"step": 1565,
"valid_targets_mean": 3031.7,
"valid_targets_min": 849
},
{
"epoch": 4.984126984126984,
"grad_norm": 0.3496241644407292,
"learning_rate": 3.14971065332145e-05,
"loss": 0.0518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05183893069624901,
"step": 1570,
"valid_targets_mean": 2509.7,
"valid_targets_min": 318
},
{
"epoch": 5.0,
"grad_norm": 0.3195463471966324,
"learning_rate": 3.1206429856856706e-05,
"loss": 0.0507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.051501572132110596,
"step": 1575,
"valid_targets_mean": 3284.1,
"valid_targets_min": 677
},
{
"epoch": 5.015873015873016,
"grad_norm": 0.23550431411264172,
"learning_rate": 3.091649098101479e-05,
"loss": 0.024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02023545652627945,
"step": 1580,
"valid_targets_mean": 3572.4,
"valid_targets_min": 859
},
{
"epoch": 5.031746031746032,
"grad_norm": 0.33195372440897564,
"learning_rate": 3.06273012881483e-05,
"loss": 0.0252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0263539906591177,
"step": 1585,
"valid_targets_mean": 3201.2,
"valid_targets_min": 947
},
{
"epoch": 5.0476190476190474,
"grad_norm": 0.26127862712982075,
"learning_rate": 3.0338872131305284e-05,
"loss": 0.0254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02652253583073616,
"step": 1590,
"valid_targets_mean": 3707.3,
"valid_targets_min": 801
},
{
"epoch": 5.063492063492063,
"grad_norm": 0.2614335637158489,
"learning_rate": 3.0051214833676545e-05,
"loss": 0.0258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02493833750486374,
"step": 1595,
"valid_targets_mean": 3296.4,
"valid_targets_min": 726
},
{
"epoch": 5.079365079365079,
"grad_norm": 0.2674137096002863,
"learning_rate": 2.9764340688151137e-05,
"loss": 0.0256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024273354560136795,
"step": 1600,
"valid_targets_mean": 3360.6,
"valid_targets_min": 654
},
{
"epoch": 5.095238095238095,
"grad_norm": 0.2340596063464024,
"learning_rate": 2.9478260956873028e-05,
"loss": 0.0238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02273944392800331,
"step": 1605,
"valid_targets_mean": 4419.8,
"valid_targets_min": 810
},
{
"epoch": 5.111111111111111,
"grad_norm": 0.308811482193481,
"learning_rate": 2.919298687079895e-05,
"loss": 0.0266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03129608556628227,
"step": 1610,
"valid_targets_mean": 2848.1,
"valid_targets_min": 742
},
{
"epoch": 5.1269841269841265,
"grad_norm": 0.2842567967216506,
"learning_rate": 2.8908529629257543e-05,
"loss": 0.0265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02591758593916893,
"step": 1615,
"valid_targets_mean": 3129.9,
"valid_targets_min": 590
},
{
"epoch": 5.142857142857143,
"grad_norm": 0.24104336758904743,
"learning_rate": 2.8624900399509603e-05,
"loss": 0.0243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025047089904546738,
"step": 1620,
"valid_targets_mean": 3620.5,
"valid_targets_min": 877
},
{
"epoch": 5.158730158730159,
"grad_norm": 0.2523939190707324,
"learning_rate": 2.8342110316309745e-05,
"loss": 0.0259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025033798068761826,
"step": 1625,
"valid_targets_mean": 3434.6,
"valid_targets_min": 1271
},
{
"epoch": 5.174603174603175,
"grad_norm": 0.2495035494552693,
"learning_rate": 2.8060170481469293e-05,
"loss": 0.0249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.021112697198987007,
"step": 1630,
"valid_targets_mean": 3306.6,
"valid_targets_min": 750
},
{
"epoch": 5.190476190476191,
"grad_norm": 0.2648511629409598,
"learning_rate": 2.777909196342035e-05,
"loss": 0.0277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027787357568740845,
"step": 1635,
"valid_targets_mean": 3520.7,
"valid_targets_min": 1071
},
{
"epoch": 5.2063492063492065,
"grad_norm": 0.2487265676626827,
"learning_rate": 2.749888579678138e-05,
"loss": 0.0255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025035560131072998,
"step": 1640,
"valid_targets_mean": 3585.5,
"valid_targets_min": 702
},
{
"epoch": 5.222222222222222,
"grad_norm": 0.27878036038709497,
"learning_rate": 2.721956298192392e-05,
"loss": 0.027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026966020464897156,
"step": 1645,
"valid_targets_mean": 3309.6,
"valid_targets_min": 725
},
{
"epoch": 5.238095238095238,
"grad_norm": 0.2707139283998997,
"learning_rate": 2.6941134484540774e-05,
"loss": 0.0267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026516567915678024,
"step": 1650,
"valid_targets_mean": 3290.4,
"valid_targets_min": 695
},
{
"epoch": 5.253968253968254,
"grad_norm": 0.3091277534101446,
"learning_rate": 2.6663611235215486e-05,
"loss": 0.0265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027157654985785484,
"step": 1655,
"valid_targets_mean": 2847.1,
"valid_targets_min": 1120
},
{
"epoch": 5.26984126984127,
"grad_norm": 0.2616349404434438,
"learning_rate": 2.6387004128993314e-05,
"loss": 0.0251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024449646472930908,
"step": 1660,
"valid_targets_mean": 3586.6,
"valid_targets_min": 1115
},
{
"epoch": 5.285714285714286,
"grad_norm": 0.27356996302919745,
"learning_rate": 2.6111324024953378e-05,
"loss": 0.0256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025334272533655167,
"step": 1665,
"valid_targets_mean": 3436.2,
"valid_targets_min": 963
},
{
"epoch": 5.301587301587301,
"grad_norm": 0.3214202340630028,
"learning_rate": 2.5836581745782475e-05,
"loss": 0.0256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0278994832187891,
"step": 1670,
"valid_targets_mean": 2833.7,
"valid_targets_min": 1031
},
{
"epoch": 5.317460317460317,
"grad_norm": 0.2695726847515144,
"learning_rate": 2.556278807735008e-05,
"loss": 0.0263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02789219841361046,
"step": 1675,
"valid_targets_mean": 3449.8,
"valid_targets_min": 924
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.2727973520928148,
"learning_rate": 2.5289953768285092e-05,
"loss": 0.0257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02461610920727253,
"step": 1680,
"valid_targets_mean": 3137.1,
"valid_targets_min": 759
},
{
"epoch": 5.349206349206349,
"grad_norm": 0.24666589277394446,
"learning_rate": 2.501808952955359e-05,
"loss": 0.0262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022942395880818367,
"step": 1685,
"valid_targets_mean": 3659.5,
"valid_targets_min": 778
},
{
"epoch": 5.365079365079365,
"grad_norm": 0.2336284978989606,
"learning_rate": 2.474720603403866e-05,
"loss": 0.0242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02202015370130539,
"step": 1690,
"valid_targets_mean": 4008.6,
"valid_targets_min": 1621
},
{
"epoch": 5.380952380952381,
"grad_norm": 0.2801903131745413,
"learning_rate": 2.447731391612112e-05,
"loss": 0.0271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02588559314608574,
"step": 1695,
"valid_targets_mean": 3414.8,
"valid_targets_min": 744
},
{
"epoch": 5.396825396825397,
"grad_norm": 0.2181467290392336,
"learning_rate": 2.4208423771262238e-05,
"loss": 0.0249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023064320906996727,
"step": 1700,
"valid_targets_mean": 4122.8,
"valid_targets_min": 998
},
{
"epoch": 5.412698412698413,
"grad_norm": 0.2795289718838943,
"learning_rate": 2.3940546155587618e-05,
"loss": 0.0272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.030484072864055634,
"step": 1705,
"valid_targets_mean": 3116.4,
"valid_targets_min": 356
},
{
"epoch": 5.428571428571429,
"grad_norm": 0.2949329786637831,
"learning_rate": 2.367369158547292e-05,
"loss": 0.0265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.030314363539218903,
"step": 1710,
"valid_targets_mean": 3290.7,
"valid_targets_min": 1221
},
{
"epoch": 5.444444444444445,
"grad_norm": 0.2764043874434965,
"learning_rate": 2.3407870537130898e-05,
"loss": 0.0255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026211140677332878,
"step": 1715,
"valid_targets_mean": 3457.7,
"valid_targets_min": 1297
},
{
"epoch": 5.4603174603174605,
"grad_norm": 0.30290538424312186,
"learning_rate": 2.314309344620019e-05,
"loss": 0.0274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.028061866760253906,
"step": 1720,
"valid_targets_mean": 2804.1,
"valid_targets_min": 742
},
{
"epoch": 5.476190476190476,
"grad_norm": 0.3107682447774674,
"learning_rate": 2.287937070733557e-05,
"loss": 0.0245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02621731534600258,
"step": 1725,
"valid_targets_mean": 2719.9,
"valid_targets_min": 373
},
{
"epoch": 5.492063492063492,
"grad_norm": 0.2544021308351876,
"learning_rate": 2.2616712673799994e-05,
"loss": 0.0254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025156507268548012,
"step": 1730,
"valid_targets_mean": 3616.8,
"valid_targets_min": 833
},
{
"epoch": 5.507936507936508,
"grad_norm": 0.26483540124283883,
"learning_rate": 2.2355129657058004e-05,
"loss": 0.0255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022783182561397552,
"step": 1735,
"valid_targets_mean": 3544.8,
"valid_targets_min": 576
},
{
"epoch": 5.523809523809524,
"grad_norm": 0.26901313523570725,
"learning_rate": 2.2094631926371045e-05,
"loss": 0.0248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024442121386528015,
"step": 1740,
"valid_targets_mean": 3287.4,
"valid_targets_min": 684
},
{
"epoch": 5.5396825396825395,
"grad_norm": 0.24704696828339448,
"learning_rate": 2.18352297083942e-05,
"loss": 0.0259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023202896118164062,
"step": 1745,
"valid_targets_mean": 3396.6,
"valid_targets_min": 782
},
{
"epoch": 5.555555555555555,
"grad_norm": 0.24636088299850367,
"learning_rate": 2.1576933186774777e-05,
"loss": 0.0258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02773324027657509,
"step": 1750,
"valid_targets_mean": 3575.5,
"valid_targets_min": 741
},
{
"epoch": 5.571428571428571,
"grad_norm": 0.25071236680671866,
"learning_rate": 2.131975250175256e-05,
"loss": 0.0265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023207779973745346,
"step": 1755,
"valid_targets_mean": 3630.2,
"valid_targets_min": 1033
},
{
"epoch": 5.587301587301587,
"grad_norm": 0.27275654624779627,
"learning_rate": 2.1063697749761603e-05,
"loss": 0.0256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02427113801240921,
"step": 1760,
"valid_targets_mean": 3178.8,
"valid_targets_min": 371
},
{
"epoch": 5.603174603174603,
"grad_norm": 0.2379523221904174,
"learning_rate": 2.080877898303394e-05,
"loss": 0.0252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022033121436834335,
"step": 1765,
"valid_targets_mean": 3439.0,
"valid_targets_min": 762
},
{
"epoch": 5.619047619047619,
"grad_norm": 0.2960282320406999,
"learning_rate": 2.0555006209204997e-05,
"loss": 0.0255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026485547423362732,
"step": 1770,
"valid_targets_mean": 2969.6,
"valid_targets_min": 655
},
{
"epoch": 5.634920634920634,
"grad_norm": 0.26944449671962667,
"learning_rate": 2.030238939092059e-05,
"loss": 0.0252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02524230070412159,
"step": 1775,
"valid_targets_mean": 3050.5,
"valid_targets_min": 1216
},
{
"epoch": 5.650793650793651,
"grad_norm": 0.2554660378162961,
"learning_rate": 2.0050938445445894e-05,
"loss": 0.0251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023806503042578697,
"step": 1780,
"valid_targets_mean": 3239.5,
"valid_targets_min": 338
},
{
"epoch": 5.666666666666667,
"grad_norm": 0.28458142143891874,
"learning_rate": 1.980066324427613e-05,
"loss": 0.0246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027210228145122528,
"step": 1785,
"valid_targets_mean": 2952.8,
"valid_targets_min": 262
},
{
"epoch": 5.682539682539683,
"grad_norm": 0.26739426156418444,
"learning_rate": 1.9551573612748923e-05,
"loss": 0.0256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02571839839220047,
"step": 1790,
"valid_targets_mean": 3384.7,
"valid_targets_min": 1216
},
{
"epoch": 5.698412698412699,
"grad_norm": 0.2631155742829386,
"learning_rate": 1.9303679329658723e-05,
"loss": 0.0245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02573530748486519,
"step": 1795,
"valid_targets_mean": 3206.7,
"valid_targets_min": 731
},
{
"epoch": 5.714285714285714,
"grad_norm": 0.2824175420225818,
"learning_rate": 1.905699012687275e-05,
"loss": 0.0241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025772523134946823,
"step": 1800,
"valid_targets_mean": 3128.2,
"valid_targets_min": 656
},
{
"epoch": 5.73015873015873,
"grad_norm": 0.2671365865327459,
"learning_rate": 1.881151568894909e-05,
"loss": 0.0245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02606208436191082,
"step": 1805,
"valid_targets_mean": 3463.7,
"valid_targets_min": 1332
},
{
"epoch": 5.746031746031746,
"grad_norm": 0.2460946861875537,
"learning_rate": 1.8567265652756378e-05,
"loss": 0.0249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022165346890687943,
"step": 1810,
"valid_targets_mean": 3046.5,
"valid_targets_min": 866
},
{
"epoch": 5.761904761904762,
"grad_norm": 0.25239884686693714,
"learning_rate": 1.8324249607095534e-05,
"loss": 0.025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024087360128760338,
"step": 1815,
"valid_targets_mean": 3273.7,
"valid_targets_min": 1115
},
{
"epoch": 5.777777777777778,
"grad_norm": 0.23790823566806268,
"learning_rate": 1.8082477092323297e-05,
"loss": 0.024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02218548208475113,
"step": 1820,
"valid_targets_mean": 3519.1,
"valid_targets_min": 318
},
{
"epoch": 5.7936507936507935,
"grad_norm": 0.24495804994980389,
"learning_rate": 1.7841957599977755e-05,
"loss": 0.0242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022038817405700684,
"step": 1825,
"valid_targets_mean": 3439.0,
"valid_targets_min": 941
},
{
"epoch": 5.809523809523809,
"grad_norm": 0.27584063642219875,
"learning_rate": 1.760270057240559e-05,
"loss": 0.0241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027009010314941406,
"step": 1830,
"valid_targets_mean": 2943.4,
"valid_targets_min": 319
},
{
"epoch": 5.825396825396825,
"grad_norm": 0.2827248862474914,
"learning_rate": 1.736471540239156e-05,
"loss": 0.0246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02594105526804924,
"step": 1835,
"valid_targets_mean": 3063.8,
"valid_targets_min": 992
},
{
"epoch": 5.841269841269841,
"grad_norm": 0.24374670667972242,
"learning_rate": 1.712801143278961e-05,
"loss": 0.0243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02299753949046135,
"step": 1840,
"valid_targets_mean": 3784.0,
"valid_targets_min": 586
},
{
"epoch": 5.857142857142857,
"grad_norm": 0.2655779019009014,
"learning_rate": 1.6892597956156148e-05,
"loss": 0.0244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02573399990797043,
"step": 1845,
"valid_targets_mean": 3114.1,
"valid_targets_min": 703
},
{
"epoch": 5.8730158730158735,
"grad_norm": 0.2723810167625872,
"learning_rate": 1.6658484214385234e-05,
"loss": 0.024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0256446972489357,
"step": 1850,
"valid_targets_mean": 2782.9,
"valid_targets_min": 1190
},
{
"epoch": 5.888888888888889,
"grad_norm": 0.223290705015297,
"learning_rate": 1.6425679398345812e-05,
"loss": 0.0241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.020922958850860596,
"step": 1855,
"valid_targets_mean": 3931.8,
"valid_targets_min": 796
},
{
"epoch": 5.904761904761905,
"grad_norm": 0.27944419013119903,
"learning_rate": 1.619419264752076e-05,
"loss": 0.024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025158777832984924,
"step": 1860,
"valid_targets_mean": 2923.1,
"valid_targets_min": 466
},
{
"epoch": 5.920634920634921,
"grad_norm": 0.2681139770398924,
"learning_rate": 1.5964033049648262e-05,
"loss": 0.0249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02556907758116722,
"step": 1865,
"valid_targets_mean": 3237.6,
"valid_targets_min": 992
},
{
"epoch": 5.936507936507937,
"grad_norm": 0.26218394989898647,
"learning_rate": 1.5735209640364873e-05,
"loss": 0.0243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023507630452513695,
"step": 1870,
"valid_targets_mean": 3332.7,
"valid_targets_min": 838
},
{
"epoch": 5.9523809523809526,
"grad_norm": 0.24469964139348233,
"learning_rate": 1.5507731402850956e-05,
"loss": 0.0235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.021347498521208763,
"step": 1875,
"valid_targets_mean": 3352.9,
"valid_targets_min": 820
},
{
"epoch": 5.968253968253968,
"grad_norm": 0.2440356190541872,
"learning_rate": 1.528160726747783e-05,
"loss": 0.0238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02353665605187416,
"step": 1880,
"valid_targets_mean": 3424.4,
"valid_targets_min": 895
},
{
"epoch": 5.984126984126984,
"grad_norm": 0.27079401801374,
"learning_rate": 1.5056846111457407e-05,
"loss": 0.0231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023557720705866814,
"step": 1885,
"valid_targets_mean": 3352.8,
"valid_targets_min": 825
},
{
"epoch": 6.0,
"grad_norm": 0.24564494829508438,
"learning_rate": 1.483345675849348e-05,
"loss": 0.0241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02200917899608612,
"step": 1890,
"valid_targets_mean": 3229.2,
"valid_targets_min": 752
},
{
"epoch": 6.015873015873016,
"grad_norm": 0.16169482529118429,
"learning_rate": 1.4611447978435478e-05,
"loss": 0.0116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00997502077370882,
"step": 1895,
"valid_targets_mean": 3454.8,
"valid_targets_min": 601
},
{
"epoch": 6.031746031746032,
"grad_norm": 0.24946653742799907,
"learning_rate": 1.439082848693406e-05,
"loss": 0.0107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.012786821462213993,
"step": 1900,
"valid_targets_mean": 2587.1,
"valid_targets_min": 645
},
{
"epoch": 6.0476190476190474,
"grad_norm": 0.2180413946533584,
"learning_rate": 1.4171606945099076e-05,
"loss": 0.0104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009333532303571701,
"step": 1905,
"valid_targets_mean": 3655.9,
"valid_targets_min": 364
},
{
"epoch": 6.063492063492063,
"grad_norm": 0.19567558644860955,
"learning_rate": 1.3953791959159368e-05,
"loss": 0.0107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009645476937294006,
"step": 1910,
"valid_targets_mean": 2975.1,
"valid_targets_min": 723
},
{
"epoch": 6.079365079365079,
"grad_norm": 0.16598482793198757,
"learning_rate": 1.3737392080125134e-05,
"loss": 0.011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010899921879172325,
"step": 1915,
"valid_targets_mean": 4373.3,
"valid_targets_min": 776
},
{
"epoch": 6.095238095238095,
"grad_norm": 0.18594848404069328,
"learning_rate": 1.3522415803452027e-05,
"loss": 0.0112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010842259973287582,
"step": 1920,
"valid_targets_mean": 3016.0,
"valid_targets_min": 846
},
{
"epoch": 6.111111111111111,
"grad_norm": 0.15994392653373266,
"learning_rate": 1.3308871568707798e-05,
"loss": 0.0108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009601420722901821,
"step": 1925,
"valid_targets_mean": 3397.0,
"valid_targets_min": 845
},
{
"epoch": 6.1269841269841265,
"grad_norm": 0.18397230525918348,
"learning_rate": 1.3096767759240836e-05,
"loss": 0.0117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.012009745463728905,
"step": 1930,
"valid_targets_mean": 3255.9,
"valid_targets_min": 1029
},
{
"epoch": 6.142857142857143,
"grad_norm": 0.15937200483272312,
"learning_rate": 1.2886112701851178e-05,
"loss": 0.0107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.008708149194717407,
"step": 1935,
"valid_targets_mean": 3998.8,
"valid_targets_min": 1493
},
{
"epoch": 6.158730158730159,
"grad_norm": 0.22332299562406527,
"learning_rate": 1.2676914666463508e-05,
"loss": 0.011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.013267268426716328,
"step": 1940,
"valid_targets_mean": 3523.3,
"valid_targets_min": 778
},
{
"epoch": 6.174603174603175,
"grad_norm": 0.2114465027443654,
"learning_rate": 1.2469181865802576e-05,
"loss": 0.0118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01177770271897316,
"step": 1945,
"valid_targets_mean": 2789.5,
"valid_targets_min": 775
},
{
"epoch": 6.190476190476191,
"grad_norm": 0.2002001975909518,
"learning_rate": 1.2262922455070719e-05,
"loss": 0.0105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011541876941919327,
"step": 1950,
"valid_targets_mean": 3120.6,
"valid_targets_min": 356
},
{
"epoch": 6.2063492063492065,
"grad_norm": 0.19841543508080267,
"learning_rate": 1.2058144531627774e-05,
"loss": 0.0112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010849805548787117,
"step": 1955,
"valid_targets_mean": 3501.3,
"valid_targets_min": 381
},
{
"epoch": 6.222222222222222,
"grad_norm": 0.22079617709202617,
"learning_rate": 1.1854856134673097e-05,
"loss": 0.0106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011766679584980011,
"step": 1960,
"valid_targets_mean": 2914.3,
"valid_targets_min": 706
},
{
"epoch": 6.238095238095238,
"grad_norm": 0.20366254465466968,
"learning_rate": 1.1653065244930083e-05,
"loss": 0.0111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011541249230504036,
"step": 1965,
"valid_targets_mean": 3002.7,
"valid_targets_min": 318
},
{
"epoch": 6.253968253968254,
"grad_norm": 0.21870013628872753,
"learning_rate": 1.1452779784332718e-05,
"loss": 0.0113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011631770990788937,
"step": 1970,
"valid_targets_mean": 2956.9,
"valid_targets_min": 955
},
{
"epoch": 6.26984126984127,
"grad_norm": 0.19876493233509734,
"learning_rate": 1.1254007615714685e-05,
"loss": 0.0108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010932897217571735,
"step": 1975,
"valid_targets_mean": 2878.1,
"valid_targets_min": 990
},
{
"epoch": 6.285714285714286,
"grad_norm": 0.22127790602397174,
"learning_rate": 1.1056756542500613e-05,
"loss": 0.0124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0136713907122612,
"step": 1980,
"valid_targets_mean": 3617.0,
"valid_targets_min": 723
},
{
"epoch": 6.301587301587301,
"grad_norm": 0.2107125195160337,
"learning_rate": 1.086103430839982e-05,
"loss": 0.0111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.012097819708287716,
"step": 1985,
"valid_targets_mean": 3181.6,
"valid_targets_min": 270
},
{
"epoch": 6.317460317460317,
"grad_norm": 0.1816728141260571,
"learning_rate": 1.066684859710218e-05,
"loss": 0.0101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009708253666758537,
"step": 1990,
"valid_targets_mean": 3332.8,
"valid_targets_min": 641
},
{
"epoch": 6.333333333333333,
"grad_norm": 0.16887535290523142,
"learning_rate": 1.0474207031976618e-05,
"loss": 0.01,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010005680844187737,
"step": 1995,
"valid_targets_mean": 3711.7,
"valid_targets_min": 728
},
{
"epoch": 6.349206349206349,
"grad_norm": 0.21411607846162323,
"learning_rate": 1.0283117175771701e-05,
"loss": 0.012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011855946853756905,
"step": 2000,
"valid_targets_mean": 2993.4,
"valid_targets_min": 900
},
{
"epoch": 6.365079365079365,
"grad_norm": 0.17645356487975158,
"learning_rate": 1.0093586530318849e-05,
"loss": 0.0108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009679011069238186,
"step": 2005,
"valid_targets_mean": 3481.0,
"valid_targets_min": 634
},
{
"epoch": 6.380952380952381,
"grad_norm": 0.17743382869891283,
"learning_rate": 9.905622536237708e-06,
"loss": 0.0115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010511040687561035,
"step": 2010,
"valid_targets_mean": 3653.9,
"valid_targets_min": 463
},
{
"epoch": 6.396825396825397,
"grad_norm": 0.1748858090662512,
"learning_rate": 9.719232572644187e-06,
"loss": 0.0109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010769926011562347,
"step": 2015,
"valid_targets_mean": 3555.8,
"valid_targets_min": 473
},
{
"epoch": 6.412698412698413,
"grad_norm": 0.21446394993733692,
"learning_rate": 9.534423956860638e-06,
"loss": 0.0116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.012041926383972168,
"step": 2020,
"valid_targets_mean": 3285.3,
"valid_targets_min": 819
},
{
"epoch": 6.428571428571429,
"grad_norm": 0.20144880581326097,
"learning_rate": 9.351203944128694e-06,
"loss": 0.0112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011740963906049728,
"step": 2025,
"valid_targets_mean": 3020.3,
"valid_targets_min": 906
},
{
"epoch": 6.444444444444445,
"grad_norm": 0.19143766850758767,
"learning_rate": 9.16957972732434e-06,
"loss": 0.0109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010045674629509449,
"step": 2030,
"valid_targets_mean": 3397.0,
"valid_targets_min": 680
},
{
"epoch": 6.4603174603174605,
"grad_norm": 0.2086923194209106,
"learning_rate": 8.989558436675643e-06,
"loss": 0.011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011913447640836239,
"step": 2035,
"valid_targets_mean": 3015.7,
"valid_targets_min": 785
},
{
"epoch": 6.476190476190476,
"grad_norm": 0.22807360258432707,
"learning_rate": 8.811147139482745e-06,
"loss": 0.0105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011762842535972595,
"step": 2040,
"valid_targets_mean": 3092.6,
"valid_targets_min": 590
},
{
"epoch": 6.492063492063492,
"grad_norm": 0.23354176928347664,
"learning_rate": 8.634352839840459e-06,
"loss": 0.0115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.013229792937636375,
"step": 2045,
"valid_targets_mean": 3058.2,
"valid_targets_min": 678
},
{
"epoch": 6.507936507936508,
"grad_norm": 0.1844983037827689,
"learning_rate": 8.45918247836327e-06,
"loss": 0.0099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010129349306225777,
"step": 2050,
"valid_targets_mean": 3581.6,
"valid_targets_min": 1165
},
{
"epoch": 6.523809523809524,
"grad_norm": 0.19399825454673583,
"learning_rate": 8.285642931912918e-06,
"loss": 0.0108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009755829349160194,
"step": 2055,
"valid_targets_mean": 2613.1,
"valid_targets_min": 834
},
{
"epoch": 6.5396825396825395,
"grad_norm": 0.1389749321198228,
"learning_rate": 8.113741013328352e-06,
"loss": 0.0103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.007903524674475193,
"step": 2060,
"valid_targets_mean": 4402.2,
"valid_targets_min": 847
},
{
"epoch": 6.555555555555555,
"grad_norm": 0.18758006176253833,
"learning_rate": 7.943483471158326e-06,
"loss": 0.0104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010629910975694656,
"step": 2065,
"valid_targets_mean": 4012.3,
"valid_targets_min": 834
},
{
"epoch": 6.571428571428571,
"grad_norm": 0.19682939141810737,
"learning_rate": 7.774876989396434e-06,
"loss": 0.0104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011159196496009827,
"step": 2070,
"valid_targets_mean": 2739.4,
"valid_targets_min": 725
},
{
"epoch": 6.587301587301587,
"grad_norm": 0.22211885260493783,
"learning_rate": 7.607928187218699e-06,
"loss": 0.0112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011714112013578415,
"step": 2075,
"valid_targets_mean": 2769.2,
"valid_targets_min": 645
},
{
"epoch": 6.603174603174603,
"grad_norm": 0.18954660156576889,
"learning_rate": 7.442643618723777e-06,
"loss": 0.0102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010226866230368614,
"step": 2080,
"valid_targets_mean": 3419.1,
"valid_targets_min": 659
},
{
"epoch": 6.619047619047619,
"grad_norm": 0.18370859857232805,
"learning_rate": 7.2790297726755716e-06,
"loss": 0.0104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010806581936776638,
"step": 2085,
"valid_targets_mean": 3737.0,
"valid_targets_min": 1297
},
{
"epoch": 6.634920634920634,
"grad_norm": 0.18918121566352425,
"learning_rate": 7.117093072248571e-06,
"loss": 0.0116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010280042886734009,
"step": 2090,
"valid_targets_mean": 3381.7,
"valid_targets_min": 825
},
{
"epoch": 6.650793650793651,
"grad_norm": 0.16521970564313165,
"learning_rate": 6.9568398747756396e-06,
"loss": 0.0104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009875962510704994,
"step": 2095,
"valid_targets_mean": 3724.8,
"valid_targets_min": 695
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.17760484054865824,
"learning_rate": 6.798276471498444e-06,
"loss": 0.0106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.008955635130405426,
"step": 2100,
"valid_targets_mean": 3321.4,
"valid_targets_min": 676
},
{
"epoch": 6.682539682539683,
"grad_norm": 0.19520090654719327,
"learning_rate": 6.6414090873204886e-06,
"loss": 0.0096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01039792224764824,
"step": 2105,
"valid_targets_mean": 3116.7,
"valid_targets_min": 486
},
{
"epoch": 6.698412698412699,
"grad_norm": 0.18028684323612768,
"learning_rate": 6.486243880562759e-06,
"loss": 0.01,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009589990600943565,
"step": 2110,
"valid_targets_mean": 3307.2,
"valid_targets_min": 1014
},
{
"epoch": 6.714285714285714,
"grad_norm": 0.1818626090504096,
"learning_rate": 6.3327869427218855e-06,
"loss": 0.0096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.008870855905115604,
"step": 2115,
"valid_targets_mean": 3770.0,
"valid_targets_min": 671
},
{
"epoch": 6.73015873015873,
"grad_norm": 0.20259355193852477,
"learning_rate": 6.181044298231081e-06,
"loss": 0.011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011193148791790009,
"step": 2120,
"valid_targets_mean": 3412.4,
"valid_targets_min": 1066
},
{
"epoch": 6.746031746031746,
"grad_norm": 0.1826233680836296,
"learning_rate": 6.031021904223572e-06,
"loss": 0.01,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009859883226454258,
"step": 2125,
"valid_targets_mean": 3204.7,
"valid_targets_min": 909
},
{
"epoch": 6.761904761904762,
"grad_norm": 0.19159375445872537,
"learning_rate": 5.882725650298787e-06,
"loss": 0.0101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010744025930762291,
"step": 2130,
"valid_targets_mean": 3296.4,
"valid_targets_min": 782
},
{
"epoch": 6.777777777777778,
"grad_norm": 0.19885905304525853,
"learning_rate": 5.736161358291092e-06,
"loss": 0.0101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010427623987197876,
"step": 2135,
"valid_targets_mean": 3111.9,
"valid_targets_min": 677
},
{
"epoch": 6.7936507936507935,
"grad_norm": 0.17789784161152683,
"learning_rate": 5.5913347820412635e-06,
"loss": 0.0117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010946325957775116,
"step": 2140,
"valid_targets_mean": 3855.3,
"valid_targets_min": 1147
},
{
"epoch": 6.809523809523809,
"grad_norm": 0.15707297813715498,
"learning_rate": 5.44825160717059e-06,
"loss": 0.0094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00876244530081749,
"step": 2145,
"valid_targets_mean": 3554.3,
"valid_targets_min": 1052
},
{
"epoch": 6.825396825396825,
"grad_norm": 0.18254202956390367,
"learning_rate": 5.306917450857702e-06,
"loss": 0.0106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010324838571250439,
"step": 2150,
"valid_targets_mean": 3432.2,
"valid_targets_min": 1151
},
{
"epoch": 6.841269841269841,
"grad_norm": 0.1930099772388768,
"learning_rate": 5.167337861617982e-06,
"loss": 0.0106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010216512717306614,
"step": 2155,
"valid_targets_mean": 3203.4,
"valid_targets_min": 627
},
{
"epoch": 6.857142857142857,
"grad_norm": 0.19005087618453437,
"learning_rate": 5.029518319085824e-06,
"loss": 0.0107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009590497240424156,
"step": 2160,
"valid_targets_mean": 2832.6,
"valid_targets_min": 567
},
{
"epoch": 6.8730158730158735,
"grad_norm": 0.1909793872450157,
"learning_rate": 4.893464233799433e-06,
"loss": 0.0105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009531540796160698,
"step": 2165,
"valid_targets_mean": 3204.7,
"valid_targets_min": 1214
},
{
"epoch": 6.888888888888889,
"grad_norm": 0.1793904616196824,
"learning_rate": 4.759180946988495e-06,
"loss": 0.0095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009257054887712002,
"step": 2170,
"valid_targets_mean": 3621.2,
"valid_targets_min": 648
},
{
"epoch": 6.904761904761905,
"grad_norm": 0.1667931999548248,
"learning_rate": 4.626673730364395e-06,
"loss": 0.0104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01003570482134819,
"step": 2175,
"valid_targets_mean": 3463.8,
"valid_targets_min": 285
},
{
"epoch": 6.920634920634921,
"grad_norm": 0.20660624187240578,
"learning_rate": 4.495947785913368e-06,
"loss": 0.0104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011296918615698814,
"step": 2180,
"valid_targets_mean": 3009.5,
"valid_targets_min": 1065
},
{
"epoch": 6.936507936507937,
"grad_norm": 0.20045998659363987,
"learning_rate": 4.367008245692189e-06,
"loss": 0.0107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010871585458517075,
"step": 2185,
"valid_targets_mean": 2747.8,
"valid_targets_min": 944
},
{
"epoch": 6.9523809523809526,
"grad_norm": 0.18038151471049843,
"learning_rate": 4.239860171626769e-06,
"loss": 0.0096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.010644408874213696,
"step": 2190,
"valid_targets_mean": 3555.1,
"valid_targets_min": 1412
},
{
"epoch": 6.968253968253968,
"grad_norm": 0.2067773581962787,
"learning_rate": 4.114508555313351e-06,
"loss": 0.0105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011355595663189888,
"step": 2195,
"valid_targets_mean": 2859.0,
"valid_targets_min": 1016
},
{
"epoch": 6.984126984126984,
"grad_norm": 0.17103297550131963,
"learning_rate": 3.990958317822663e-06,
"loss": 0.0099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.008938717655837536,
"step": 2200,
"valid_targets_mean": 3593.5,
"valid_targets_min": 755
},
{
"epoch": 7.0,
"grad_norm": 0.1851720566559619,
"learning_rate": 3.86921430950658e-06,
"loss": 0.0097,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.009520391002297401,
"step": 2205,
"valid_targets_mean": 3075.6,
"valid_targets_min": 319
},
{
"epoch": 7.015873015873016,
"grad_norm": 0.10035776473927605,
"learning_rate": 3.7492813098078506e-06,
"loss": 0.0055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005469363648444414,
"step": 2210,
"valid_targets_mean": 3383.3,
"valid_targets_min": 904
},
{
"epoch": 7.031746031746032,
"grad_norm": 0.11216010325309983,
"learning_rate": 3.6311640270723757e-06,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0050615957006812096,
"step": 2215,
"valid_targets_mean": 3358.8,
"valid_targets_min": 906
},
{
"epoch": 7.0476190476190474,
"grad_norm": 0.10929451815598369,
"learning_rate": 3.5148670983644104e-06,
"loss": 0.0045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004745697136968374,
"step": 2220,
"valid_targets_mean": 3131.4,
"valid_targets_min": 641
},
{
"epoch": 7.063492063492063,
"grad_norm": 0.12271903497817556,
"learning_rate": 3.400395089284475e-06,
"loss": 0.0051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005019777454435825,
"step": 2225,
"valid_targets_mean": 3120.6,
"valid_targets_min": 1083
},
{
"epoch": 7.079365079365079,
"grad_norm": 0.12730389484499585,
"learning_rate": 3.287752493790186e-06,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005130327306687832,
"step": 2230,
"valid_targets_mean": 3167.2,
"valid_targets_min": 1097
},
{
"epoch": 7.095238095238095,
"grad_norm": 0.11182416285479005,
"learning_rate": 3.1769437340197715e-06,
"loss": 0.005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005524179898202419,
"step": 2235,
"valid_targets_mean": 3423.6,
"valid_targets_min": 724
},
{
"epoch": 7.111111111111111,
"grad_norm": 0.11695963016448252,
"learning_rate": 3.067973160118498e-06,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004500727169215679,
"step": 2240,
"valid_targets_mean": 3632.2,
"valid_targets_min": 854
},
{
"epoch": 7.1269841269841265,
"grad_norm": 0.11470576194363803,
"learning_rate": 2.9608450500678565e-06,
"loss": 0.005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004933122545480728,
"step": 2245,
"valid_targets_mean": 3541.1,
"valid_targets_min": 955
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.12111224092063874,
"learning_rate": 2.8555636095176975e-06,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004573405720293522,
"step": 2250,
"valid_targets_mean": 3104.1,
"valid_targets_min": 656
},
{
"epoch": 7.158730158730159,
"grad_norm": 0.12261137209357191,
"learning_rate": 2.7521329716210074e-06,
"loss": 0.0048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005299875512719154,
"step": 2255,
"valid_targets_mean": 4327.8,
"valid_targets_min": 993
},
{
"epoch": 7.174603174603175,
"grad_norm": 0.12983398943430338,
"learning_rate": 2.6505571968717725e-06,
"loss": 0.0051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005398509092628956,
"step": 2260,
"valid_targets_mean": 3107.6,
"valid_targets_min": 786
},
{
"epoch": 7.190476190476191,
"grad_norm": 0.10675934532792695,
"learning_rate": 2.550840272945465e-06,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004216499626636505,
"step": 2265,
"valid_targets_mean": 3529.8,
"valid_targets_min": 911
},
{
"epoch": 7.2063492063492065,
"grad_norm": 0.1299523571452913,
"learning_rate": 2.4529861145425605e-06,
"loss": 0.0053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004921115003526211,
"step": 2270,
"valid_targets_mean": 2665.1,
"valid_targets_min": 778
},
{
"epoch": 7.222222222222222,
"grad_norm": 0.09718138220360564,
"learning_rate": 2.3569985632348247e-06,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004324613604694605,
"step": 2275,
"valid_targets_mean": 3938.3,
"valid_targets_min": 770
},
{
"epoch": 7.238095238095238,
"grad_norm": 0.15050121932011226,
"learning_rate": 2.2628813873145303e-06,
"loss": 0.0053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.006321427412331104,
"step": 2280,
"valid_targets_mean": 2946.2,
"valid_targets_min": 293
},
{
"epoch": 7.253968253968254,
"grad_norm": 0.10028028833360875,
"learning_rate": 2.17063828164647e-06,
"loss": 0.0048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0037990009877830744,
"step": 2285,
"valid_targets_mean": 4333.9,
"valid_targets_min": 1228
},
{
"epoch": 7.26984126984127,
"grad_norm": 0.12203494313480417,
"learning_rate": 2.0802728675229587e-06,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004578214138746262,
"step": 2290,
"valid_targets_mean": 3254.3,
"valid_targets_min": 741
},
{
"epoch": 7.285714285714286,
"grad_norm": 0.12085258195724963,
"learning_rate": 1.9917886925216234e-06,
"loss": 0.0045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0045486027374863625,
"step": 2295,
"valid_targets_mean": 3183.5,
"valid_targets_min": 702
},
{
"epoch": 7.301587301587301,
"grad_norm": 0.10039022352128651,
"learning_rate": 1.9051892303661834e-06,
"loss": 0.0053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004291100427508354,
"step": 2300,
"valid_targets_mean": 3600.1,
"valid_targets_min": 1324
},
{
"epoch": 7.317460317460317,
"grad_norm": 0.12218249251554697,
"learning_rate": 1.8204778807900003e-06,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005083533935248852,
"step": 2305,
"valid_targets_mean": 3657.6,
"valid_targets_min": 1242
},
{
"epoch": 7.333333333333333,
"grad_norm": 0.10958599619723258,
"learning_rate": 1.7376579694026896e-06,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004395646043121815,
"step": 2310,
"valid_targets_mean": 2992.5,
"valid_targets_min": 676
},
{
"epoch": 7.349206349206349,
"grad_norm": 0.11998147910250385,
"learning_rate": 1.6567327475595195e-06,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00454556941986084,
"step": 2315,
"valid_targets_mean": 3329.6,
"valid_targets_min": 318
},
{
"epoch": 7.365079365079365,
"grad_norm": 0.13220362888562553,
"learning_rate": 1.577705392233797e-06,
"loss": 0.005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005032036453485489,
"step": 2320,
"valid_targets_mean": 3215.4,
"valid_targets_min": 1121
},
{
"epoch": 7.380952380952381,
"grad_norm": 0.10580151354074074,
"learning_rate": 1.5005790058920943e-06,
"loss": 0.005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00551568390801549,
"step": 2325,
"valid_targets_mean": 4641.6,
"valid_targets_min": 879
},
{
"epoch": 7.396825396825397,
"grad_norm": 0.11928758266665618,
"learning_rate": 1.4253566163725252e-06,
"loss": 0.0048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005063705146312714,
"step": 2330,
"valid_targets_mean": 3024.2,
"valid_targets_min": 654
},
{
"epoch": 7.412698412698413,
"grad_norm": 0.12921524618139682,
"learning_rate": 1.3520411767658059e-06,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005293884314596653,
"step": 2335,
"valid_targets_mean": 3078.6,
"valid_targets_min": 886
},
{
"epoch": 7.428571428571429,
"grad_norm": 0.11765595069380638,
"learning_rate": 1.2806355652993762e-06,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0043135518208146095,
"step": 2340,
"valid_targets_mean": 3240.3,
"valid_targets_min": 905
},
{
"epoch": 7.444444444444445,
"grad_norm": 0.14268732952024096,
"learning_rate": 1.2111425852243785e-06,
"loss": 0.0051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005359090864658356,
"step": 2345,
"valid_targets_mean": 2711.0,
"valid_targets_min": 900
},
{
"epoch": 7.4603174603174605,
"grad_norm": 0.11255760360881431,
"learning_rate": 1.143564964705618e-06,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005487900227308273,
"step": 2350,
"valid_targets_mean": 3677.5,
"valid_targets_min": 446
},
{
"epoch": 7.476190476190476,
"grad_norm": 0.12881208811316566,
"learning_rate": 1.0779053567144427e-06,
"loss": 0.005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00521219614893198,
"step": 2355,
"valid_targets_mean": 3273.8,
"valid_targets_min": 1278
},
{
"epoch": 7.492063492063492,
"grad_norm": 0.10843979278626246,
"learning_rate": 1.014166338924627e-06,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004336035810410976,
"step": 2360,
"valid_targets_mean": 3976.3,
"valid_targets_min": 1259
},
{
"epoch": 7.507936507936508,
"grad_norm": 0.11503295790694797,
"learning_rate": 9.523504136111306e-07,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004730178043246269,
"step": 2365,
"valid_targets_mean": 3674.6,
"valid_targets_min": 791
},
{
"epoch": 7.523809523809524,
"grad_norm": 0.11155461084205073,
"learning_rate": 8.92460007551904e-07,
"loss": 0.0043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004000429529696703,
"step": 2370,
"valid_targets_mean": 3718.4,
"valid_targets_min": 1046
},
{
"epoch": 7.5396825396825395,
"grad_norm": 0.11157625198691828,
"learning_rate": 8.344974719326104e-07,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004754913039505482,
"step": 2375,
"valid_targets_mean": 3638.6,
"valid_targets_min": 1131
},
{
"epoch": 7.555555555555555,
"grad_norm": 0.12422499624429512,
"learning_rate": 7.784650822542871e-07,
"loss": 0.0048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0053131962195038795,
"step": 2380,
"valid_targets_mean": 3119.1,
"valid_targets_min": 835
},
{
"epoch": 7.571428571428571,
"grad_norm": 0.11431681627624184,
"learning_rate": 7.243650382440736e-07,
"loss": 0.0054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005361621268093586,
"step": 2385,
"valid_targets_mean": 3909.5,
"valid_targets_min": 741
},
{
"epoch": 7.587301587301587,
"grad_norm": 0.11004296496426871,
"learning_rate": 6.721994637687967e-07,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004509164020419121,
"step": 2390,
"valid_targets_mean": 3389.0,
"valid_targets_min": 984
},
{
"epoch": 7.603174603174603,
"grad_norm": 0.11863839089336183,
"learning_rate": 6.219704067516374e-07,
"loss": 0.0048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004457586910575628,
"step": 2395,
"valid_targets_mean": 3174.5,
"valid_targets_min": 655
},
{
"epoch": 7.619047619047619,
"grad_norm": 0.10878395533070197,
"learning_rate": 5.736798390916898e-07,
"loss": 0.0045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005195206962525845,
"step": 2400,
"valid_targets_mean": 2997.7,
"valid_targets_min": 331
},
{
"epoch": 7.634920634920634,
"grad_norm": 0.10609110020177216,
"learning_rate": 5.273296565865948e-07,
"loss": 0.0051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004106806591153145,
"step": 2405,
"valid_targets_mean": 3824.8,
"valid_targets_min": 586
},
{
"epoch": 7.650793650793651,
"grad_norm": 0.1089309599479779,
"learning_rate": 4.829216788580726e-07,
"loss": 0.005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004249934572726488,
"step": 2410,
"valid_targets_mean": 3445.7,
"valid_targets_min": 723
},
{
"epoch": 7.666666666666667,
"grad_norm": 0.10831204625157581,
"learning_rate": 4.404576492805179e-07,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005328735336661339,
"step": 2415,
"valid_targets_mean": 3627.3,
"valid_targets_min": 1129
},
{
"epoch": 7.682539682539683,
"grad_norm": 0.09343002059771495,
"learning_rate": 3.999392349125386e-07,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004198350943624973,
"step": 2420,
"valid_targets_mean": 4037.8,
"valid_targets_min": 802
},
{
"epoch": 7.698412698412699,
"grad_norm": 0.11986503661890716,
"learning_rate": 3.613680264315189e-07,
"loss": 0.0045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0049739317037165165,
"step": 2425,
"valid_targets_mean": 2951.8,
"valid_targets_min": 671
},
{
"epoch": 7.714285714285714,
"grad_norm": 0.12864393943302033,
"learning_rate": 3.247455380711806e-07,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004533006809651852,
"step": 2430,
"valid_targets_mean": 3775.6,
"valid_targets_min": 965
},
{
"epoch": 7.73015873015873,
"grad_norm": 0.10330729032751995,
"learning_rate": 2.900732075621082e-07,
"loss": 0.0048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004784930497407913,
"step": 2435,
"valid_targets_mean": 3709.4,
"valid_targets_min": 1062
},
{
"epoch": 7.746031746031746,
"grad_norm": 0.10498794605153543,
"learning_rate": 2.5735239607534434e-07,
"loss": 0.0045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0041520013473927975,
"step": 2440,
"valid_targets_mean": 3549.4,
"valid_targets_min": 728
},
{
"epoch": 7.761904761904762,
"grad_norm": 0.14249588185715686,
"learning_rate": 2.2658438816892112e-07,
"loss": 0.0052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.006070821080356836,
"step": 2445,
"valid_targets_mean": 2594.3,
"valid_targets_min": 964
},
{
"epoch": 7.777777777777778,
"grad_norm": 0.11000276713367281,
"learning_rate": 1.9777039173746182e-07,
"loss": 0.0044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004534910432994366,
"step": 2450,
"valid_targets_mean": 3489.1,
"valid_targets_min": 993
},
{
"epoch": 7.7936507936507935,
"grad_norm": 0.11795354828909771,
"learning_rate": 1.709115379647186e-07,
"loss": 0.0044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004670111462473869,
"step": 2455,
"valid_targets_mean": 2994.7,
"valid_targets_min": 1170
},
{
"epoch": 7.809523809523809,
"grad_norm": 0.11938780754001632,
"learning_rate": 1.460088812792082e-07,
"loss": 0.0052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0043688188306987286,
"step": 2460,
"valid_targets_mean": 3415.9,
"valid_targets_min": 1507
},
{
"epoch": 7.825396825396825,
"grad_norm": 0.09803097399786176,
"learning_rate": 1.2306339931279499e-07,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004104914143681526,
"step": 2465,
"valid_targets_mean": 3690.8,
"valid_targets_min": 789
},
{
"epoch": 7.841269841269841,
"grad_norm": 0.1190925478495514,
"learning_rate": 1.0207599286229941e-07,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004594314843416214,
"step": 2470,
"valid_targets_mean": 3170.5,
"valid_targets_min": 974
},
{
"epoch": 7.857142857142857,
"grad_norm": 0.11013026165583094,
"learning_rate": 8.304748585417078e-08,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004138472955673933,
"step": 2475,
"valid_targets_mean": 3476.1,
"valid_targets_min": 897
},
{
"epoch": 7.8730158730158735,
"grad_norm": 0.13041391832556282,
"learning_rate": 6.597862531210197e-08,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004992269910871983,
"step": 2480,
"valid_targets_mean": 3190.1,
"valid_targets_min": 810
},
{
"epoch": 7.888888888888889,
"grad_norm": 0.17400697758449113,
"learning_rate": 5.0870081327725194e-08,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005131562706083059,
"step": 2485,
"valid_targets_mean": 2452.6,
"valid_targets_min": 705
},
{
"epoch": 7.904761904761905,
"grad_norm": 0.1270973871093473,
"learning_rate": 3.7722447034305164e-08,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004991158843040466,
"step": 2490,
"valid_targets_mean": 2931.3,
"valid_targets_min": 735
},
{
"epoch": 7.920634920634921,
"grad_norm": 0.15159667114358516,
"learning_rate": 2.653623858344667e-08,
"loss": 0.0053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00546233681961894,
"step": 2495,
"valid_targets_mean": 2654.0,
"valid_targets_min": 622
},
{
"epoch": 7.936507936507937,
"grad_norm": 0.11113963222959865,
"learning_rate": 1.731189512482745e-08,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004757697228342295,
"step": 2500,
"valid_targets_mean": 3359.4,
"valid_targets_min": 373
},
{
"epoch": 7.9523809523809526,
"grad_norm": 0.11728609394092665,
"learning_rate": 1.0049778788967513e-08,
"loss": 0.0047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005282067693769932,
"step": 2505,
"valid_targets_mean": 3915.2,
"valid_targets_min": 1487
},
{
"epoch": 7.968253968253968,
"grad_norm": 0.11591915545705475,
"learning_rate": 4.750174673018304e-09,
"loss": 0.0054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0053293961100280285,
"step": 2510,
"valid_targets_mean": 3678.9,
"valid_targets_min": 1378
},
{
"epoch": 7.984126984126984,
"grad_norm": 0.1296960881375092,
"learning_rate": 1.4132908295549919e-09,
"loss": 0.0049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00557930301874876,
"step": 2515,
"valid_targets_mean": 3071.5,
"valid_targets_min": 798
},
{
"epoch": 8.0,
"grad_norm": 0.10517511047841067,
"learning_rate": 3.925825840522812e-11,
"loss": 0.0046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004353777505457401,
"step": 2520,
"valid_targets_mean": 3003.7,
"valid_targets_min": 671
},
{
"epoch": 8.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.004353777505457401,
"step": 2520,
"total_flos": 1257015450664960.0,
"train_loss": 0.07835676025835768,
"train_runtime": 38657.4963,
"train_samples_per_second": 2.085,
"train_steps_per_second": 0.065,
"valid_targets_mean": 3003.7,
"valid_targets_min": 671
}
],
"logging_steps": 5,
"max_steps": 2520,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1257015450664960.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}