Model: laion/openthoughts-4-code-qwen3-32b-annotated-32k_qwen2.5-1.5B_32k Source: Original Platform
16526 lines
459 KiB
JSON
16526 lines
459 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 2.0,
|
|
"eval_steps": 500,
|
|
"global_step": 7494,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0013344008540165466,
|
|
"grad_norm": 1.5823220146867805,
|
|
"learning_rate": 7.999999999999999e-07,
|
|
"loss": 0.9535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9687002301216125,
|
|
"step": 5,
|
|
"valid_targets_mean": 16998.1,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.0026688017080330933,
|
|
"grad_norm": 1.4613232796627884,
|
|
"learning_rate": 1.8e-06,
|
|
"loss": 0.9367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9529542326927185,
|
|
"step": 10,
|
|
"valid_targets_mean": 15522.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.0040032025620496394,
|
|
"grad_norm": 0.9983532962437741,
|
|
"learning_rate": 2.8e-06,
|
|
"loss": 0.9145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9085040092468262,
|
|
"step": 15,
|
|
"valid_targets_mean": 16460.8,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.0053376034160661865,
|
|
"grad_norm": 0.6838873648945817,
|
|
"learning_rate": 3.7999999999999996e-06,
|
|
"loss": 0.9386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9313464164733887,
|
|
"step": 20,
|
|
"valid_targets_mean": 15545.1,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 0.006672004270082733,
|
|
"grad_norm": 0.692627322161273,
|
|
"learning_rate": 4.8e-06,
|
|
"loss": 0.8995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9295620322227478,
|
|
"step": 25,
|
|
"valid_targets_mean": 14973.8,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 0.008006405124099279,
|
|
"grad_norm": 0.49043286997373386,
|
|
"learning_rate": 5.7999999999999995e-06,
|
|
"loss": 0.9114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8984705209732056,
|
|
"step": 30,
|
|
"valid_targets_mean": 16087.6,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 0.009340805978115827,
|
|
"grad_norm": 0.3493870125757026,
|
|
"learning_rate": 6.8e-06,
|
|
"loss": 0.8679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.863591730594635,
|
|
"step": 35,
|
|
"valid_targets_mean": 15901.3,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.010675206832132373,
|
|
"grad_norm": 0.28005099953823187,
|
|
"learning_rate": 7.799999999999998e-06,
|
|
"loss": 0.8752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8416589498519897,
|
|
"step": 40,
|
|
"valid_targets_mean": 17096.1,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 0.01200960768614892,
|
|
"grad_norm": 0.2633854430730848,
|
|
"learning_rate": 8.799999999999999e-06,
|
|
"loss": 0.8419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8102971315383911,
|
|
"step": 45,
|
|
"valid_targets_mean": 16528.9,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.013344008540165465,
|
|
"grad_norm": 0.2416755355371708,
|
|
"learning_rate": 9.799999999999998e-06,
|
|
"loss": 0.8621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8548877239227295,
|
|
"step": 50,
|
|
"valid_targets_mean": 15770.2,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.014678409394182012,
|
|
"grad_norm": 0.22742192850097903,
|
|
"learning_rate": 1.0799999999999998e-05,
|
|
"loss": 0.8292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7773482799530029,
|
|
"step": 55,
|
|
"valid_targets_mean": 16897.5,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.016012810248198558,
|
|
"grad_norm": 0.21185009825474607,
|
|
"learning_rate": 1.1799999999999999e-05,
|
|
"loss": 0.8224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8373425602912903,
|
|
"step": 60,
|
|
"valid_targets_mean": 15090.8,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 0.017347211102215106,
|
|
"grad_norm": 0.16530947573341376,
|
|
"learning_rate": 1.2799999999999998e-05,
|
|
"loss": 0.8305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8192263841629028,
|
|
"step": 65,
|
|
"valid_targets_mean": 16723.2,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 0.018681611956231654,
|
|
"grad_norm": 0.19595842220841034,
|
|
"learning_rate": 1.3799999999999998e-05,
|
|
"loss": 0.8164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8121230602264404,
|
|
"step": 70,
|
|
"valid_targets_mean": 15894.8,
|
|
"valid_targets_min": 85
|
|
},
|
|
{
|
|
"epoch": 0.020016012810248198,
|
|
"grad_norm": 0.14895354237962097,
|
|
"learning_rate": 1.4799999999999999e-05,
|
|
"loss": 0.7964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7786427736282349,
|
|
"step": 75,
|
|
"valid_targets_mean": 16543.0,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 0.021350413664264746,
|
|
"grad_norm": 0.1893637073683363,
|
|
"learning_rate": 1.5799999999999998e-05,
|
|
"loss": 0.7989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8141142129898071,
|
|
"step": 80,
|
|
"valid_targets_mean": 16467.3,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 0.02268481451828129,
|
|
"grad_norm": 0.20064348676009006,
|
|
"learning_rate": 1.68e-05,
|
|
"loss": 0.7965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7834018468856812,
|
|
"step": 85,
|
|
"valid_targets_mean": 15867.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.02401921537229784,
|
|
"grad_norm": 0.1617554141425518,
|
|
"learning_rate": 1.78e-05,
|
|
"loss": 0.7926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7742629051208496,
|
|
"step": 90,
|
|
"valid_targets_mean": 16098.4,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 0.025353616226314386,
|
|
"grad_norm": 0.21091252121676837,
|
|
"learning_rate": 1.8799999999999996e-05,
|
|
"loss": 0.8009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8052405714988708,
|
|
"step": 95,
|
|
"valid_targets_mean": 16067.6,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 0.02668801708033093,
|
|
"grad_norm": 0.24666835856193395,
|
|
"learning_rate": 1.98e-05,
|
|
"loss": 0.772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7658431529998779,
|
|
"step": 100,
|
|
"valid_targets_mean": 15941.7,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 0.02802241793434748,
|
|
"grad_norm": 0.23447043514026744,
|
|
"learning_rate": 2.0799999999999997e-05,
|
|
"loss": 0.7746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7565805315971375,
|
|
"step": 105,
|
|
"valid_targets_mean": 16312.7,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 0.029356818788364023,
|
|
"grad_norm": 0.20646522638524778,
|
|
"learning_rate": 2.1799999999999998e-05,
|
|
"loss": 0.7691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7705087661743164,
|
|
"step": 110,
|
|
"valid_targets_mean": 17673.3,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 0.03069121964238057,
|
|
"grad_norm": 0.2711537842641349,
|
|
"learning_rate": 2.28e-05,
|
|
"loss": 0.7771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7970471382141113,
|
|
"step": 115,
|
|
"valid_targets_mean": 14606.4,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 0.032025620496397116,
|
|
"grad_norm": 0.35503609213795806,
|
|
"learning_rate": 2.38e-05,
|
|
"loss": 0.7815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.803318977355957,
|
|
"step": 120,
|
|
"valid_targets_mean": 15105.9,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.03336002135041367,
|
|
"grad_norm": 0.2716354134035751,
|
|
"learning_rate": 2.4799999999999996e-05,
|
|
"loss": 0.7829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.799247145652771,
|
|
"step": 125,
|
|
"valid_targets_mean": 16679.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.03469442220443021,
|
|
"grad_norm": 0.3091732932839323,
|
|
"learning_rate": 2.5799999999999997e-05,
|
|
"loss": 0.7623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7341678738594055,
|
|
"step": 130,
|
|
"valid_targets_mean": 16538.3,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 0.036028823058446756,
|
|
"grad_norm": 0.2635427050582435,
|
|
"learning_rate": 2.6799999999999998e-05,
|
|
"loss": 0.7565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7394607067108154,
|
|
"step": 135,
|
|
"valid_targets_mean": 16406.4,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.03736322391246331,
|
|
"grad_norm": 0.3435518610097947,
|
|
"learning_rate": 2.7799999999999995e-05,
|
|
"loss": 0.7886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8229363560676575,
|
|
"step": 140,
|
|
"valid_targets_mean": 14519.3,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 0.03869762476647985,
|
|
"grad_norm": 0.417362036919672,
|
|
"learning_rate": 2.88e-05,
|
|
"loss": 0.7753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7946305871009827,
|
|
"step": 145,
|
|
"valid_targets_mean": 16715.3,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.040032025620496396,
|
|
"grad_norm": 0.35755095831337047,
|
|
"learning_rate": 2.9799999999999996e-05,
|
|
"loss": 0.7661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7696456909179688,
|
|
"step": 150,
|
|
"valid_targets_mean": 17409.9,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 0.04136642647451294,
|
|
"grad_norm": 0.28011910125099576,
|
|
"learning_rate": 3.0799999999999996e-05,
|
|
"loss": 0.7708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7648700475692749,
|
|
"step": 155,
|
|
"valid_targets_mean": 15438.4,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 0.04270082732852949,
|
|
"grad_norm": 0.27045959495765237,
|
|
"learning_rate": 3.1799999999999994e-05,
|
|
"loss": 0.7601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7666411399841309,
|
|
"step": 160,
|
|
"valid_targets_mean": 16994.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.044035228182546036,
|
|
"grad_norm": 0.33884030644268365,
|
|
"learning_rate": 3.28e-05,
|
|
"loss": 0.7435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7470793128013611,
|
|
"step": 165,
|
|
"valid_targets_mean": 16104.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.04536962903656258,
|
|
"grad_norm": 0.3712406411051424,
|
|
"learning_rate": 3.3799999999999995e-05,
|
|
"loss": 0.7605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7480790615081787,
|
|
"step": 170,
|
|
"valid_targets_mean": 16437.9,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.04670402989057913,
|
|
"grad_norm": 0.3460176427784315,
|
|
"learning_rate": 3.48e-05,
|
|
"loss": 0.7376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7141637206077576,
|
|
"step": 175,
|
|
"valid_targets_mean": 16461.9,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.04803843074459568,
|
|
"grad_norm": 0.2596570804564528,
|
|
"learning_rate": 3.5799999999999996e-05,
|
|
"loss": 0.7759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7451268434524536,
|
|
"step": 180,
|
|
"valid_targets_mean": 16862.6,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.04937283159861222,
|
|
"grad_norm": 0.25687426933762886,
|
|
"learning_rate": 3.679999999999999e-05,
|
|
"loss": 0.7487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7626557946205139,
|
|
"step": 185,
|
|
"valid_targets_mean": 16003.7,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 0.05070723245262877,
|
|
"grad_norm": 0.22447007810160508,
|
|
"learning_rate": 3.78e-05,
|
|
"loss": 0.7693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7929567098617554,
|
|
"step": 190,
|
|
"valid_targets_mean": 14926.7,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 0.05204163330664532,
|
|
"grad_norm": 0.2391985466160003,
|
|
"learning_rate": 3.8799999999999994e-05,
|
|
"loss": 0.7515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.742867112159729,
|
|
"step": 195,
|
|
"valid_targets_mean": 16102.4,
|
|
"valid_targets_min": 97
|
|
},
|
|
{
|
|
"epoch": 0.05337603416066186,
|
|
"grad_norm": 0.23295188871090147,
|
|
"learning_rate": 3.979999999999999e-05,
|
|
"loss": 0.751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7330241203308105,
|
|
"step": 200,
|
|
"valid_targets_mean": 16928.9,
|
|
"valid_targets_min": 122
|
|
},
|
|
{
|
|
"epoch": 0.054710435014678406,
|
|
"grad_norm": 0.2883432188312913,
|
|
"learning_rate": 4.08e-05,
|
|
"loss": 0.7633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7030720114707947,
|
|
"step": 205,
|
|
"valid_targets_mean": 16557.2,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 0.05604483586869496,
|
|
"grad_norm": 0.2810220148951038,
|
|
"learning_rate": 4.18e-05,
|
|
"loss": 0.7607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7284795045852661,
|
|
"step": 210,
|
|
"valid_targets_mean": 15676.2,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 0.0573792367227115,
|
|
"grad_norm": 0.44105767232030274,
|
|
"learning_rate": 4.28e-05,
|
|
"loss": 0.7494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6875466108322144,
|
|
"step": 215,
|
|
"valid_targets_mean": 16369.6,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 0.058713637576728046,
|
|
"grad_norm": 0.31069975408639855,
|
|
"learning_rate": 4.3799999999999994e-05,
|
|
"loss": 0.7532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7670011520385742,
|
|
"step": 220,
|
|
"valid_targets_mean": 16207.9,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.0600480384307446,
|
|
"grad_norm": 0.38918464319312723,
|
|
"learning_rate": 4.48e-05,
|
|
"loss": 0.7452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7355377674102783,
|
|
"step": 225,
|
|
"valid_targets_mean": 16136.9,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 0.06138243928476114,
|
|
"grad_norm": 0.39130577028120467,
|
|
"learning_rate": 4.5799999999999995e-05,
|
|
"loss": 0.7659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7933982610702515,
|
|
"step": 230,
|
|
"valid_targets_mean": 14762.8,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 0.0627168401387777,
|
|
"grad_norm": 0.34250803215849346,
|
|
"learning_rate": 4.68e-05,
|
|
"loss": 0.7531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.740280270576477,
|
|
"step": 235,
|
|
"valid_targets_mean": 15716.9,
|
|
"valid_targets_min": 179
|
|
},
|
|
{
|
|
"epoch": 0.06405124099279423,
|
|
"grad_norm": 0.34271827895918605,
|
|
"learning_rate": 4.7799999999999996e-05,
|
|
"loss": 0.7481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7414883375167847,
|
|
"step": 240,
|
|
"valid_targets_mean": 15428.0,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.06538564184681078,
|
|
"grad_norm": 0.23278045941311432,
|
|
"learning_rate": 4.8799999999999994e-05,
|
|
"loss": 0.7361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7401506900787354,
|
|
"step": 245,
|
|
"valid_targets_mean": 16283.0,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.06672004270082733,
|
|
"grad_norm": 0.27127309559658486,
|
|
"learning_rate": 4.98e-05,
|
|
"loss": 0.7647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7441200017929077,
|
|
"step": 250,
|
|
"valid_targets_mean": 16885.9,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.06805444355484387,
|
|
"grad_norm": 0.481644259210808,
|
|
"learning_rate": 5.0799999999999995e-05,
|
|
"loss": 0.7516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7468514442443848,
|
|
"step": 255,
|
|
"valid_targets_mean": 15019.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 0.06938884440886042,
|
|
"grad_norm": 0.4098831620054727,
|
|
"learning_rate": 5.179999999999999e-05,
|
|
"loss": 0.7373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7163417339324951,
|
|
"step": 260,
|
|
"valid_targets_mean": 16107.3,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.07072324526287697,
|
|
"grad_norm": 0.39289466248174665,
|
|
"learning_rate": 5.279999999999999e-05,
|
|
"loss": 0.7385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7264701128005981,
|
|
"step": 265,
|
|
"valid_targets_mean": 15163.3,
|
|
"valid_targets_min": 105
|
|
},
|
|
{
|
|
"epoch": 0.07205764611689351,
|
|
"grad_norm": 0.36659091377537845,
|
|
"learning_rate": 5.38e-05,
|
|
"loss": 0.7459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7142473459243774,
|
|
"step": 270,
|
|
"valid_targets_mean": 16897.8,
|
|
"valid_targets_min": 135
|
|
},
|
|
{
|
|
"epoch": 0.07339204697091006,
|
|
"grad_norm": 0.29714942560135077,
|
|
"learning_rate": 5.48e-05,
|
|
"loss": 0.7382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7210055589675903,
|
|
"step": 275,
|
|
"valid_targets_mean": 16187.2,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.07472644782492661,
|
|
"grad_norm": 0.25299625938272635,
|
|
"learning_rate": 5.5799999999999994e-05,
|
|
"loss": 0.7288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6921160221099854,
|
|
"step": 280,
|
|
"valid_targets_mean": 15581.8,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 0.07606084867894315,
|
|
"grad_norm": 0.37119467794678546,
|
|
"learning_rate": 5.679999999999999e-05,
|
|
"loss": 0.7388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7240397334098816,
|
|
"step": 285,
|
|
"valid_targets_mean": 15023.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.0773952495329597,
|
|
"grad_norm": 0.3945122893124607,
|
|
"learning_rate": 5.78e-05,
|
|
"loss": 0.7289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7283365726470947,
|
|
"step": 290,
|
|
"valid_targets_mean": 15270.7,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.07872965038697624,
|
|
"grad_norm": 0.377065993532186,
|
|
"learning_rate": 5.88e-05,
|
|
"loss": 0.7277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7348593473434448,
|
|
"step": 295,
|
|
"valid_targets_mean": 16535.4,
|
|
"valid_targets_min": 130
|
|
},
|
|
{
|
|
"epoch": 0.08006405124099279,
|
|
"grad_norm": 0.2580659845527572,
|
|
"learning_rate": 5.98e-05,
|
|
"loss": 0.7596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.736121654510498,
|
|
"step": 300,
|
|
"valid_targets_mean": 15587.4,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.08139845209500934,
|
|
"grad_norm": 0.4376033475371323,
|
|
"learning_rate": 6.0799999999999994e-05,
|
|
"loss": 0.7345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7281279563903809,
|
|
"step": 305,
|
|
"valid_targets_mean": 15663.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.08273285294902588,
|
|
"grad_norm": 0.4820258206141076,
|
|
"learning_rate": 6.18e-05,
|
|
"loss": 0.7449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7241256237030029,
|
|
"step": 310,
|
|
"valid_targets_mean": 16960.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.08406725380304243,
|
|
"grad_norm": 0.46073068241555637,
|
|
"learning_rate": 6.28e-05,
|
|
"loss": 0.7299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7059940695762634,
|
|
"step": 315,
|
|
"valid_targets_mean": 16698.9,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.08540165465705898,
|
|
"grad_norm": 0.4089341185808167,
|
|
"learning_rate": 6.379999999999999e-05,
|
|
"loss": 0.7372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7223199605941772,
|
|
"step": 320,
|
|
"valid_targets_mean": 15688.4,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.08673605551107552,
|
|
"grad_norm": 0.3223826554928393,
|
|
"learning_rate": 6.479999999999999e-05,
|
|
"loss": 0.7176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7099969983100891,
|
|
"step": 325,
|
|
"valid_targets_mean": 16591.9,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 0.08807045636509207,
|
|
"grad_norm": 0.40317021543159837,
|
|
"learning_rate": 6.579999999999999e-05,
|
|
"loss": 0.716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7116838693618774,
|
|
"step": 330,
|
|
"valid_targets_mean": 15636.7,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.08940485721910862,
|
|
"grad_norm": 0.3714431977080272,
|
|
"learning_rate": 6.68e-05,
|
|
"loss": 0.725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7317278385162354,
|
|
"step": 335,
|
|
"valid_targets_mean": 17115.1,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 0.09073925807312516,
|
|
"grad_norm": 0.4525830067727992,
|
|
"learning_rate": 6.78e-05,
|
|
"loss": 0.7378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.711759090423584,
|
|
"step": 340,
|
|
"valid_targets_mean": 16281.7,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.09207365892714171,
|
|
"grad_norm": 0.4673023679344057,
|
|
"learning_rate": 6.879999999999999e-05,
|
|
"loss": 0.7283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7156293392181396,
|
|
"step": 345,
|
|
"valid_targets_mean": 17035.9,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.09340805978115826,
|
|
"grad_norm": 0.6302835443346549,
|
|
"learning_rate": 6.979999999999999e-05,
|
|
"loss": 0.7395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7281468510627747,
|
|
"step": 350,
|
|
"valid_targets_mean": 15777.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.0947424606351748,
|
|
"grad_norm": 0.4776181444975064,
|
|
"learning_rate": 7.079999999999999e-05,
|
|
"loss": 0.7362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.731685221195221,
|
|
"step": 355,
|
|
"valid_targets_mean": 16132.8,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.09607686148919135,
|
|
"grad_norm": 0.46366024009522955,
|
|
"learning_rate": 7.18e-05,
|
|
"loss": 0.7228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7093417644500732,
|
|
"step": 360,
|
|
"valid_targets_mean": 17213.5,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 0.0974112623432079,
|
|
"grad_norm": 0.6669182877606497,
|
|
"learning_rate": 7.28e-05,
|
|
"loss": 0.7336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7283340692520142,
|
|
"step": 365,
|
|
"valid_targets_mean": 16158.5,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 0.09874566319722444,
|
|
"grad_norm": 0.5530295536238364,
|
|
"learning_rate": 7.379999999999999e-05,
|
|
"loss": 0.7288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.730088472366333,
|
|
"step": 370,
|
|
"valid_targets_mean": 15394.1,
|
|
"valid_targets_min": 2
|
|
},
|
|
{
|
|
"epoch": 0.100080064051241,
|
|
"grad_norm": 0.625459475063553,
|
|
"learning_rate": 7.479999999999999e-05,
|
|
"loss": 0.7357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7494319677352905,
|
|
"step": 375,
|
|
"valid_targets_mean": 15665.5,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.10141446490525755,
|
|
"grad_norm": 0.5768997718865845,
|
|
"learning_rate": 7.579999999999999e-05,
|
|
"loss": 0.7199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6803427338600159,
|
|
"step": 380,
|
|
"valid_targets_mean": 17104.0,
|
|
"valid_targets_min": 101
|
|
},
|
|
{
|
|
"epoch": 0.10274886575927408,
|
|
"grad_norm": 0.4506068757537433,
|
|
"learning_rate": 7.68e-05,
|
|
"loss": 0.7239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7258304357528687,
|
|
"step": 385,
|
|
"valid_targets_mean": 17220.1,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 0.10408326661329063,
|
|
"grad_norm": 0.47171044726445444,
|
|
"learning_rate": 7.780000000000001e-05,
|
|
"loss": 0.7299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7591128349304199,
|
|
"step": 390,
|
|
"valid_targets_mean": 15987.7,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.10541766746730719,
|
|
"grad_norm": 0.38595337799176016,
|
|
"learning_rate": 7.879999999999999e-05,
|
|
"loss": 0.7177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7050427198410034,
|
|
"step": 395,
|
|
"valid_targets_mean": 15030.2,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 0.10675206832132372,
|
|
"grad_norm": 1.0374726912615833,
|
|
"learning_rate": 7.98e-05,
|
|
"loss": 0.73,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7716153264045715,
|
|
"step": 400,
|
|
"valid_targets_mean": 16188.2,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.10808646917534027,
|
|
"grad_norm": 0.6324192761921746,
|
|
"learning_rate": 8.079999999999999e-05,
|
|
"loss": 0.7392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7432792782783508,
|
|
"step": 405,
|
|
"valid_targets_mean": 15845.6,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 0.10942087002935681,
|
|
"grad_norm": 0.78537905081529,
|
|
"learning_rate": 8.18e-05,
|
|
"loss": 0.7268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7303118109703064,
|
|
"step": 410,
|
|
"valid_targets_mean": 15707.5,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.11075527088337336,
|
|
"grad_norm": 0.9395811255221675,
|
|
"learning_rate": 8.28e-05,
|
|
"loss": 0.7005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7188759446144104,
|
|
"step": 415,
|
|
"valid_targets_mean": 15624.4,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.11208967173738991,
|
|
"grad_norm": 0.9295266743546321,
|
|
"learning_rate": 8.379999999999999e-05,
|
|
"loss": 0.703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7148219347000122,
|
|
"step": 420,
|
|
"valid_targets_mean": 16225.2,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 0.11342407259140645,
|
|
"grad_norm": 0.476696559517845,
|
|
"learning_rate": 8.48e-05,
|
|
"loss": 0.715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7083289623260498,
|
|
"step": 425,
|
|
"valid_targets_mean": 15491.8,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.114758473445423,
|
|
"grad_norm": 0.43534945533869385,
|
|
"learning_rate": 8.579999999999998e-05,
|
|
"loss": 0.7204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7006481885910034,
|
|
"step": 430,
|
|
"valid_targets_mean": 17209.7,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 0.11609287429943956,
|
|
"grad_norm": 0.6609645621259375,
|
|
"learning_rate": 8.68e-05,
|
|
"loss": 0.7315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7302699089050293,
|
|
"step": 435,
|
|
"valid_targets_mean": 14802.1,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 0.11742727515345609,
|
|
"grad_norm": 0.7199278706441802,
|
|
"learning_rate": 8.779999999999999e-05,
|
|
"loss": 0.7176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7259851098060608,
|
|
"step": 440,
|
|
"valid_targets_mean": 16476.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.11876167600747264,
|
|
"grad_norm": 0.4773065968592865,
|
|
"learning_rate": 8.879999999999999e-05,
|
|
"loss": 0.7136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7408217191696167,
|
|
"step": 445,
|
|
"valid_targets_mean": 17142.7,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 0.1200960768614892,
|
|
"grad_norm": 0.37172351171758483,
|
|
"learning_rate": 8.98e-05,
|
|
"loss": 0.7237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7378139495849609,
|
|
"step": 450,
|
|
"valid_targets_mean": 15218.4,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.12143047771550573,
|
|
"grad_norm": 0.39304827138785564,
|
|
"learning_rate": 9.079999999999998e-05,
|
|
"loss": 0.7107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7344069480895996,
|
|
"step": 455,
|
|
"valid_targets_mean": 16115.3,
|
|
"valid_targets_min": 81
|
|
},
|
|
{
|
|
"epoch": 0.12276487856952228,
|
|
"grad_norm": 0.42542723715742675,
|
|
"learning_rate": 9.18e-05,
|
|
"loss": 0.7134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7543541193008423,
|
|
"step": 460,
|
|
"valid_targets_mean": 14546.4,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.12409927942353884,
|
|
"grad_norm": 0.541397629733599,
|
|
"learning_rate": 9.279999999999999e-05,
|
|
"loss": 0.7253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7133990526199341,
|
|
"step": 465,
|
|
"valid_targets_mean": 16384.3,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.1254336802775554,
|
|
"grad_norm": 0.5822512924496642,
|
|
"learning_rate": 9.379999999999999e-05,
|
|
"loss": 0.7129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6963658928871155,
|
|
"step": 470,
|
|
"valid_targets_mean": 15760.1,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.12676808113157192,
|
|
"grad_norm": 0.5030863279307085,
|
|
"learning_rate": 9.479999999999999e-05,
|
|
"loss": 0.734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7287914752960205,
|
|
"step": 475,
|
|
"valid_targets_mean": 16120.6,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 0.12810248198558846,
|
|
"grad_norm": 0.4939684474512313,
|
|
"learning_rate": 9.58e-05,
|
|
"loss": 0.72,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7166863679885864,
|
|
"step": 480,
|
|
"valid_targets_mean": 15308.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.12943688283960503,
|
|
"grad_norm": 1.1934926781005402,
|
|
"learning_rate": 9.68e-05,
|
|
"loss": 0.7278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7173789739608765,
|
|
"step": 485,
|
|
"valid_targets_mean": 15423.5,
|
|
"valid_targets_min": 147
|
|
},
|
|
{
|
|
"epoch": 0.13077128369362157,
|
|
"grad_norm": 0.5255232868822954,
|
|
"learning_rate": 9.779999999999999e-05,
|
|
"loss": 0.714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7186357975006104,
|
|
"step": 490,
|
|
"valid_targets_mean": 16427.6,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 0.1321056845476381,
|
|
"grad_norm": 0.5484769961875839,
|
|
"learning_rate": 9.879999999999999e-05,
|
|
"loss": 0.7212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7366380095481873,
|
|
"step": 495,
|
|
"valid_targets_mean": 17284.8,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 0.13344008540165467,
|
|
"grad_norm": 0.5074708333313445,
|
|
"learning_rate": 9.979999999999999e-05,
|
|
"loss": 0.7251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7222837209701538,
|
|
"step": 500,
|
|
"valid_targets_mean": 16145.7,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 0.1347744862556712,
|
|
"grad_norm": 0.44933138863293226,
|
|
"learning_rate": 0.0001008,
|
|
"loss": 0.72,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7153065204620361,
|
|
"step": 505,
|
|
"valid_targets_mean": 16280.8,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 0.13610888710968774,
|
|
"grad_norm": 0.44505945767105004,
|
|
"learning_rate": 0.00010179999999999998,
|
|
"loss": 0.7155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6971901655197144,
|
|
"step": 510,
|
|
"valid_targets_mean": 17205.7,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.1374432879637043,
|
|
"grad_norm": 0.48703552246931575,
|
|
"learning_rate": 0.00010279999999999999,
|
|
"loss": 0.7102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6962928771972656,
|
|
"step": 515,
|
|
"valid_targets_mean": 15323.8,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 0.13877768881772085,
|
|
"grad_norm": 0.5239136710241173,
|
|
"learning_rate": 0.00010379999999999999,
|
|
"loss": 0.7049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7096296548843384,
|
|
"step": 520,
|
|
"valid_targets_mean": 17015.6,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.14011208967173738,
|
|
"grad_norm": 0.685550891317315,
|
|
"learning_rate": 0.00010479999999999999,
|
|
"loss": 0.7213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7331212162971497,
|
|
"step": 525,
|
|
"valid_targets_mean": 17924.0,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.14144649052575395,
|
|
"grad_norm": 0.9371855061417276,
|
|
"learning_rate": 0.0001058,
|
|
"loss": 0.7189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.756299614906311,
|
|
"step": 530,
|
|
"valid_targets_mean": 15321.0,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.14278089137977049,
|
|
"grad_norm": 0.5750367848876685,
|
|
"learning_rate": 0.00010679999999999998,
|
|
"loss": 0.7188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7179563045501709,
|
|
"step": 535,
|
|
"valid_targets_mean": 14206.7,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 0.14411529223378702,
|
|
"grad_norm": 0.5833077589670811,
|
|
"learning_rate": 0.00010779999999999999,
|
|
"loss": 0.7283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7211377620697021,
|
|
"step": 540,
|
|
"valid_targets_mean": 16022.5,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.1454496930878036,
|
|
"grad_norm": 0.7269177019931343,
|
|
"learning_rate": 0.0001088,
|
|
"loss": 0.7178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7293734550476074,
|
|
"step": 545,
|
|
"valid_targets_mean": 15989.6,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 0.14678409394182013,
|
|
"grad_norm": 0.5334975069811895,
|
|
"learning_rate": 0.00010979999999999999,
|
|
"loss": 0.7393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7264102697372437,
|
|
"step": 550,
|
|
"valid_targets_mean": 15266.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.14811849479583666,
|
|
"grad_norm": 0.6250384008635103,
|
|
"learning_rate": 0.0001108,
|
|
"loss": 0.6967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6969237923622131,
|
|
"step": 555,
|
|
"valid_targets_mean": 16026.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.14945289564985323,
|
|
"grad_norm": 0.8468842115841293,
|
|
"learning_rate": 0.00011179999999999998,
|
|
"loss": 0.7226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7111748456954956,
|
|
"step": 560,
|
|
"valid_targets_mean": 15614.1,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.15078729650386977,
|
|
"grad_norm": 0.8021724458590994,
|
|
"learning_rate": 0.00011279999999999999,
|
|
"loss": 0.689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6887655258178711,
|
|
"step": 565,
|
|
"valid_targets_mean": 17246.1,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.1521216973578863,
|
|
"grad_norm": 0.6445231347491678,
|
|
"learning_rate": 0.0001138,
|
|
"loss": 0.7125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7299594283103943,
|
|
"step": 570,
|
|
"valid_targets_mean": 15399.9,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 0.15345609821190287,
|
|
"grad_norm": 0.9632004486100639,
|
|
"learning_rate": 0.00011479999999999999,
|
|
"loss": 0.7258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.725225031375885,
|
|
"step": 575,
|
|
"valid_targets_mean": 16895.6,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 0.1547904990659194,
|
|
"grad_norm": 0.6125391497023498,
|
|
"learning_rate": 0.0001158,
|
|
"loss": 0.7283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7031444907188416,
|
|
"step": 580,
|
|
"valid_targets_mean": 16781.4,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 0.15612489991993594,
|
|
"grad_norm": 0.914929177218736,
|
|
"learning_rate": 0.00011679999999999998,
|
|
"loss": 0.73,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7310180068016052,
|
|
"step": 585,
|
|
"valid_targets_mean": 14891.6,
|
|
"valid_targets_min": 104
|
|
},
|
|
{
|
|
"epoch": 0.15745930077395248,
|
|
"grad_norm": 0.8814371014768175,
|
|
"learning_rate": 0.00011779999999999999,
|
|
"loss": 0.7309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7159960269927979,
|
|
"step": 590,
|
|
"valid_targets_mean": 14935.1,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 0.15879370162796905,
|
|
"grad_norm": 0.5975243304153101,
|
|
"learning_rate": 0.0001188,
|
|
"loss": 0.7138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7245515584945679,
|
|
"step": 595,
|
|
"valid_targets_mean": 17035.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.16012810248198558,
|
|
"grad_norm": 0.46679873052553167,
|
|
"learning_rate": 0.00011979999999999998,
|
|
"loss": 0.7085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6976876258850098,
|
|
"step": 600,
|
|
"valid_targets_mean": 15646.5,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 0.16146250333600212,
|
|
"grad_norm": 0.5128674027346761,
|
|
"learning_rate": 0.0001208,
|
|
"loss": 0.7223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7510048151016235,
|
|
"step": 605,
|
|
"valid_targets_mean": 15762.4,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 0.1627969041900187,
|
|
"grad_norm": 0.5819395696398177,
|
|
"learning_rate": 0.00012179999999999999,
|
|
"loss": 0.7157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.657742977142334,
|
|
"step": 610,
|
|
"valid_targets_mean": 16854.7,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.16413130504403523,
|
|
"grad_norm": 0.5162670382257071,
|
|
"learning_rate": 0.00012279999999999998,
|
|
"loss": 0.7084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7006169557571411,
|
|
"step": 615,
|
|
"valid_targets_mean": 15703.4,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.16546570589805176,
|
|
"grad_norm": 0.6945068724820247,
|
|
"learning_rate": 0.0001238,
|
|
"loss": 0.7216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.756109356880188,
|
|
"step": 620,
|
|
"valid_targets_mean": 15891.4,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.16680010675206833,
|
|
"grad_norm": 0.9642949707242404,
|
|
"learning_rate": 0.00012479999999999997,
|
|
"loss": 0.7238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.695439875125885,
|
|
"step": 625,
|
|
"valid_targets_mean": 17157.8,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.16813450760608487,
|
|
"grad_norm": 0.5833542867382966,
|
|
"learning_rate": 0.0001258,
|
|
"loss": 0.715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6990182399749756,
|
|
"step": 630,
|
|
"valid_targets_mean": 15990.5,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 0.1694689084601014,
|
|
"grad_norm": 0.7007660146650656,
|
|
"learning_rate": 0.0001268,
|
|
"loss": 0.7157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7077897787094116,
|
|
"step": 635,
|
|
"valid_targets_mean": 15861.1,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.17080330931411797,
|
|
"grad_norm": 0.49195751975639107,
|
|
"learning_rate": 0.0001278,
|
|
"loss": 0.7161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6865917444229126,
|
|
"step": 640,
|
|
"valid_targets_mean": 15295.1,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.1721377101681345,
|
|
"grad_norm": 0.7191679724888371,
|
|
"learning_rate": 0.0001288,
|
|
"loss": 0.7069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7064446210861206,
|
|
"step": 645,
|
|
"valid_targets_mean": 16334.0,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 0.17347211102215104,
|
|
"grad_norm": 0.5594698481432445,
|
|
"learning_rate": 0.00012979999999999998,
|
|
"loss": 0.7229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.708102822303772,
|
|
"step": 650,
|
|
"valid_targets_mean": 16455.3,
|
|
"valid_targets_min": 87
|
|
},
|
|
{
|
|
"epoch": 0.1748065118761676,
|
|
"grad_norm": 0.735025293283187,
|
|
"learning_rate": 0.00013079999999999998,
|
|
"loss": 0.7222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7207574248313904,
|
|
"step": 655,
|
|
"valid_targets_mean": 16754.3,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 0.17614091273018415,
|
|
"grad_norm": 0.3958769606977501,
|
|
"learning_rate": 0.0001318,
|
|
"loss": 0.7019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6824870109558105,
|
|
"step": 660,
|
|
"valid_targets_mean": 16950.8,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 0.17747531358420068,
|
|
"grad_norm": 0.38239059142969134,
|
|
"learning_rate": 0.00013279999999999998,
|
|
"loss": 0.718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6917785406112671,
|
|
"step": 665,
|
|
"valid_targets_mean": 16209.8,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.17880971443821725,
|
|
"grad_norm": 0.5118945312941819,
|
|
"learning_rate": 0.0001338,
|
|
"loss": 0.7221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7017595767974854,
|
|
"step": 670,
|
|
"valid_targets_mean": 15769.2,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 0.1801441152922338,
|
|
"grad_norm": 0.6464646630845813,
|
|
"learning_rate": 0.00013479999999999997,
|
|
"loss": 0.7236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7357965707778931,
|
|
"step": 675,
|
|
"valid_targets_mean": 16688.3,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 0.18147851614625032,
|
|
"grad_norm": 0.4239557207049385,
|
|
"learning_rate": 0.0001358,
|
|
"loss": 0.7025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6929694414138794,
|
|
"step": 680,
|
|
"valid_targets_mean": 15849.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.1828129170002669,
|
|
"grad_norm": 0.7804567161815549,
|
|
"learning_rate": 0.0001368,
|
|
"loss": 0.7189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7219686508178711,
|
|
"step": 685,
|
|
"valid_targets_mean": 16094.0,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 0.18414731785428343,
|
|
"grad_norm": 0.5732418331235286,
|
|
"learning_rate": 0.0001378,
|
|
"loss": 0.7047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6867932081222534,
|
|
"step": 690,
|
|
"valid_targets_mean": 15930.7,
|
|
"valid_targets_min": 42
|
|
},
|
|
{
|
|
"epoch": 0.18548171870829996,
|
|
"grad_norm": 0.42458958191005924,
|
|
"learning_rate": 0.00013879999999999999,
|
|
"loss": 0.7014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6767255067825317,
|
|
"step": 695,
|
|
"valid_targets_mean": 15631.3,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 0.18681611956231653,
|
|
"grad_norm": 0.83041183706765,
|
|
"learning_rate": 0.00013979999999999998,
|
|
"loss": 0.7104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7344939112663269,
|
|
"step": 700,
|
|
"valid_targets_mean": 14661.2,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 0.18815052041633307,
|
|
"grad_norm": 0.4551116123008287,
|
|
"learning_rate": 0.00014079999999999998,
|
|
"loss": 0.7053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6875162720680237,
|
|
"step": 705,
|
|
"valid_targets_mean": 17108.3,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.1894849212703496,
|
|
"grad_norm": 0.42062013074810284,
|
|
"learning_rate": 0.0001418,
|
|
"loss": 0.7087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7142444849014282,
|
|
"step": 710,
|
|
"valid_targets_mean": 15786.6,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.19081932212436617,
|
|
"grad_norm": 0.6305250919291718,
|
|
"learning_rate": 0.00014279999999999997,
|
|
"loss": 0.6996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.690528154373169,
|
|
"step": 715,
|
|
"valid_targets_mean": 16620.3,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 0.1921537229783827,
|
|
"grad_norm": 0.49236440297808903,
|
|
"learning_rate": 0.0001438,
|
|
"loss": 0.6826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7059494256973267,
|
|
"step": 720,
|
|
"valid_targets_mean": 15789.3,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.19348812383239924,
|
|
"grad_norm": 1.0246968836811798,
|
|
"learning_rate": 0.0001448,
|
|
"loss": 0.6966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7069756984710693,
|
|
"step": 725,
|
|
"valid_targets_mean": 15479.2,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.1948225246864158,
|
|
"grad_norm": 0.9166310918813874,
|
|
"learning_rate": 0.0001458,
|
|
"loss": 0.712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.696449339389801,
|
|
"step": 730,
|
|
"valid_targets_mean": 17183.2,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.19615692554043235,
|
|
"grad_norm": 1.0363387105812145,
|
|
"learning_rate": 0.0001468,
|
|
"loss": 0.7071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7203611135482788,
|
|
"step": 735,
|
|
"valid_targets_mean": 14946.1,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.19749132639444889,
|
|
"grad_norm": 0.5417483344221564,
|
|
"learning_rate": 0.0001478,
|
|
"loss": 0.7106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7111995816230774,
|
|
"step": 740,
|
|
"valid_targets_mean": 14798.5,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 0.19882572724846545,
|
|
"grad_norm": 0.777755567926226,
|
|
"learning_rate": 0.00014879999999999998,
|
|
"loss": 0.7001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6688179969787598,
|
|
"step": 745,
|
|
"valid_targets_mean": 15765.6,
|
|
"valid_targets_min": 130
|
|
},
|
|
{
|
|
"epoch": 0.200160128102482,
|
|
"grad_norm": 0.7871992497617583,
|
|
"learning_rate": 0.00014979999999999998,
|
|
"loss": 0.7054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7222711443901062,
|
|
"step": 750,
|
|
"valid_targets_mean": 15587.0,
|
|
"valid_targets_min": 142
|
|
},
|
|
{
|
|
"epoch": 0.20149452895649853,
|
|
"grad_norm": 0.7589612805003493,
|
|
"learning_rate": 0.00014999986979857214,
|
|
"loss": 0.7035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7213101387023926,
|
|
"step": 755,
|
|
"valid_targets_mean": 15475.6,
|
|
"valid_targets_min": 3
|
|
},
|
|
{
|
|
"epoch": 0.2028289298105151,
|
|
"grad_norm": 0.8806420553414223,
|
|
"learning_rate": 0.00014999934085604638,
|
|
"loss": 0.7045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.721771240234375,
|
|
"step": 760,
|
|
"valid_targets_mean": 15470.4,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.20416333066453163,
|
|
"grad_norm": 0.6490469254239379,
|
|
"learning_rate": 0.00014999840503770068,
|
|
"loss": 0.7057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6929470300674438,
|
|
"step": 765,
|
|
"valid_targets_mean": 15379.0,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.20549773151854817,
|
|
"grad_norm": 0.632766163326704,
|
|
"learning_rate": 0.00014999706234861205,
|
|
"loss": 0.7181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7145708799362183,
|
|
"step": 770,
|
|
"valid_targets_mean": 15203.7,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 0.20683213237256473,
|
|
"grad_norm": 0.7382870047734233,
|
|
"learning_rate": 0.00014999531279606457,
|
|
"loss": 0.7086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7060419321060181,
|
|
"step": 775,
|
|
"valid_targets_mean": 16520.4,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 0.20816653322658127,
|
|
"grad_norm": 0.9240426936239289,
|
|
"learning_rate": 0.00014999315638954965,
|
|
"loss": 0.7147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.709404468536377,
|
|
"step": 780,
|
|
"valid_targets_mean": 17228.6,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 0.2095009340805978,
|
|
"grad_norm": 0.8934733650392805,
|
|
"learning_rate": 0.000149990593140766,
|
|
"loss": 0.7113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7076325416564941,
|
|
"step": 785,
|
|
"valid_targets_mean": 16678.5,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 0.21083533493461437,
|
|
"grad_norm": 0.836599216200632,
|
|
"learning_rate": 0.00014998762306361933,
|
|
"loss": 0.694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6929929852485657,
|
|
"step": 790,
|
|
"valid_targets_mean": 16718.5,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.2121697357886309,
|
|
"grad_norm": 3.5029546430195277,
|
|
"learning_rate": 0.00014998424617422253,
|
|
"loss": 0.7001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7014665603637695,
|
|
"step": 795,
|
|
"valid_targets_mean": 17313.6,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.21350413664264745,
|
|
"grad_norm": 0.5418752118979653,
|
|
"learning_rate": 0.00014998046249089538,
|
|
"loss": 0.7014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.688701868057251,
|
|
"step": 800,
|
|
"valid_targets_mean": 16082.9,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.214838537496664,
|
|
"grad_norm": 0.6706169884495433,
|
|
"learning_rate": 0.00014997627203416458,
|
|
"loss": 0.6998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7123109102249146,
|
|
"step": 805,
|
|
"valid_targets_mean": 15668.5,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.21617293835068055,
|
|
"grad_norm": 0.7159637795497619,
|
|
"learning_rate": 0.00014997167482676366,
|
|
"loss": 0.7013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7268193960189819,
|
|
"step": 810,
|
|
"valid_targets_mean": 14148.5,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.2175073392046971,
|
|
"grad_norm": 0.39213537306358476,
|
|
"learning_rate": 0.00014996667089363272,
|
|
"loss": 0.6972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6896648406982422,
|
|
"step": 815,
|
|
"valid_targets_mean": 15892.1,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 0.21884174005871362,
|
|
"grad_norm": 0.6142783873448505,
|
|
"learning_rate": 0.00014996126026191832,
|
|
"loss": 0.7162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7129541635513306,
|
|
"step": 820,
|
|
"valid_targets_mean": 16946.2,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 0.2201761409127302,
|
|
"grad_norm": 0.5260685820283173,
|
|
"learning_rate": 0.00014995544296097355,
|
|
"loss": 0.7122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7193052768707275,
|
|
"step": 825,
|
|
"valid_targets_mean": 14956.7,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 0.22151054176674673,
|
|
"grad_norm": 0.5686280764452485,
|
|
"learning_rate": 0.00014994921902235757,
|
|
"loss": 0.7094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7117457985877991,
|
|
"step": 830,
|
|
"valid_targets_mean": 15185.0,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 0.22284494262076326,
|
|
"grad_norm": 0.6755060505696501,
|
|
"learning_rate": 0.0001499425884798356,
|
|
"loss": 0.6913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6957063674926758,
|
|
"step": 835,
|
|
"valid_targets_mean": 15872.1,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.22417934347477983,
|
|
"grad_norm": 0.48553090040615876,
|
|
"learning_rate": 0.00014993555136937872,
|
|
"loss": 0.7015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6867814064025879,
|
|
"step": 840,
|
|
"valid_targets_mean": 16234.7,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 0.22551374432879637,
|
|
"grad_norm": 0.40569646079083105,
|
|
"learning_rate": 0.0001499281077291637,
|
|
"loss": 0.7108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7366761565208435,
|
|
"step": 845,
|
|
"valid_targets_mean": 15982.0,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 0.2268481451828129,
|
|
"grad_norm": 1.017546932562299,
|
|
"learning_rate": 0.00014992025759957267,
|
|
"loss": 0.7268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7172836065292358,
|
|
"step": 850,
|
|
"valid_targets_mean": 15778.7,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.22818254603682947,
|
|
"grad_norm": 0.6688766038496526,
|
|
"learning_rate": 0.0001499120010231931,
|
|
"loss": 0.6981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.687598705291748,
|
|
"step": 855,
|
|
"valid_targets_mean": 16142.9,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 0.229516946890846,
|
|
"grad_norm": 0.6613026228057015,
|
|
"learning_rate": 0.00014990333804481738,
|
|
"loss": 0.6987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6992521286010742,
|
|
"step": 860,
|
|
"valid_targets_mean": 16895.7,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 0.23085134774486255,
|
|
"grad_norm": 0.8302928138355796,
|
|
"learning_rate": 0.00014989426871144266,
|
|
"loss": 0.6985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7034393548965454,
|
|
"step": 865,
|
|
"valid_targets_mean": 16505.0,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 0.2321857485988791,
|
|
"grad_norm": 0.7509998803398379,
|
|
"learning_rate": 0.00014988479307227062,
|
|
"loss": 0.6855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7085684537887573,
|
|
"step": 870,
|
|
"valid_targets_mean": 14895.0,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 0.23352014945289565,
|
|
"grad_norm": 0.6037951591636942,
|
|
"learning_rate": 0.00014987491117870717,
|
|
"loss": 0.7033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6840174198150635,
|
|
"step": 875,
|
|
"valid_targets_mean": 16543.5,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.23485455030691219,
|
|
"grad_norm": 0.5286893064532538,
|
|
"learning_rate": 0.00014986462308436214,
|
|
"loss": 0.6884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7020936012268066,
|
|
"step": 880,
|
|
"valid_targets_mean": 14788.7,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.23618895116092875,
|
|
"grad_norm": 0.480456782545674,
|
|
"learning_rate": 0.00014985392884504903,
|
|
"loss": 0.7011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7093113660812378,
|
|
"step": 885,
|
|
"valid_targets_mean": 15861.4,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 0.2375233520149453,
|
|
"grad_norm": 0.7679411381978142,
|
|
"learning_rate": 0.00014984282851878477,
|
|
"loss": 0.698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7077801823616028,
|
|
"step": 890,
|
|
"valid_targets_mean": 16783.0,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.23885775286896183,
|
|
"grad_norm": 0.5231140658944428,
|
|
"learning_rate": 0.00014983132216578923,
|
|
"loss": 0.6982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6934367418289185,
|
|
"step": 895,
|
|
"valid_targets_mean": 16316.8,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 0.2401921537229784,
|
|
"grad_norm": 0.42864022271796487,
|
|
"learning_rate": 0.00014981940984848508,
|
|
"loss": 0.6764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6391340494155884,
|
|
"step": 900,
|
|
"valid_targets_mean": 16328.9,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.24152655457699493,
|
|
"grad_norm": 0.33650902419596107,
|
|
"learning_rate": 0.00014980709163149732,
|
|
"loss": 0.7052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7220794558525085,
|
|
"step": 905,
|
|
"valid_targets_mean": 15666.6,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.24286095543101147,
|
|
"grad_norm": 0.6102385943286962,
|
|
"learning_rate": 0.000149794367581653,
|
|
"loss": 0.6993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7103188037872314,
|
|
"step": 910,
|
|
"valid_targets_mean": 16207.6,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 0.24419535628502803,
|
|
"grad_norm": 0.6766800650304651,
|
|
"learning_rate": 0.00014978123776798082,
|
|
"loss": 0.6879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6693642735481262,
|
|
"step": 915,
|
|
"valid_targets_mean": 17180.4,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 0.24552975713904457,
|
|
"grad_norm": 0.5708107933964744,
|
|
"learning_rate": 0.00014976770226171084,
|
|
"loss": 0.6906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6481786966323853,
|
|
"step": 920,
|
|
"valid_targets_mean": 17630.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.2468641579930611,
|
|
"grad_norm": 0.45298074962402585,
|
|
"learning_rate": 0.00014975376113627394,
|
|
"loss": 0.6954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6705655455589294,
|
|
"step": 925,
|
|
"valid_targets_mean": 17155.6,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 0.24819855884707767,
|
|
"grad_norm": 0.6539562997849696,
|
|
"learning_rate": 0.00014973941446730154,
|
|
"loss": 0.7149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7168123126029968,
|
|
"step": 930,
|
|
"valid_targets_mean": 16628.1,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.2495329597010942,
|
|
"grad_norm": 0.5986561836133009,
|
|
"learning_rate": 0.00014972466233262517,
|
|
"loss": 0.6949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7100132703781128,
|
|
"step": 935,
|
|
"valid_targets_mean": 16743.7,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 0.2508673605551108,
|
|
"grad_norm": 0.5148245538699373,
|
|
"learning_rate": 0.00014970950481227603,
|
|
"loss": 0.7109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6915335655212402,
|
|
"step": 940,
|
|
"valid_targets_mean": 15595.5,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.2522017614091273,
|
|
"grad_norm": 0.786060251192184,
|
|
"learning_rate": 0.00014969394198848456,
|
|
"loss": 0.7029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7469134330749512,
|
|
"step": 945,
|
|
"valid_targets_mean": 14296.9,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 0.25353616226314385,
|
|
"grad_norm": 0.43995264747649415,
|
|
"learning_rate": 0.00014967797394567993,
|
|
"loss": 0.6799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6515603065490723,
|
|
"step": 950,
|
|
"valid_targets_mean": 16029.6,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.2548705631171604,
|
|
"grad_norm": 0.7666679127339393,
|
|
"learning_rate": 0.00014966160077048982,
|
|
"loss": 0.6784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6881482601165771,
|
|
"step": 955,
|
|
"valid_targets_mean": 16402.8,
|
|
"valid_targets_min": 108
|
|
},
|
|
{
|
|
"epoch": 0.2562049639711769,
|
|
"grad_norm": 0.575233221839231,
|
|
"learning_rate": 0.00014964482255173958,
|
|
"loss": 0.703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.680946946144104,
|
|
"step": 960,
|
|
"valid_targets_mean": 15782.9,
|
|
"valid_targets_min": 13
|
|
},
|
|
{
|
|
"epoch": 0.25753936482519346,
|
|
"grad_norm": 0.4955657079524624,
|
|
"learning_rate": 0.00014962763938045206,
|
|
"loss": 0.7009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6926153898239136,
|
|
"step": 965,
|
|
"valid_targets_mean": 14686.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.25887376567921005,
|
|
"grad_norm": 0.6172410048860858,
|
|
"learning_rate": 0.00014961005134984693,
|
|
"loss": 0.6866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6675003170967102,
|
|
"step": 970,
|
|
"valid_targets_mean": 17335.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.2602081665332266,
|
|
"grad_norm": 0.5604574782581667,
|
|
"learning_rate": 0.00014959205855534036,
|
|
"loss": 0.6845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.705886721611023,
|
|
"step": 975,
|
|
"valid_targets_mean": 16804.7,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 0.26154256738724313,
|
|
"grad_norm": 0.5313317151302402,
|
|
"learning_rate": 0.00014957366109454427,
|
|
"loss": 0.6816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6733583211898804,
|
|
"step": 980,
|
|
"valid_targets_mean": 16253.9,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.26287696824125967,
|
|
"grad_norm": 0.43864368887865324,
|
|
"learning_rate": 0.00014955485906726596,
|
|
"loss": 0.6959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6941563487052917,
|
|
"step": 985,
|
|
"valid_targets_mean": 15571.7,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.2642113690952762,
|
|
"grad_norm": 0.5181543388907015,
|
|
"learning_rate": 0.00014953565257550756,
|
|
"loss": 0.7069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6880620718002319,
|
|
"step": 990,
|
|
"valid_targets_mean": 16759.1,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 0.26554576994929274,
|
|
"grad_norm": 0.7477865809506131,
|
|
"learning_rate": 0.00014951604172346535,
|
|
"loss": 0.6946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7352211475372314,
|
|
"step": 995,
|
|
"valid_targets_mean": 14561.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.26688017080330934,
|
|
"grad_norm": 0.6373119718120067,
|
|
"learning_rate": 0.00014949602661752944,
|
|
"loss": 0.7005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6938153505325317,
|
|
"step": 1000,
|
|
"valid_targets_mean": 17262.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 0.2682145716573259,
|
|
"grad_norm": 0.32709542106154893,
|
|
"learning_rate": 0.0001494756073662829,
|
|
"loss": 0.6683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6421211957931519,
|
|
"step": 1005,
|
|
"valid_targets_mean": 16814.7,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 0.2695489725113424,
|
|
"grad_norm": 0.6698123080687065,
|
|
"learning_rate": 0.00014945478408050135,
|
|
"loss": 0.69,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6433126926422119,
|
|
"step": 1010,
|
|
"valid_targets_mean": 17187.6,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 0.27088337336535895,
|
|
"grad_norm": 0.5130901199029623,
|
|
"learning_rate": 0.00014943355687315239,
|
|
"loss": 0.6932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.694229781627655,
|
|
"step": 1015,
|
|
"valid_targets_mean": 15706.6,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 0.2722177742193755,
|
|
"grad_norm": 0.5310687957289015,
|
|
"learning_rate": 0.0001494119258593948,
|
|
"loss": 0.6833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6675958633422852,
|
|
"step": 1020,
|
|
"valid_targets_mean": 16197.8,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.273552175073392,
|
|
"grad_norm": 0.6328700461529049,
|
|
"learning_rate": 0.00014938989115657815,
|
|
"loss": 0.7083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7138117551803589,
|
|
"step": 1025,
|
|
"valid_targets_mean": 16780.0,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 0.2748865759274086,
|
|
"grad_norm": 0.8323618177058604,
|
|
"learning_rate": 0.00014936745288424198,
|
|
"loss": 0.7071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7033565044403076,
|
|
"step": 1030,
|
|
"valid_targets_mean": 16363.2,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.27622097678142515,
|
|
"grad_norm": 0.5615678318349799,
|
|
"learning_rate": 0.0001493446111641152,
|
|
"loss": 0.7001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6922980546951294,
|
|
"step": 1035,
|
|
"valid_targets_mean": 17032.3,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 0.2775553776354417,
|
|
"grad_norm": 0.9216225408504705,
|
|
"learning_rate": 0.00014932136612011554,
|
|
"loss": 0.6806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6632153391838074,
|
|
"step": 1040,
|
|
"valid_targets_mean": 14829.7,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 0.27888977848945823,
|
|
"grad_norm": 0.916779484221643,
|
|
"learning_rate": 0.00014929771787834868,
|
|
"loss": 0.6878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6791641712188721,
|
|
"step": 1045,
|
|
"valid_targets_mean": 14985.0,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 0.28022417934347477,
|
|
"grad_norm": 0.5637979161337277,
|
|
"learning_rate": 0.00014927366656710772,
|
|
"loss": 0.6978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6769039630889893,
|
|
"step": 1050,
|
|
"valid_targets_mean": 16495.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.2815585801974913,
|
|
"grad_norm": 0.7200961111808594,
|
|
"learning_rate": 0.00014924921231687245,
|
|
"loss": 0.7055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6782281398773193,
|
|
"step": 1055,
|
|
"valid_targets_mean": 16101.9,
|
|
"valid_targets_min": 115
|
|
},
|
|
{
|
|
"epoch": 0.2828929810515079,
|
|
"grad_norm": 1.0978604253731457,
|
|
"learning_rate": 0.0001492243552603086,
|
|
"loss": 0.6899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6719521284103394,
|
|
"step": 1060,
|
|
"valid_targets_mean": 16325.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.28422738190552443,
|
|
"grad_norm": 0.7823473663674503,
|
|
"learning_rate": 0.00014919909553226716,
|
|
"loss": 0.6943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.658243715763092,
|
|
"step": 1065,
|
|
"valid_targets_mean": 17418.4,
|
|
"valid_targets_min": 39
|
|
},
|
|
{
|
|
"epoch": 0.28556178275954097,
|
|
"grad_norm": 0.8215632846087839,
|
|
"learning_rate": 0.00014917343326978366,
|
|
"loss": 0.689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6821578145027161,
|
|
"step": 1070,
|
|
"valid_targets_mean": 16322.2,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 0.2868961836135575,
|
|
"grad_norm": 0.687488420158643,
|
|
"learning_rate": 0.00014914736861207733,
|
|
"loss": 0.6751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6720151901245117,
|
|
"step": 1075,
|
|
"valid_targets_mean": 16538.7,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 0.28823058446757405,
|
|
"grad_norm": 0.6210551753687071,
|
|
"learning_rate": 0.0001491209017005505,
|
|
"loss": 0.6952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.701689600944519,
|
|
"step": 1080,
|
|
"valid_targets_mean": 15265.4,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 0.2895649853215906,
|
|
"grad_norm": 0.919751129443059,
|
|
"learning_rate": 0.00014909403267878771,
|
|
"loss": 0.6866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6928168535232544,
|
|
"step": 1085,
|
|
"valid_targets_mean": 15299.6,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 0.2908993861756072,
|
|
"grad_norm": 0.6031403591660455,
|
|
"learning_rate": 0.000149066761692555,
|
|
"loss": 0.7153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7102971076965332,
|
|
"step": 1090,
|
|
"valid_targets_mean": 15627.9,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 0.2922337870296237,
|
|
"grad_norm": 0.6644450276274809,
|
|
"learning_rate": 0.00014903908888979904,
|
|
"loss": 0.7205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7056329250335693,
|
|
"step": 1095,
|
|
"valid_targets_mean": 16785.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 0.29356818788364025,
|
|
"grad_norm": 0.4764608245584093,
|
|
"learning_rate": 0.00014901101442064637,
|
|
"loss": 0.6987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6979807615280151,
|
|
"step": 1100,
|
|
"valid_targets_mean": 16167.7,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.2949025887376568,
|
|
"grad_norm": 0.6014049267424753,
|
|
"learning_rate": 0.00014898253843740271,
|
|
"loss": 0.68,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6723406314849854,
|
|
"step": 1105,
|
|
"valid_targets_mean": 16871.1,
|
|
"valid_targets_min": 59
|
|
},
|
|
{
|
|
"epoch": 0.2962369895916733,
|
|
"grad_norm": 0.5448219489623065,
|
|
"learning_rate": 0.00014895366109455189,
|
|
"loss": 0.6838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6996907591819763,
|
|
"step": 1110,
|
|
"valid_targets_mean": 14544.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 0.29757139044568987,
|
|
"grad_norm": 0.5583860198734728,
|
|
"learning_rate": 0.00014892438254875522,
|
|
"loss": 0.6971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6966080665588379,
|
|
"step": 1115,
|
|
"valid_targets_mean": 15939.2,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 0.29890579129970646,
|
|
"grad_norm": 0.5808714512224714,
|
|
"learning_rate": 0.00014889470295885047,
|
|
"loss": 0.6898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7076345682144165,
|
|
"step": 1120,
|
|
"valid_targets_mean": 15790.9,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 0.300240192153723,
|
|
"grad_norm": 0.4927279040965231,
|
|
"learning_rate": 0.0001488646224858512,
|
|
"loss": 0.6845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7084103226661682,
|
|
"step": 1125,
|
|
"valid_targets_mean": 16181.6,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 0.30157459300773953,
|
|
"grad_norm": 0.4612855821814559,
|
|
"learning_rate": 0.00014883414129294575,
|
|
"loss": 0.6882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.674759030342102,
|
|
"step": 1130,
|
|
"valid_targets_mean": 16895.6,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 0.30290899386175607,
|
|
"grad_norm": 0.46765724289815713,
|
|
"learning_rate": 0.00014880325954549635,
|
|
"loss": 0.6723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6812288761138916,
|
|
"step": 1135,
|
|
"valid_targets_mean": 16409.4,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.3042433947157726,
|
|
"grad_norm": 0.4824982006416931,
|
|
"learning_rate": 0.00014877197741103827,
|
|
"loss": 0.6794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6763830184936523,
|
|
"step": 1140,
|
|
"valid_targets_mean": 16869.7,
|
|
"valid_targets_min": 16
|
|
},
|
|
{
|
|
"epoch": 0.30557779556978915,
|
|
"grad_norm": 0.6549002835261453,
|
|
"learning_rate": 0.00014874029505927897,
|
|
"loss": 0.6922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6630896925926208,
|
|
"step": 1145,
|
|
"valid_targets_mean": 15811.5,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.30691219642380574,
|
|
"grad_norm": 0.4184651103712921,
|
|
"learning_rate": 0.00014870821266209705,
|
|
"loss": 0.7049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7090791463851929,
|
|
"step": 1150,
|
|
"valid_targets_mean": 16379.1,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 0.3082465972778223,
|
|
"grad_norm": 0.5759381108213268,
|
|
"learning_rate": 0.00014867573039354138,
|
|
"loss": 0.7001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6881549954414368,
|
|
"step": 1155,
|
|
"valid_targets_mean": 16831.0,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.3095809981318388,
|
|
"grad_norm": 0.8376894365561316,
|
|
"learning_rate": 0.00014864284842983018,
|
|
"loss": 0.7009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6884739398956299,
|
|
"step": 1160,
|
|
"valid_targets_mean": 17503.7,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.31091539898585535,
|
|
"grad_norm": 0.616307622715748,
|
|
"learning_rate": 0.00014860956694935003,
|
|
"loss": 0.6803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.657461941242218,
|
|
"step": 1165,
|
|
"valid_targets_mean": 15582.3,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 0.3122497998398719,
|
|
"grad_norm": 0.6537720899572182,
|
|
"learning_rate": 0.0001485758861326549,
|
|
"loss": 0.6737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6778349876403809,
|
|
"step": 1170,
|
|
"valid_targets_mean": 15389.1,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 0.3135842006938884,
|
|
"grad_norm": 0.5118272518220445,
|
|
"learning_rate": 0.00014854180616246523,
|
|
"loss": 0.6764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6655255556106567,
|
|
"step": 1175,
|
|
"valid_targets_mean": 14757.1,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 0.31491860154790496,
|
|
"grad_norm": 0.5785910239800395,
|
|
"learning_rate": 0.00014850732722366682,
|
|
"loss": 0.6993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6939231753349304,
|
|
"step": 1180,
|
|
"valid_targets_mean": 16032.9,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 0.31625300240192156,
|
|
"grad_norm": 0.6962605965216837,
|
|
"learning_rate": 0.00014847244950330998,
|
|
"loss": 0.6919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.672405481338501,
|
|
"step": 1185,
|
|
"valid_targets_mean": 15897.4,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.3175874032559381,
|
|
"grad_norm": 0.7289269797910964,
|
|
"learning_rate": 0.00014843717319060833,
|
|
"loss": 0.697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6860827207565308,
|
|
"step": 1190,
|
|
"valid_targets_mean": 15977.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.31892180410995463,
|
|
"grad_norm": 0.6757648389736319,
|
|
"learning_rate": 0.00014840149847693794,
|
|
"loss": 0.6903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6953767538070679,
|
|
"step": 1195,
|
|
"valid_targets_mean": 16016.2,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 0.32025620496397117,
|
|
"grad_norm": 0.8798593401470403,
|
|
"learning_rate": 0.00014836542555583628,
|
|
"loss": 0.6804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6733475923538208,
|
|
"step": 1200,
|
|
"valid_targets_mean": 15802.3,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 0.3215906058179877,
|
|
"grad_norm": 0.5337148157641388,
|
|
"learning_rate": 0.000148328954623001,
|
|
"loss": 0.6931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6732375621795654,
|
|
"step": 1205,
|
|
"valid_targets_mean": 16520.0,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.32292500667200424,
|
|
"grad_norm": 0.46270985298339473,
|
|
"learning_rate": 0.00014829208587628908,
|
|
"loss": 0.6782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6778278350830078,
|
|
"step": 1210,
|
|
"valid_targets_mean": 15218.8,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.32425940752602084,
|
|
"grad_norm": 0.34954154788174924,
|
|
"learning_rate": 0.0001482548195157156,
|
|
"loss": 0.6792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7126316428184509,
|
|
"step": 1215,
|
|
"valid_targets_mean": 14662.9,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.3255938083800374,
|
|
"grad_norm": 0.5194846979114964,
|
|
"learning_rate": 0.00014821715574345277,
|
|
"loss": 0.6724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6763893961906433,
|
|
"step": 1220,
|
|
"valid_targets_mean": 16349.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.3269282092340539,
|
|
"grad_norm": 0.7093759511996763,
|
|
"learning_rate": 0.0001481790947638288,
|
|
"loss": 0.6786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7101802825927734,
|
|
"step": 1225,
|
|
"valid_targets_mean": 15322.1,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 0.32826261008807045,
|
|
"grad_norm": 0.5192161153871548,
|
|
"learning_rate": 0.00014814063678332667,
|
|
"loss": 0.6756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6686065793037415,
|
|
"step": 1230,
|
|
"valid_targets_mean": 16233.2,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.329597010942087,
|
|
"grad_norm": 0.5106099067287116,
|
|
"learning_rate": 0.00014810178201058323,
|
|
"loss": 0.6734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6661728620529175,
|
|
"step": 1235,
|
|
"valid_targets_mean": 15632.0,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 0.3309314117961035,
|
|
"grad_norm": 0.4881815060864999,
|
|
"learning_rate": 0.00014806253065638786,
|
|
"loss": 0.674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6498801112174988,
|
|
"step": 1240,
|
|
"valid_targets_mean": 17617.5,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.3322658126501201,
|
|
"grad_norm": 0.39268738643883516,
|
|
"learning_rate": 0.00014802288293368148,
|
|
"loss": 0.6727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6666196584701538,
|
|
"step": 1245,
|
|
"valid_targets_mean": 16153.7,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 0.33360021350413666,
|
|
"grad_norm": 0.45417304392434,
|
|
"learning_rate": 0.0001479828390575553,
|
|
"loss": 0.6853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6876928210258484,
|
|
"step": 1250,
|
|
"valid_targets_mean": 15841.5,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.3349346143581532,
|
|
"grad_norm": 0.5268128840871439,
|
|
"learning_rate": 0.00014794239924524968,
|
|
"loss": 0.6826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6616125106811523,
|
|
"step": 1255,
|
|
"valid_targets_mean": 16973.3,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 0.33626901521216973,
|
|
"grad_norm": 0.39958555391425904,
|
|
"learning_rate": 0.00014790156371615303,
|
|
"loss": 0.6851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6925134062767029,
|
|
"step": 1260,
|
|
"valid_targets_mean": 16514.4,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 0.33760341606618627,
|
|
"grad_norm": 0.5516259004118991,
|
|
"learning_rate": 0.00014786033269180044,
|
|
"loss": 0.6656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.663733959197998,
|
|
"step": 1265,
|
|
"valid_targets_mean": 16839.6,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 0.3389378169202028,
|
|
"grad_norm": 0.5390064885544619,
|
|
"learning_rate": 0.00014781870639587262,
|
|
"loss": 0.688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6990649700164795,
|
|
"step": 1270,
|
|
"valid_targets_mean": 15894.0,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 0.3402722177742194,
|
|
"grad_norm": 0.5509217844316656,
|
|
"learning_rate": 0.0001477766850541947,
|
|
"loss": 0.703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6773085594177246,
|
|
"step": 1275,
|
|
"valid_targets_mean": 17158.7,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 0.34160661862823594,
|
|
"grad_norm": 0.4521786173995471,
|
|
"learning_rate": 0.00014773426889473493,
|
|
"loss": 0.6944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6869018077850342,
|
|
"step": 1280,
|
|
"valid_targets_mean": 15783.5,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 0.3429410194822525,
|
|
"grad_norm": 0.45400640165875983,
|
|
"learning_rate": 0.0001476914581476034,
|
|
"loss": 0.7053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6948401927947998,
|
|
"step": 1285,
|
|
"valid_targets_mean": 16444.6,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 0.344275420336269,
|
|
"grad_norm": 0.362088704550205,
|
|
"learning_rate": 0.000147648253045051,
|
|
"loss": 0.6806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6431831121444702,
|
|
"step": 1290,
|
|
"valid_targets_mean": 16384.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 0.34560982119028555,
|
|
"grad_norm": 0.48893960485771915,
|
|
"learning_rate": 0.0001476046538214679,
|
|
"loss": 0.6874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7072966694831848,
|
|
"step": 1295,
|
|
"valid_targets_mean": 14666.8,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 0.3469442220443021,
|
|
"grad_norm": 0.41898598637304735,
|
|
"learning_rate": 0.00014756066071338247,
|
|
"loss": 0.7022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7229539155960083,
|
|
"step": 1300,
|
|
"valid_targets_mean": 15230.1,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 0.3482786228983187,
|
|
"grad_norm": 0.3188940756358046,
|
|
"learning_rate": 0.00014751627395945984,
|
|
"loss": 0.6833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6817530393600464,
|
|
"step": 1305,
|
|
"valid_targets_mean": 16233.5,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 0.3496130237523352,
|
|
"grad_norm": 0.4933450593141746,
|
|
"learning_rate": 0.0001474714938005008,
|
|
"loss": 0.6801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7076213359832764,
|
|
"step": 1310,
|
|
"valid_targets_mean": 15680.3,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 0.35094742460635175,
|
|
"grad_norm": 0.7583732357714017,
|
|
"learning_rate": 0.00014742632047944033,
|
|
"loss": 0.7026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7003888487815857,
|
|
"step": 1315,
|
|
"valid_targets_mean": 15842.3,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 0.3522818254603683,
|
|
"grad_norm": 0.5317718895846837,
|
|
"learning_rate": 0.00014738075424134634,
|
|
"loss": 0.6867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.691694438457489,
|
|
"step": 1320,
|
|
"valid_targets_mean": 15884.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.35361622631438483,
|
|
"grad_norm": 0.3501119585201358,
|
|
"learning_rate": 0.00014733479533341837,
|
|
"loss": 0.6798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6872516870498657,
|
|
"step": 1325,
|
|
"valid_targets_mean": 15958.3,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 0.35495062716840137,
|
|
"grad_norm": 1.00059795566324,
|
|
"learning_rate": 0.00014728844400498616,
|
|
"loss": 0.6892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6725267767906189,
|
|
"step": 1330,
|
|
"valid_targets_mean": 17093.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.35628502802241796,
|
|
"grad_norm": 0.48989419606651297,
|
|
"learning_rate": 0.00014724170050750836,
|
|
"loss": 0.6817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6638458967208862,
|
|
"step": 1335,
|
|
"valid_targets_mean": 16706.3,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 0.3576194288764345,
|
|
"grad_norm": 0.7494594095135862,
|
|
"learning_rate": 0.00014719456509457122,
|
|
"loss": 0.6923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6833698749542236,
|
|
"step": 1340,
|
|
"valid_targets_mean": 15229.3,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 0.35895382973045104,
|
|
"grad_norm": 0.37983172533408244,
|
|
"learning_rate": 0.00014714703802188713,
|
|
"loss": 0.6856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6993082165718079,
|
|
"step": 1345,
|
|
"valid_targets_mean": 16149.0,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.3602882305844676,
|
|
"grad_norm": 0.3465469659019351,
|
|
"learning_rate": 0.0001470991195472932,
|
|
"loss": 0.6926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6387639045715332,
|
|
"step": 1350,
|
|
"valid_targets_mean": 17439.2,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 0.3616226314384841,
|
|
"grad_norm": 0.5631916032503279,
|
|
"learning_rate": 0.00014705080993075,
|
|
"loss": 0.6797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6804531812667847,
|
|
"step": 1355,
|
|
"valid_targets_mean": 16052.1,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.36295703229250065,
|
|
"grad_norm": 0.37277054991570224,
|
|
"learning_rate": 0.00014700210943433998,
|
|
"loss": 0.6887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6752945184707642,
|
|
"step": 1360,
|
|
"valid_targets_mean": 16611.9,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.36429143314651724,
|
|
"grad_norm": 0.3293587026381126,
|
|
"learning_rate": 0.00014695301832226627,
|
|
"loss": 0.6946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.674695611000061,
|
|
"step": 1365,
|
|
"valid_targets_mean": 15535.9,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 0.3656258340005338,
|
|
"grad_norm": 0.4365531221583474,
|
|
"learning_rate": 0.00014690353686085098,
|
|
"loss": 0.6784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6803010702133179,
|
|
"step": 1370,
|
|
"valid_targets_mean": 15812.1,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 0.3669602348545503,
|
|
"grad_norm": 0.7908663899399473,
|
|
"learning_rate": 0.00014685366531853395,
|
|
"loss": 0.6851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6726727485656738,
|
|
"step": 1375,
|
|
"valid_targets_mean": 15403.2,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 0.36829463570856685,
|
|
"grad_norm": 1.3815189102486642,
|
|
"learning_rate": 0.00014680340396587118,
|
|
"loss": 0.6846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7047728300094604,
|
|
"step": 1380,
|
|
"valid_targets_mean": 15841.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 0.3696290365625834,
|
|
"grad_norm": 0.40461431595371766,
|
|
"learning_rate": 0.0001467527530755335,
|
|
"loss": 0.6702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6716399192810059,
|
|
"step": 1385,
|
|
"valid_targets_mean": 16679.5,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 0.37096343741659993,
|
|
"grad_norm": 0.4068160169676624,
|
|
"learning_rate": 0.000146701712922305,
|
|
"loss": 0.6769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.665133535861969,
|
|
"step": 1390,
|
|
"valid_targets_mean": 16473.8,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.37229783827061647,
|
|
"grad_norm": 0.3550489059286771,
|
|
"learning_rate": 0.00014665028378308138,
|
|
"loss": 0.6821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.682301938533783,
|
|
"step": 1395,
|
|
"valid_targets_mean": 15832.7,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.37363223912463306,
|
|
"grad_norm": 0.4178343093762636,
|
|
"learning_rate": 0.00014659846593686885,
|
|
"loss": 0.6841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6964566707611084,
|
|
"step": 1400,
|
|
"valid_targets_mean": 15847.7,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.3749666399786496,
|
|
"grad_norm": 0.401979565184018,
|
|
"learning_rate": 0.0001465462596647822,
|
|
"loss": 0.6889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.677052915096283,
|
|
"step": 1405,
|
|
"valid_targets_mean": 15447.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.37630104083266613,
|
|
"grad_norm": 0.5448181658750829,
|
|
"learning_rate": 0.0001464936652500435,
|
|
"loss": 0.6584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6615155339241028,
|
|
"step": 1410,
|
|
"valid_targets_mean": 16624.9,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.37763544168668267,
|
|
"grad_norm": 0.5579138632655442,
|
|
"learning_rate": 0.0001464406829779806,
|
|
"loss": 0.6749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.674919843673706,
|
|
"step": 1415,
|
|
"valid_targets_mean": 15348.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.3789698425406992,
|
|
"grad_norm": 0.39013043281827303,
|
|
"learning_rate": 0.0001463873131360254,
|
|
"loss": 0.6687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6693284511566162,
|
|
"step": 1420,
|
|
"valid_targets_mean": 16909.7,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.38030424339471575,
|
|
"grad_norm": 0.5011272124507244,
|
|
"learning_rate": 0.0001463335560137124,
|
|
"loss": 0.6747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6802507042884827,
|
|
"step": 1425,
|
|
"valid_targets_mean": 16083.5,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 0.38163864424873234,
|
|
"grad_norm": 0.5017249688669775,
|
|
"learning_rate": 0.00014627941190267717,
|
|
"loss": 0.6746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.670562744140625,
|
|
"step": 1430,
|
|
"valid_targets_mean": 16523.4,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.3829730451027489,
|
|
"grad_norm": 0.4734565806098903,
|
|
"learning_rate": 0.00014622488109665468,
|
|
"loss": 0.6876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6966683864593506,
|
|
"step": 1435,
|
|
"valid_targets_mean": 16505.3,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.3843074459567654,
|
|
"grad_norm": 0.6108752114255883,
|
|
"learning_rate": 0.0001461699638914777,
|
|
"loss": 0.6794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6841604709625244,
|
|
"step": 1440,
|
|
"valid_targets_mean": 15593.2,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 0.38564184681078195,
|
|
"grad_norm": 0.6424892327231314,
|
|
"learning_rate": 0.00014611466058507536,
|
|
"loss": 0.674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6654868125915527,
|
|
"step": 1445,
|
|
"valid_targets_mean": 15160.8,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 0.3869762476647985,
|
|
"grad_norm": 0.9055516118079188,
|
|
"learning_rate": 0.00014605897147747132,
|
|
"loss": 0.6856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6786572933197021,
|
|
"step": 1450,
|
|
"valid_targets_mean": 15595.1,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.388310648518815,
|
|
"grad_norm": 0.8543812922261468,
|
|
"learning_rate": 0.0001460028968707822,
|
|
"loss": 0.6903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7012844681739807,
|
|
"step": 1455,
|
|
"valid_targets_mean": 14374.6,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 0.3896450493728316,
|
|
"grad_norm": 0.8972686658958234,
|
|
"learning_rate": 0.0001459464370692161,
|
|
"loss": 0.676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.676931619644165,
|
|
"step": 1460,
|
|
"valid_targets_mean": 16331.3,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 0.39097945022684816,
|
|
"grad_norm": 0.7135716327186163,
|
|
"learning_rate": 0.0001458895923790707,
|
|
"loss": 0.6781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7019766569137573,
|
|
"step": 1465,
|
|
"valid_targets_mean": 15569.0,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.3923138510808647,
|
|
"grad_norm": 0.6691202341390122,
|
|
"learning_rate": 0.0001458323631087318,
|
|
"loss": 0.6753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6568318009376526,
|
|
"step": 1470,
|
|
"valid_targets_mean": 16415.5,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.39364825193488123,
|
|
"grad_norm": 0.7612668292031027,
|
|
"learning_rate": 0.00014577474956867155,
|
|
"loss": 0.6575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6636608839035034,
|
|
"step": 1475,
|
|
"valid_targets_mean": 15385.0,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 0.39498265278889777,
|
|
"grad_norm": 0.6687924429811246,
|
|
"learning_rate": 0.00014571675207144676,
|
|
"loss": 0.6844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6870772838592529,
|
|
"step": 1480,
|
|
"valid_targets_mean": 14585.1,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 0.3963170536429143,
|
|
"grad_norm": 0.7648107865260791,
|
|
"learning_rate": 0.00014565837093169728,
|
|
"loss": 0.6888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6783605813980103,
|
|
"step": 1485,
|
|
"valid_targets_mean": 16069.2,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 0.3976514544969309,
|
|
"grad_norm": 0.7509803855864979,
|
|
"learning_rate": 0.00014559960646614418,
|
|
"loss": 0.6885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.670478343963623,
|
|
"step": 1490,
|
|
"valid_targets_mean": 15756.6,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.39898585535094744,
|
|
"grad_norm": 0.5057989841414792,
|
|
"learning_rate": 0.00014554045899358814,
|
|
"loss": 0.6761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6895899772644043,
|
|
"step": 1495,
|
|
"valid_targets_mean": 16583.3,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 0.400320256204964,
|
|
"grad_norm": 0.7744390589162883,
|
|
"learning_rate": 0.00014548092883490772,
|
|
"loss": 0.6789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7109715938568115,
|
|
"step": 1500,
|
|
"valid_targets_mean": 16087.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 0.4016546570589805,
|
|
"grad_norm": 0.5876072256129112,
|
|
"learning_rate": 0.00014542101631305745,
|
|
"loss": 0.691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.69822758436203,
|
|
"step": 1505,
|
|
"valid_targets_mean": 14998.5,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.40298905791299705,
|
|
"grad_norm": 0.6284258613098378,
|
|
"learning_rate": 0.00014536072175306628,
|
|
"loss": 0.6722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6593598127365112,
|
|
"step": 1510,
|
|
"valid_targets_mean": 15673.6,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 0.4043234587670136,
|
|
"grad_norm": 0.514432212840916,
|
|
"learning_rate": 0.00014530004548203573,
|
|
"loss": 0.6638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6542955040931702,
|
|
"step": 1515,
|
|
"valid_targets_mean": 15986.3,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 0.4056578596210302,
|
|
"grad_norm": 0.5934282118071859,
|
|
"learning_rate": 0.00014523898782913818,
|
|
"loss": 0.6935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6390407681465149,
|
|
"step": 1520,
|
|
"valid_targets_mean": 17350.2,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 0.4069922604750467,
|
|
"grad_norm": 0.4228690202369281,
|
|
"learning_rate": 0.00014517754912561496,
|
|
"loss": 0.679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6939337849617004,
|
|
"step": 1525,
|
|
"valid_targets_mean": 16316.9,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 0.40832666132906326,
|
|
"grad_norm": 0.33703841726812556,
|
|
"learning_rate": 0.00014511572970477457,
|
|
"loss": 0.6636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6497060060501099,
|
|
"step": 1530,
|
|
"valid_targets_mean": 16643.3,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 0.4096610621830798,
|
|
"grad_norm": 0.526601768832117,
|
|
"learning_rate": 0.00014505352990199107,
|
|
"loss": 0.6777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6721259355545044,
|
|
"step": 1535,
|
|
"valid_targets_mean": 16599.3,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 0.41099546303709633,
|
|
"grad_norm": 0.3717319051316816,
|
|
"learning_rate": 0.000144990950054702,
|
|
"loss": 0.675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6512713432312012,
|
|
"step": 1540,
|
|
"valid_targets_mean": 15734.2,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 0.41232986389111287,
|
|
"grad_norm": 0.3985153443670721,
|
|
"learning_rate": 0.0001449279905024067,
|
|
"loss": 0.6802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.680419921875,
|
|
"step": 1545,
|
|
"valid_targets_mean": 17000.0,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 0.41366426474512946,
|
|
"grad_norm": 0.5620426536574632,
|
|
"learning_rate": 0.00014486465158666443,
|
|
"loss": 0.6786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6959854364395142,
|
|
"step": 1550,
|
|
"valid_targets_mean": 15257.2,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.414998665599146,
|
|
"grad_norm": 0.5016374460094373,
|
|
"learning_rate": 0.00014480093365109252,
|
|
"loss": 0.6965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.681014895439148,
|
|
"step": 1555,
|
|
"valid_targets_mean": 16161.4,
|
|
"valid_targets_min": 82
|
|
},
|
|
{
|
|
"epoch": 0.41633306645316254,
|
|
"grad_norm": 0.45848828762812643,
|
|
"learning_rate": 0.00014473683704136454,
|
|
"loss": 0.6732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6678402423858643,
|
|
"step": 1560,
|
|
"valid_targets_mean": 15469.2,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.4176674673071791,
|
|
"grad_norm": 0.4510235067643482,
|
|
"learning_rate": 0.0001446723621052083,
|
|
"loss": 0.6729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6836432218551636,
|
|
"step": 1565,
|
|
"valid_targets_mean": 15537.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 0.4190018681611956,
|
|
"grad_norm": 0.2749325178114193,
|
|
"learning_rate": 0.00014460750919240416,
|
|
"loss": 0.661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6491097211837769,
|
|
"step": 1570,
|
|
"valid_targets_mean": 16182.7,
|
|
"valid_targets_min": 122
|
|
},
|
|
{
|
|
"epoch": 0.42033626901521215,
|
|
"grad_norm": 0.4048570514955066,
|
|
"learning_rate": 0.00014454227865478292,
|
|
"loss": 0.6766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6953452229499817,
|
|
"step": 1575,
|
|
"valid_targets_mean": 15749.0,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.42167066986922874,
|
|
"grad_norm": 0.43507105267463514,
|
|
"learning_rate": 0.0001444766708462241,
|
|
"loss": 0.6901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6905909776687622,
|
|
"step": 1580,
|
|
"valid_targets_mean": 15047.8,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 0.4230050707232453,
|
|
"grad_norm": 0.4023428012328776,
|
|
"learning_rate": 0.0001444106861226539,
|
|
"loss": 0.6758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6905125379562378,
|
|
"step": 1585,
|
|
"valid_targets_mean": 16260.8,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.4243394715772618,
|
|
"grad_norm": 0.32040888502490367,
|
|
"learning_rate": 0.00014434432484204335,
|
|
"loss": 0.669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6988633275032043,
|
|
"step": 1590,
|
|
"valid_targets_mean": 15973.1,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.42567387243127836,
|
|
"grad_norm": 0.36245431872562145,
|
|
"learning_rate": 0.0001442775873644062,
|
|
"loss": 0.673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6784532070159912,
|
|
"step": 1595,
|
|
"valid_targets_mean": 16945.1,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 0.4270082732852949,
|
|
"grad_norm": 0.7427033015141541,
|
|
"learning_rate": 0.00014421047405179725,
|
|
"loss": 0.6803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6956072449684143,
|
|
"step": 1600,
|
|
"valid_targets_mean": 16470.8,
|
|
"valid_targets_min": 118
|
|
},
|
|
{
|
|
"epoch": 0.42834267413931143,
|
|
"grad_norm": 0.693817834423677,
|
|
"learning_rate": 0.00014414298526831012,
|
|
"loss": 0.6798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6564275622367859,
|
|
"step": 1605,
|
|
"valid_targets_mean": 16161.4,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 0.429677074993328,
|
|
"grad_norm": 0.5459010546987341,
|
|
"learning_rate": 0.00014407512138007547,
|
|
"loss": 0.6844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.674811065196991,
|
|
"step": 1610,
|
|
"valid_targets_mean": 15652.2,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.43101147584734456,
|
|
"grad_norm": 0.5203042449073534,
|
|
"learning_rate": 0.0001440068827552588,
|
|
"loss": 0.6608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6763057708740234,
|
|
"step": 1615,
|
|
"valid_targets_mean": 15514.4,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.4323458767013611,
|
|
"grad_norm": 0.5675075818567459,
|
|
"learning_rate": 0.0001439382697640587,
|
|
"loss": 0.6947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6874855160713196,
|
|
"step": 1620,
|
|
"valid_targets_mean": 15263.9,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.43368027755537764,
|
|
"grad_norm": 0.5359889737901847,
|
|
"learning_rate": 0.00014386928277870461,
|
|
"loss": 0.656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.669428825378418,
|
|
"step": 1625,
|
|
"valid_targets_mean": 16017.4,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.4350146784093942,
|
|
"grad_norm": 0.5408636943550875,
|
|
"learning_rate": 0.00014379992217345504,
|
|
"loss": 0.6933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.659244954586029,
|
|
"step": 1630,
|
|
"valid_targets_mean": 15961.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.4363490792634107,
|
|
"grad_norm": 0.37778085390205296,
|
|
"learning_rate": 0.0001437301883245953,
|
|
"loss": 0.6755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6465282440185547,
|
|
"step": 1635,
|
|
"valid_targets_mean": 16366.9,
|
|
"valid_targets_min": 70
|
|
},
|
|
{
|
|
"epoch": 0.43768348011742725,
|
|
"grad_norm": 0.6428136358402293,
|
|
"learning_rate": 0.0001436600816104356,
|
|
"loss": 0.6701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6930896639823914,
|
|
"step": 1640,
|
|
"valid_targets_mean": 16816.4,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 0.43901788097144384,
|
|
"grad_norm": 0.485887803251701,
|
|
"learning_rate": 0.000143589602411309,
|
|
"loss": 0.6714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6707935929298401,
|
|
"step": 1645,
|
|
"valid_targets_mean": 15216.7,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 0.4403522818254604,
|
|
"grad_norm": 0.3717825278137594,
|
|
"learning_rate": 0.00014351875110956927,
|
|
"loss": 0.6682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7038379907608032,
|
|
"step": 1650,
|
|
"valid_targets_mean": 15937.6,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 0.4416866826794769,
|
|
"grad_norm": 0.4670503910336374,
|
|
"learning_rate": 0.00014344752808958887,
|
|
"loss": 0.6872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7037096619606018,
|
|
"step": 1655,
|
|
"valid_targets_mean": 15996.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 0.44302108353349345,
|
|
"grad_norm": 0.5130492428941893,
|
|
"learning_rate": 0.0001433759337377569,
|
|
"loss": 0.669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6803580522537231,
|
|
"step": 1660,
|
|
"valid_targets_mean": 15066.6,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 0.44435548438751,
|
|
"grad_norm": 0.42492544340198113,
|
|
"learning_rate": 0.00014330396844247685,
|
|
"loss": 0.6658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6324959993362427,
|
|
"step": 1665,
|
|
"valid_targets_mean": 17875.3,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 0.44568988524152653,
|
|
"grad_norm": 0.48243381726484286,
|
|
"learning_rate": 0.0001432316325941647,
|
|
"loss": 0.6726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6762223243713379,
|
|
"step": 1670,
|
|
"valid_targets_mean": 15398.0,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 0.4470242860955431,
|
|
"grad_norm": 0.4262271531125559,
|
|
"learning_rate": 0.00014315892658524665,
|
|
"loss": 0.6806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6871470808982849,
|
|
"step": 1675,
|
|
"valid_targets_mean": 15288.2,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 0.44835868694955966,
|
|
"grad_norm": 0.4564467834328404,
|
|
"learning_rate": 0.0001430858508101571,
|
|
"loss": 0.6527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6515965461730957,
|
|
"step": 1680,
|
|
"valid_targets_mean": 16011.1,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.4496930878035762,
|
|
"grad_norm": 0.3155919440895114,
|
|
"learning_rate": 0.00014301240566533637,
|
|
"loss": 0.6911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6801489591598511,
|
|
"step": 1685,
|
|
"valid_targets_mean": 16203.3,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.45102748865759273,
|
|
"grad_norm": 0.39341185029050857,
|
|
"learning_rate": 0.00014293859154922866,
|
|
"loss": 0.6644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6648285388946533,
|
|
"step": 1690,
|
|
"valid_targets_mean": 15043.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 0.45236188951160927,
|
|
"grad_norm": 0.5458457405844297,
|
|
"learning_rate": 0.00014286440886227995,
|
|
"loss": 0.6786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7056168913841248,
|
|
"step": 1695,
|
|
"valid_targets_mean": 15284.2,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 0.4536962903656258,
|
|
"grad_norm": 0.5395188248855505,
|
|
"learning_rate": 0.00014278985800693562,
|
|
"loss": 0.6921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6924360990524292,
|
|
"step": 1700,
|
|
"valid_targets_mean": 16265.5,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 0.4550306912196424,
|
|
"grad_norm": 0.5110930142156801,
|
|
"learning_rate": 0.0001427149393876384,
|
|
"loss": 0.6653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6504957675933838,
|
|
"step": 1705,
|
|
"valid_targets_mean": 15499.0,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 0.45636509207365894,
|
|
"grad_norm": 0.38747352375826066,
|
|
"learning_rate": 0.00014263965341082622,
|
|
"loss": 0.6748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6646894812583923,
|
|
"step": 1710,
|
|
"valid_targets_mean": 16010.1,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 0.4576994929276755,
|
|
"grad_norm": 0.4157399117530123,
|
|
"learning_rate": 0.00014256400048492994,
|
|
"loss": 0.6913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.696425199508667,
|
|
"step": 1715,
|
|
"valid_targets_mean": 16692.4,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.459033893781692,
|
|
"grad_norm": 0.40234560772772643,
|
|
"learning_rate": 0.00014248798102037106,
|
|
"loss": 0.652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6323100924491882,
|
|
"step": 1720,
|
|
"valid_targets_mean": 16598.7,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 0.46036829463570855,
|
|
"grad_norm": 0.4105686175825738,
|
|
"learning_rate": 0.0001424115954295597,
|
|
"loss": 0.6779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6720783710479736,
|
|
"step": 1725,
|
|
"valid_targets_mean": 15398.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 0.4617026954897251,
|
|
"grad_norm": 0.5288517622927207,
|
|
"learning_rate": 0.00014233484412689212,
|
|
"loss": 0.6751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7059606909751892,
|
|
"step": 1730,
|
|
"valid_targets_mean": 14793.6,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 0.4630370963437417,
|
|
"grad_norm": 0.4151350926192914,
|
|
"learning_rate": 0.00014225772752874866,
|
|
"loss": 0.6628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6483746767044067,
|
|
"step": 1735,
|
|
"valid_targets_mean": 16105.2,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.4643714971977582,
|
|
"grad_norm": 0.6172763590468829,
|
|
"learning_rate": 0.00014218024605349133,
|
|
"loss": 0.6557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6697094440460205,
|
|
"step": 1740,
|
|
"valid_targets_mean": 16343.8,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 0.46570589805177476,
|
|
"grad_norm": 0.4928889614387702,
|
|
"learning_rate": 0.00014210240012146178,
|
|
"loss": 0.6684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6752841472625732,
|
|
"step": 1745,
|
|
"valid_targets_mean": 16355.5,
|
|
"valid_targets_min": 7
|
|
},
|
|
{
|
|
"epoch": 0.4670402989057913,
|
|
"grad_norm": 0.5095363864894301,
|
|
"learning_rate": 0.00014202419015497863,
|
|
"loss": 0.6772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6391301155090332,
|
|
"step": 1750,
|
|
"valid_targets_mean": 14801.8,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.46837469975980783,
|
|
"grad_norm": 0.40201996373625287,
|
|
"learning_rate": 0.00014194561657833563,
|
|
"loss": 0.6738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.663654625415802,
|
|
"step": 1755,
|
|
"valid_targets_mean": 15804.3,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.46970910061382437,
|
|
"grad_norm": 0.3900175407918852,
|
|
"learning_rate": 0.000141866679817799,
|
|
"loss": 0.6652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6548658609390259,
|
|
"step": 1760,
|
|
"valid_targets_mean": 16266.0,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 0.47104350146784096,
|
|
"grad_norm": 0.6979606870040465,
|
|
"learning_rate": 0.00014178738030160527,
|
|
"loss": 0.6765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7068432569503784,
|
|
"step": 1765,
|
|
"valid_targets_mean": 15426.0,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 0.4723779023218575,
|
|
"grad_norm": 0.43383345904843723,
|
|
"learning_rate": 0.000141707718459959,
|
|
"loss": 0.6717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6808011531829834,
|
|
"step": 1770,
|
|
"valid_targets_mean": 16582.9,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 0.47371230317587404,
|
|
"grad_norm": 0.45845628788358933,
|
|
"learning_rate": 0.00014162769472503032,
|
|
"loss": 0.6519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6384435892105103,
|
|
"step": 1775,
|
|
"valid_targets_mean": 16704.6,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.4750467040298906,
|
|
"grad_norm": 0.3777800679246702,
|
|
"learning_rate": 0.00014154730953095274,
|
|
"loss": 0.6537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6459342241287231,
|
|
"step": 1780,
|
|
"valid_targets_mean": 16872.8,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 0.4763811048839071,
|
|
"grad_norm": 0.4286481570503636,
|
|
"learning_rate": 0.0001414665633138206,
|
|
"loss": 0.6809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6782388687133789,
|
|
"step": 1785,
|
|
"valid_targets_mean": 16174.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 0.47771550573792365,
|
|
"grad_norm": 0.4005345889086281,
|
|
"learning_rate": 0.00014138545651168683,
|
|
"loss": 0.6836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6628920435905457,
|
|
"step": 1790,
|
|
"valid_targets_mean": 16390.7,
|
|
"valid_targets_min": 16
|
|
},
|
|
{
|
|
"epoch": 0.47904990659194024,
|
|
"grad_norm": 0.4115589182888216,
|
|
"learning_rate": 0.00014130398956456062,
|
|
"loss": 0.6641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6702592968940735,
|
|
"step": 1795,
|
|
"valid_targets_mean": 16677.2,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.4803843074459568,
|
|
"grad_norm": 0.5386199146943589,
|
|
"learning_rate": 0.00014122216291440496,
|
|
"loss": 0.6713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6504049301147461,
|
|
"step": 1800,
|
|
"valid_targets_mean": 16834.6,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 0.4817187082999733,
|
|
"grad_norm": 0.43594882378266525,
|
|
"learning_rate": 0.00014113997700513422,
|
|
"loss": 0.6783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6663535237312317,
|
|
"step": 1805,
|
|
"valid_targets_mean": 16627.7,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 0.48305310915398986,
|
|
"grad_norm": 0.36323537758193364,
|
|
"learning_rate": 0.00014105743228261174,
|
|
"loss": 0.6494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6430245637893677,
|
|
"step": 1810,
|
|
"valid_targets_mean": 15963.8,
|
|
"valid_targets_min": 10
|
|
},
|
|
{
|
|
"epoch": 0.4843875100080064,
|
|
"grad_norm": 0.4816559868698329,
|
|
"learning_rate": 0.00014097452919464748,
|
|
"loss": 0.6816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6982545852661133,
|
|
"step": 1815,
|
|
"valid_targets_mean": 15820.7,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 0.48572191086202293,
|
|
"grad_norm": 0.637279774432971,
|
|
"learning_rate": 0.00014089126819099557,
|
|
"loss": 0.66,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6218864917755127,
|
|
"step": 1820,
|
|
"valid_targets_mean": 15937.6,
|
|
"valid_targets_min": 72
|
|
},
|
|
{
|
|
"epoch": 0.4870563117160395,
|
|
"grad_norm": 0.46051330636380267,
|
|
"learning_rate": 0.00014080764972335184,
|
|
"loss": 0.6817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7032638788223267,
|
|
"step": 1825,
|
|
"valid_targets_mean": 16084.2,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 0.48839071257005606,
|
|
"grad_norm": 0.4185133479284188,
|
|
"learning_rate": 0.00014072367424535135,
|
|
"loss": 0.6693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.661512553691864,
|
|
"step": 1830,
|
|
"valid_targets_mean": 14985.0,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.4897251134240726,
|
|
"grad_norm": 0.5342467862945137,
|
|
"learning_rate": 0.000140639342212566,
|
|
"loss": 0.6695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6464630961418152,
|
|
"step": 1835,
|
|
"valid_targets_mean": 17390.7,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.49105951427808914,
|
|
"grad_norm": 0.6111007650530857,
|
|
"learning_rate": 0.000140554654082502,
|
|
"loss": 0.6667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6647217273712158,
|
|
"step": 1840,
|
|
"valid_targets_mean": 16947.6,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 0.4923939151321057,
|
|
"grad_norm": 0.3474028404883341,
|
|
"learning_rate": 0.0001404696103145975,
|
|
"loss": 0.6849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6861566305160522,
|
|
"step": 1845,
|
|
"valid_targets_mean": 15907.7,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.4937283159861222,
|
|
"grad_norm": 0.5451616019461446,
|
|
"learning_rate": 0.0001403842113702198,
|
|
"loss": 0.6609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6573194265365601,
|
|
"step": 1850,
|
|
"valid_targets_mean": 16478.6,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 0.49506271684013875,
|
|
"grad_norm": 0.7004212131411925,
|
|
"learning_rate": 0.00014029845771266325,
|
|
"loss": 0.6801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6477423906326294,
|
|
"step": 1855,
|
|
"valid_targets_mean": 15260.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 0.49639711769415534,
|
|
"grad_norm": 0.7048434342299513,
|
|
"learning_rate": 0.00014021234980714648,
|
|
"loss": 0.6827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6603525876998901,
|
|
"step": 1860,
|
|
"valid_targets_mean": 15962.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 0.4977315185481719,
|
|
"grad_norm": 0.756775978206065,
|
|
"learning_rate": 0.00014012588812080992,
|
|
"loss": 0.6815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6624361276626587,
|
|
"step": 1865,
|
|
"valid_targets_mean": 16108.6,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 0.4990659194021884,
|
|
"grad_norm": 0.5783522972899093,
|
|
"learning_rate": 0.00014003907312271319,
|
|
"loss": 0.6667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6649295091629028,
|
|
"step": 1870,
|
|
"valid_targets_mean": 16909.0,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 0.500400320256205,
|
|
"grad_norm": 0.7832629887077739,
|
|
"learning_rate": 0.0001399519052838329,
|
|
"loss": 0.6664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6546527147293091,
|
|
"step": 1875,
|
|
"valid_targets_mean": 16595.7,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 0.5017347211102215,
|
|
"grad_norm": 0.939021148548249,
|
|
"learning_rate": 0.00013986438507705958,
|
|
"loss": 0.6742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6604918241500854,
|
|
"step": 1880,
|
|
"valid_targets_mean": 15948.0,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 0.503069121964238,
|
|
"grad_norm": 0.8662965417826878,
|
|
"learning_rate": 0.00013977651297719548,
|
|
"loss": 0.6739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6684395670890808,
|
|
"step": 1885,
|
|
"valid_targets_mean": 16201.3,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 0.5044035228182546,
|
|
"grad_norm": 0.9088217892111629,
|
|
"learning_rate": 0.00013968828946095193,
|
|
"loss": 0.6594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6788122653961182,
|
|
"step": 1890,
|
|
"valid_targets_mean": 15575.2,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 0.5057379236722711,
|
|
"grad_norm": 0.6344513905134873,
|
|
"learning_rate": 0.00013959971500694668,
|
|
"loss": 0.6672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6416043043136597,
|
|
"step": 1895,
|
|
"valid_targets_mean": 15835.0,
|
|
"valid_targets_min": 12
|
|
},
|
|
{
|
|
"epoch": 0.5070723245262877,
|
|
"grad_norm": 0.8336148657488304,
|
|
"learning_rate": 0.00013951079009570132,
|
|
"loss": 0.67,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6694490909576416,
|
|
"step": 1900,
|
|
"valid_targets_mean": 16032.9,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 0.5084067253803043,
|
|
"grad_norm": 0.6260500989696588,
|
|
"learning_rate": 0.00013942151520963868,
|
|
"loss": 0.6659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6914685964584351,
|
|
"step": 1905,
|
|
"valid_targets_mean": 16321.1,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.5097411262343208,
|
|
"grad_norm": 0.7405051356611972,
|
|
"learning_rate": 0.00013933189083308031,
|
|
"loss": 0.6738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7031766176223755,
|
|
"step": 1910,
|
|
"valid_targets_mean": 17288.0,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.5110755270883374,
|
|
"grad_norm": 0.6086327457057731,
|
|
"learning_rate": 0.00013924191745224364,
|
|
"loss": 0.6608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6749123334884644,
|
|
"step": 1915,
|
|
"valid_targets_mean": 15469.2,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 0.5124099279423538,
|
|
"grad_norm": 0.6877904338270722,
|
|
"learning_rate": 0.00013915159555523957,
|
|
"loss": 0.6848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6368306279182434,
|
|
"step": 1920,
|
|
"valid_targets_mean": 16919.3,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 0.5137443287963704,
|
|
"grad_norm": 0.47035305733386035,
|
|
"learning_rate": 0.00013906092563206968,
|
|
"loss": 0.6524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6483859419822693,
|
|
"step": 1925,
|
|
"valid_targets_mean": 15656.5,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 0.5150787296503869,
|
|
"grad_norm": 0.45402330651367906,
|
|
"learning_rate": 0.00013896990817462363,
|
|
"loss": 0.6795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7136232256889343,
|
|
"step": 1930,
|
|
"valid_targets_mean": 16150.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.5164131305044035,
|
|
"grad_norm": 0.7824076954450869,
|
|
"learning_rate": 0.00013887854367667645,
|
|
"loss": 0.6714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6787303686141968,
|
|
"step": 1935,
|
|
"valid_targets_mean": 16542.0,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 0.5177475313584201,
|
|
"grad_norm": 0.5056478751091026,
|
|
"learning_rate": 0.00013878683263388587,
|
|
"loss": 0.6747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6699013113975525,
|
|
"step": 1940,
|
|
"valid_targets_mean": 14940.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.5190819322124366,
|
|
"grad_norm": 0.5252915459634886,
|
|
"learning_rate": 0.00013869477554378972,
|
|
"loss": 0.6727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6794301271438599,
|
|
"step": 1945,
|
|
"valid_targets_mean": 14674.8,
|
|
"valid_targets_min": 106
|
|
},
|
|
{
|
|
"epoch": 0.5204163330664532,
|
|
"grad_norm": 0.38122775082529703,
|
|
"learning_rate": 0.00013860237290580306,
|
|
"loss": 0.6532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6353006958961487,
|
|
"step": 1950,
|
|
"valid_targets_mean": 16347.3,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.5217507339204697,
|
|
"grad_norm": 0.4481900683975625,
|
|
"learning_rate": 0.00013850962522121569,
|
|
"loss": 0.679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6387051939964294,
|
|
"step": 1955,
|
|
"valid_targets_mean": 16739.2,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 0.5230851347744863,
|
|
"grad_norm": 0.3884767558072463,
|
|
"learning_rate": 0.00013841653299318915,
|
|
"loss": 0.6655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7000205516815186,
|
|
"step": 1960,
|
|
"valid_targets_mean": 16349.4,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 0.5244195356285029,
|
|
"grad_norm": 0.3751791438449631,
|
|
"learning_rate": 0.00013832309672675428,
|
|
"loss": 0.6503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6721636652946472,
|
|
"step": 1965,
|
|
"valid_targets_mean": 15760.5,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 0.5257539364825193,
|
|
"grad_norm": 0.5018554381804197,
|
|
"learning_rate": 0.00013822931692880828,
|
|
"loss": 0.6734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6598831415176392,
|
|
"step": 1970,
|
|
"valid_targets_mean": 16458.7,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 0.5270883373365359,
|
|
"grad_norm": 0.43344427993303225,
|
|
"learning_rate": 0.00013813519410811208,
|
|
"loss": 0.6681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6558117866516113,
|
|
"step": 1975,
|
|
"valid_targets_mean": 15469.7,
|
|
"valid_targets_min": 99
|
|
},
|
|
{
|
|
"epoch": 0.5284227381905524,
|
|
"grad_norm": 0.40192550153987733,
|
|
"learning_rate": 0.00013804072877528746,
|
|
"loss": 0.6816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6607159376144409,
|
|
"step": 1980,
|
|
"valid_targets_mean": 16332.3,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 0.529757139044569,
|
|
"grad_norm": 0.3261496597805123,
|
|
"learning_rate": 0.0001379459214428144,
|
|
"loss": 0.6596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6428033113479614,
|
|
"step": 1985,
|
|
"valid_targets_mean": 15988.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 0.5310915398985855,
|
|
"grad_norm": 0.2827103987399224,
|
|
"learning_rate": 0.0001378507726250283,
|
|
"loss": 0.6697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6552940011024475,
|
|
"step": 1990,
|
|
"valid_targets_mean": 16928.4,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.5324259407526021,
|
|
"grad_norm": 0.4131312586238756,
|
|
"learning_rate": 0.00013775528283811695,
|
|
"loss": 0.6639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6279177665710449,
|
|
"step": 1995,
|
|
"valid_targets_mean": 17712.5,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.5337603416066187,
|
|
"grad_norm": 0.4328575262764605,
|
|
"learning_rate": 0.00013765945260011815,
|
|
"loss": 0.6693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6813353300094604,
|
|
"step": 2000,
|
|
"valid_targets_mean": 15772.1,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 0.5350947424606352,
|
|
"grad_norm": 0.4303463028986665,
|
|
"learning_rate": 0.0001375632824309165,
|
|
"loss": 0.6642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6382510662078857,
|
|
"step": 2005,
|
|
"valid_targets_mean": 17243.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 0.5364291433146517,
|
|
"grad_norm": 0.31754546536948197,
|
|
"learning_rate": 0.00013746677285224082,
|
|
"loss": 0.6661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6832298636436462,
|
|
"step": 2010,
|
|
"valid_targets_mean": 15342.7,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.5377635441686682,
|
|
"grad_norm": 0.3224539660495597,
|
|
"learning_rate": 0.00013736992438766123,
|
|
"loss": 0.658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6509510278701782,
|
|
"step": 2015,
|
|
"valid_targets_mean": 15682.6,
|
|
"valid_targets_min": 20
|
|
},
|
|
{
|
|
"epoch": 0.5390979450226848,
|
|
"grad_norm": 0.3476987488221845,
|
|
"learning_rate": 0.00013727273756258633,
|
|
"loss": 0.6595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6609938144683838,
|
|
"step": 2020,
|
|
"valid_targets_mean": 16596.7,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 0.5404323458767014,
|
|
"grad_norm": 0.3341925429496843,
|
|
"learning_rate": 0.00013717521290426034,
|
|
"loss": 0.6584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6978187561035156,
|
|
"step": 2025,
|
|
"valid_targets_mean": 17569.2,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.5417667467307179,
|
|
"grad_norm": 0.31980325681357635,
|
|
"learning_rate": 0.00013707735094176026,
|
|
"loss": 0.663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6462590098381042,
|
|
"step": 2030,
|
|
"valid_targets_mean": 17217.4,
|
|
"valid_targets_min": 180
|
|
},
|
|
{
|
|
"epoch": 0.5431011475847345,
|
|
"grad_norm": 0.504110733594983,
|
|
"learning_rate": 0.00013697915220599294,
|
|
"loss": 0.6694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6722946763038635,
|
|
"step": 2035,
|
|
"valid_targets_mean": 15108.6,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 0.544435548438751,
|
|
"grad_norm": 0.39427036155645856,
|
|
"learning_rate": 0.0001368806172296923,
|
|
"loss": 0.6617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6628670692443848,
|
|
"step": 2040,
|
|
"valid_targets_mean": 17106.6,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 0.5457699492927676,
|
|
"grad_norm": 0.3798470884563092,
|
|
"learning_rate": 0.00013678174654741638,
|
|
"loss": 0.6478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6374893188476562,
|
|
"step": 2045,
|
|
"valid_targets_mean": 17227.5,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 0.547104350146784,
|
|
"grad_norm": 0.3542392713845455,
|
|
"learning_rate": 0.00013668254069554439,
|
|
"loss": 0.6764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6546074748039246,
|
|
"step": 2050,
|
|
"valid_targets_mean": 15193.7,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 0.5484387510008006,
|
|
"grad_norm": 0.3049919714486412,
|
|
"learning_rate": 0.00013658300021227387,
|
|
"loss": 0.6746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6780501008033752,
|
|
"step": 2055,
|
|
"valid_targets_mean": 16047.9,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.5497731518548172,
|
|
"grad_norm": 0.4094752526440804,
|
|
"learning_rate": 0.00013648312563761784,
|
|
"loss": 0.6497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.672895073890686,
|
|
"step": 2060,
|
|
"valid_targets_mean": 15821.3,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 0.5511075527088337,
|
|
"grad_norm": 0.2885708539239041,
|
|
"learning_rate": 0.0001363829175134017,
|
|
"loss": 0.6569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6362361907958984,
|
|
"step": 2065,
|
|
"valid_targets_mean": 16060.2,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.5524419535628503,
|
|
"grad_norm": 0.3679911308247253,
|
|
"learning_rate": 0.00013628237638326038,
|
|
"loss": 0.6684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6490753889083862,
|
|
"step": 2070,
|
|
"valid_targets_mean": 16748.8,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.5537763544168668,
|
|
"grad_norm": 0.4534861336884921,
|
|
"learning_rate": 0.0001361815027926354,
|
|
"loss": 0.6607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6785265207290649,
|
|
"step": 2075,
|
|
"valid_targets_mean": 17079.0,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 0.5551107552708834,
|
|
"grad_norm": 0.44250368563770404,
|
|
"learning_rate": 0.00013608029728877195,
|
|
"loss": 0.6572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6550644636154175,
|
|
"step": 2080,
|
|
"valid_targets_mean": 16177.8,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.5564451561248999,
|
|
"grad_norm": 0.3950264297661988,
|
|
"learning_rate": 0.00013597876042071574,
|
|
"loss": 0.6703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6962975263595581,
|
|
"step": 2085,
|
|
"valid_targets_mean": 14695.6,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 0.5577795569789165,
|
|
"grad_norm": 0.4343603248782463,
|
|
"learning_rate": 0.00013587689273931032,
|
|
"loss": 0.6631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6817919015884399,
|
|
"step": 2090,
|
|
"valid_targets_mean": 15244.0,
|
|
"valid_targets_min": 111
|
|
},
|
|
{
|
|
"epoch": 0.559113957832933,
|
|
"grad_norm": 0.3778902089930458,
|
|
"learning_rate": 0.00013577469479719376,
|
|
"loss": 0.6818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6776700615882874,
|
|
"step": 2095,
|
|
"valid_targets_mean": 16056.0,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 0.5604483586869495,
|
|
"grad_norm": 0.4665567574775196,
|
|
"learning_rate": 0.00013567216714879593,
|
|
"loss": 0.6575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.634872317314148,
|
|
"step": 2100,
|
|
"valid_targets_mean": 16989.1,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.5617827595409661,
|
|
"grad_norm": 6.260037404534871,
|
|
"learning_rate": 0.00013556931035033526,
|
|
"loss": 0.6993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7710926532745361,
|
|
"step": 2105,
|
|
"valid_targets_mean": 15841.9,
|
|
"valid_targets_min": 95
|
|
},
|
|
{
|
|
"epoch": 0.5631171603949826,
|
|
"grad_norm": 0.7013102551903544,
|
|
"learning_rate": 0.00013546612495981603,
|
|
"loss": 0.6811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6613208055496216,
|
|
"step": 2110,
|
|
"valid_targets_mean": 15519.8,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 0.5644515612489992,
|
|
"grad_norm": 0.47711928148353344,
|
|
"learning_rate": 0.00013536261153702494,
|
|
"loss": 0.661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.666735053062439,
|
|
"step": 2115,
|
|
"valid_targets_mean": 15697.4,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.5657859621030158,
|
|
"grad_norm": 0.6530144831314217,
|
|
"learning_rate": 0.0001352587706435284,
|
|
"loss": 0.6495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6208878755569458,
|
|
"step": 2120,
|
|
"valid_targets_mean": 15542.3,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 0.5671203629570323,
|
|
"grad_norm": 0.4575924813341875,
|
|
"learning_rate": 0.00013515460284266933,
|
|
"loss": 0.6653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6797025203704834,
|
|
"step": 2125,
|
|
"valid_targets_mean": 15705.2,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 0.5684547638110489,
|
|
"grad_norm": 0.4181593612220908,
|
|
"learning_rate": 0.0001350501086995642,
|
|
"loss": 0.6567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6510787606239319,
|
|
"step": 2130,
|
|
"valid_targets_mean": 16198.4,
|
|
"valid_targets_min": 101
|
|
},
|
|
{
|
|
"epoch": 0.5697891646650654,
|
|
"grad_norm": 0.5909184489265575,
|
|
"learning_rate": 0.00013494528878109978,
|
|
"loss": 0.6756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6520047783851624,
|
|
"step": 2135,
|
|
"valid_targets_mean": 15843.7,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 0.5711235655190819,
|
|
"grad_norm": 0.6030856727277748,
|
|
"learning_rate": 0.00013484014365593036,
|
|
"loss": 0.6668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6336778402328491,
|
|
"step": 2140,
|
|
"valid_targets_mean": 16958.3,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.5724579663730984,
|
|
"grad_norm": 0.4515248521744553,
|
|
"learning_rate": 0.00013473467389447436,
|
|
"loss": 0.6559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6533001661300659,
|
|
"step": 2145,
|
|
"valid_targets_mean": 15690.6,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.573792367227115,
|
|
"grad_norm": 0.4667994555141797,
|
|
"learning_rate": 0.0001346288800689114,
|
|
"loss": 0.6521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6496244668960571,
|
|
"step": 2150,
|
|
"valid_targets_mean": 15861.7,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.5751267680811316,
|
|
"grad_norm": 0.4435676406772137,
|
|
"learning_rate": 0.00013452276275317926,
|
|
"loss": 0.6762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6681039333343506,
|
|
"step": 2155,
|
|
"valid_targets_mean": 15310.4,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 0.5764611689351481,
|
|
"grad_norm": 0.36479657444039537,
|
|
"learning_rate": 0.00013441632252297054,
|
|
"loss": 0.6537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.627946138381958,
|
|
"step": 2160,
|
|
"valid_targets_mean": 16752.0,
|
|
"valid_targets_min": 144
|
|
},
|
|
{
|
|
"epoch": 0.5777955697891647,
|
|
"grad_norm": 0.34293032801686635,
|
|
"learning_rate": 0.0001343095599557297,
|
|
"loss": 0.6663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6876500844955444,
|
|
"step": 2165,
|
|
"valid_targets_mean": 15393.2,
|
|
"valid_targets_min": 115
|
|
},
|
|
{
|
|
"epoch": 0.5791299706431812,
|
|
"grad_norm": 0.4076228566130478,
|
|
"learning_rate": 0.00013420247563064998,
|
|
"loss": 0.6739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6447117924690247,
|
|
"step": 2170,
|
|
"valid_targets_mean": 15293.0,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.5804643714971978,
|
|
"grad_norm": 0.44537032041814034,
|
|
"learning_rate": 0.0001340950701286701,
|
|
"loss": 0.6695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6437950730323792,
|
|
"step": 2175,
|
|
"valid_targets_mean": 16650.6,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.5817987723512144,
|
|
"grad_norm": 0.3878069492513395,
|
|
"learning_rate": 0.0001339873440324712,
|
|
"loss": 0.6705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6598751544952393,
|
|
"step": 2180,
|
|
"valid_targets_mean": 15965.4,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 0.5831331732052308,
|
|
"grad_norm": 0.3411736097320242,
|
|
"learning_rate": 0.00013387929792647366,
|
|
"loss": 0.6558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6527872681617737,
|
|
"step": 2185,
|
|
"valid_targets_mean": 15891.7,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 0.5844675740592474,
|
|
"grad_norm": 0.3003184033242001,
|
|
"learning_rate": 0.00013377093239683396,
|
|
"loss": 0.6719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6643562316894531,
|
|
"step": 2190,
|
|
"valid_targets_mean": 15842.3,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.5858019749132639,
|
|
"grad_norm": 0.36648446645528304,
|
|
"learning_rate": 0.0001336622480314414,
|
|
"loss": 0.686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6981199979782104,
|
|
"step": 2195,
|
|
"valid_targets_mean": 14542.6,
|
|
"valid_targets_min": 118
|
|
},
|
|
{
|
|
"epoch": 0.5871363757672805,
|
|
"grad_norm": 0.36396613520492294,
|
|
"learning_rate": 0.00013355324541991512,
|
|
"loss": 0.6544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.675082266330719,
|
|
"step": 2200,
|
|
"valid_targets_mean": 15199.7,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 0.588470776621297,
|
|
"grad_norm": 0.38112202254205735,
|
|
"learning_rate": 0.00013344392515360055,
|
|
"loss": 0.6435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6569694876670837,
|
|
"step": 2205,
|
|
"valid_targets_mean": 15126.9,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 0.5898051774753136,
|
|
"grad_norm": 0.49269313436468004,
|
|
"learning_rate": 0.0001333342878255667,
|
|
"loss": 0.6601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6463513970375061,
|
|
"step": 2210,
|
|
"valid_targets_mean": 16902.2,
|
|
"valid_targets_min": 94
|
|
},
|
|
{
|
|
"epoch": 0.5911395783293302,
|
|
"grad_norm": 0.44890881586303993,
|
|
"learning_rate": 0.00013322433403060237,
|
|
"loss": 0.6628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6854138374328613,
|
|
"step": 2215,
|
|
"valid_targets_mean": 15081.2,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.5924739791833467,
|
|
"grad_norm": 0.45988820654600676,
|
|
"learning_rate": 0.0001331140643652134,
|
|
"loss": 0.6544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6454874277114868,
|
|
"step": 2220,
|
|
"valid_targets_mean": 16441.7,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 0.5938083800373632,
|
|
"grad_norm": 0.7093304778268725,
|
|
"learning_rate": 0.00013300347942761916,
|
|
"loss": 0.6569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6526100635528564,
|
|
"step": 2225,
|
|
"valid_targets_mean": 16363.5,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 0.5951427808913797,
|
|
"grad_norm": 0.3468346631394234,
|
|
"learning_rate": 0.00013289257981774944,
|
|
"loss": 0.6663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6592247486114502,
|
|
"step": 2230,
|
|
"valid_targets_mean": 15972.2,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 0.5964771817453963,
|
|
"grad_norm": 0.3553442337570315,
|
|
"learning_rate": 0.0001327813661372411,
|
|
"loss": 0.6717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6738698482513428,
|
|
"step": 2235,
|
|
"valid_targets_mean": 15463.1,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 0.5978115825994129,
|
|
"grad_norm": 0.3838662240074573,
|
|
"learning_rate": 0.00013266983898943495,
|
|
"loss": 0.6473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6299634575843811,
|
|
"step": 2240,
|
|
"valid_targets_mean": 17580.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 0.5991459834534294,
|
|
"grad_norm": 0.3592590789558824,
|
|
"learning_rate": 0.00013255799897937218,
|
|
"loss": 0.6732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6780022382736206,
|
|
"step": 2245,
|
|
"valid_targets_mean": 16846.2,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.600480384307446,
|
|
"grad_norm": 0.44100859110625984,
|
|
"learning_rate": 0.0001324458467137915,
|
|
"loss": 0.6703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6987379193305969,
|
|
"step": 2250,
|
|
"valid_targets_mean": 15482.9,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 0.6018147851614625,
|
|
"grad_norm": 0.37062644306975717,
|
|
"learning_rate": 0.00013233338280112548,
|
|
"loss": 0.6554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6602747440338135,
|
|
"step": 2255,
|
|
"valid_targets_mean": 17064.1,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 0.6031491860154791,
|
|
"grad_norm": 0.418849866695498,
|
|
"learning_rate": 0.00013222060785149744,
|
|
"loss": 0.6673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6753230094909668,
|
|
"step": 2260,
|
|
"valid_targets_mean": 15280.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 0.6044835868694955,
|
|
"grad_norm": 0.43917318413874673,
|
|
"learning_rate": 0.00013210752247671813,
|
|
"loss": 0.6774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.654193639755249,
|
|
"step": 2265,
|
|
"valid_targets_mean": 14665.9,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 0.6058179877235121,
|
|
"grad_norm": 0.40114074662966903,
|
|
"learning_rate": 0.00013199412729028226,
|
|
"loss": 0.6598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.668895423412323,
|
|
"step": 2270,
|
|
"valid_targets_mean": 14357.4,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 0.6071523885775287,
|
|
"grad_norm": 0.3970227577568205,
|
|
"learning_rate": 0.00013188042290736542,
|
|
"loss": 0.6705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6774420738220215,
|
|
"step": 2275,
|
|
"valid_targets_mean": 16830.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.6084867894315452,
|
|
"grad_norm": 0.3764067127859396,
|
|
"learning_rate": 0.00013176640994482056,
|
|
"loss": 0.6441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6230798363685608,
|
|
"step": 2280,
|
|
"valid_targets_mean": 16233.8,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 0.6098211902855618,
|
|
"grad_norm": 0.40059163154769223,
|
|
"learning_rate": 0.00013165208902117466,
|
|
"loss": 0.6681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6714913249015808,
|
|
"step": 2285,
|
|
"valid_targets_mean": 17469.4,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.6111555911395783,
|
|
"grad_norm": 0.3605557043946584,
|
|
"learning_rate": 0.0001315374607566254,
|
|
"loss": 0.6626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6634005308151245,
|
|
"step": 2290,
|
|
"valid_targets_mean": 16034.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.6124899919935949,
|
|
"grad_norm": 0.3240690873923662,
|
|
"learning_rate": 0.0001314225257730379,
|
|
"loss": 0.6702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6710679531097412,
|
|
"step": 2295,
|
|
"valid_targets_mean": 15922.1,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 0.6138243928476115,
|
|
"grad_norm": 0.45278167405604686,
|
|
"learning_rate": 0.00013130728469394113,
|
|
"loss": 0.662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6353721022605896,
|
|
"step": 2300,
|
|
"valid_targets_mean": 15621.8,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 0.615158793701628,
|
|
"grad_norm": 0.5261678023296263,
|
|
"learning_rate": 0.00013119173814452474,
|
|
"loss": 0.6623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6599146127700806,
|
|
"step": 2305,
|
|
"valid_targets_mean": 15986.3,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.6164931945556446,
|
|
"grad_norm": 0.3520154885026282,
|
|
"learning_rate": 0.0001310758867516355,
|
|
"loss": 0.6636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6420433521270752,
|
|
"step": 2310,
|
|
"valid_targets_mean": 16249.3,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 0.617827595409661,
|
|
"grad_norm": 0.3160597821977501,
|
|
"learning_rate": 0.00013095973114377401,
|
|
"loss": 0.6504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6603477597236633,
|
|
"step": 2315,
|
|
"valid_targets_mean": 16349.6,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.6191619962636776,
|
|
"grad_norm": 0.36231419289255584,
|
|
"learning_rate": 0.00013084327195109127,
|
|
"loss": 0.6712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6746746301651001,
|
|
"step": 2320,
|
|
"valid_targets_mean": 14960.1,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 0.6204963971176941,
|
|
"grad_norm": 0.5203030064565997,
|
|
"learning_rate": 0.0001307265098053852,
|
|
"loss": 0.6776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6610316038131714,
|
|
"step": 2325,
|
|
"valid_targets_mean": 14701.1,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 0.6218307979717107,
|
|
"grad_norm": 0.4654024359266842,
|
|
"learning_rate": 0.00013060944534009727,
|
|
"loss": 0.6592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6650247573852539,
|
|
"step": 2330,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.6231651988257273,
|
|
"grad_norm": 0.40579144818563323,
|
|
"learning_rate": 0.00013049207919030913,
|
|
"loss": 0.6713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7027677297592163,
|
|
"step": 2335,
|
|
"valid_targets_mean": 15358.5,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 0.6244995996797438,
|
|
"grad_norm": 0.33738783680985324,
|
|
"learning_rate": 0.000130374411992739,
|
|
"loss": 0.6717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6985683441162109,
|
|
"step": 2340,
|
|
"valid_targets_mean": 15497.9,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.6258340005337604,
|
|
"grad_norm": 0.29967326257870436,
|
|
"learning_rate": 0.00013025644438573828,
|
|
"loss": 0.6736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.681916356086731,
|
|
"step": 2345,
|
|
"valid_targets_mean": 15759.5,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 0.6271684013877769,
|
|
"grad_norm": 0.3784599680448572,
|
|
"learning_rate": 0.0001301381770092882,
|
|
"loss": 0.6603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6813744902610779,
|
|
"step": 2350,
|
|
"valid_targets_mean": 15277.1,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.6285028022417934,
|
|
"grad_norm": 0.6857152779568253,
|
|
"learning_rate": 0.00013001961050499618,
|
|
"loss": 0.6598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7079257965087891,
|
|
"step": 2355,
|
|
"valid_targets_mean": 14874.0,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 0.6298372030958099,
|
|
"grad_norm": 0.6008392351197666,
|
|
"learning_rate": 0.00012990074551609248,
|
|
"loss": 0.6479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6424392461776733,
|
|
"step": 2360,
|
|
"valid_targets_mean": 16310.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.6311716039498265,
|
|
"grad_norm": 0.5035494087224074,
|
|
"learning_rate": 0.00012978158268742656,
|
|
"loss": 0.646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6734390258789062,
|
|
"step": 2365,
|
|
"valid_targets_mean": 15363.5,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.6325060048038431,
|
|
"grad_norm": 0.8359104728648866,
|
|
"learning_rate": 0.00012966212266546384,
|
|
"loss": 0.6737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6907227039337158,
|
|
"step": 2370,
|
|
"valid_targets_mean": 16321.4,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 0.6338404056578596,
|
|
"grad_norm": 0.6869329295377531,
|
|
"learning_rate": 0.0001295423660982819,
|
|
"loss": 0.677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6277381181716919,
|
|
"step": 2375,
|
|
"valid_targets_mean": 16251.1,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.6351748065118762,
|
|
"grad_norm": 0.6148555801464882,
|
|
"learning_rate": 0.00012942231363556717,
|
|
"loss": 0.6729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.675305962562561,
|
|
"step": 2380,
|
|
"valid_targets_mean": 15309.9,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 0.6365092073658927,
|
|
"grad_norm": 0.45415070983770905,
|
|
"learning_rate": 0.00012930196592861123,
|
|
"loss": 0.6387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6547595262527466,
|
|
"step": 2385,
|
|
"valid_targets_mean": 15454.3,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 0.6378436082199093,
|
|
"grad_norm": 0.442794276566689,
|
|
"learning_rate": 0.0001291813236303075,
|
|
"loss": 0.6617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6543595790863037,
|
|
"step": 2390,
|
|
"valid_targets_mean": 16753.5,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 0.6391780090739259,
|
|
"grad_norm": 0.5332207438446981,
|
|
"learning_rate": 0.0001290603873951475,
|
|
"loss": 0.6613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6456538438796997,
|
|
"step": 2395,
|
|
"valid_targets_mean": 15616.2,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 0.6405124099279423,
|
|
"grad_norm": 0.7521918638954632,
|
|
"learning_rate": 0.0001289391578792174,
|
|
"loss": 0.6552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6417596340179443,
|
|
"step": 2400,
|
|
"valid_targets_mean": 15314.5,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 0.6418468107819589,
|
|
"grad_norm": 0.5277592718542781,
|
|
"learning_rate": 0.0001288176357401944,
|
|
"loss": 0.6609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6445276141166687,
|
|
"step": 2405,
|
|
"valid_targets_mean": 16181.8,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.6431812116359754,
|
|
"grad_norm": 0.5360797058408273,
|
|
"learning_rate": 0.00012869582163734327,
|
|
"loss": 0.6621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6326366066932678,
|
|
"step": 2410,
|
|
"valid_targets_mean": 14843.5,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 0.644515612489992,
|
|
"grad_norm": 0.5613437487080589,
|
|
"learning_rate": 0.00012857371623151265,
|
|
"loss": 0.6459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6261048316955566,
|
|
"step": 2415,
|
|
"valid_targets_mean": 16203.7,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.6458500133440085,
|
|
"grad_norm": 0.5401174366140299,
|
|
"learning_rate": 0.00012845132018513147,
|
|
"loss": 0.6576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6785622835159302,
|
|
"step": 2420,
|
|
"valid_targets_mean": 15360.1,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 0.6471844141980251,
|
|
"grad_norm": 0.5523670660311945,
|
|
"learning_rate": 0.00012832863416220556,
|
|
"loss": 0.6704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6968633532524109,
|
|
"step": 2425,
|
|
"valid_targets_mean": 15177.7,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 0.6485188150520417,
|
|
"grad_norm": 0.6266815221915214,
|
|
"learning_rate": 0.00012820565882831365,
|
|
"loss": 0.6504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6593841910362244,
|
|
"step": 2430,
|
|
"valid_targets_mean": 14721.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 0.6498532159060582,
|
|
"grad_norm": 0.34252486449660074,
|
|
"learning_rate": 0.00012808239485060426,
|
|
"loss": 0.6475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6551427841186523,
|
|
"step": 2435,
|
|
"valid_targets_mean": 16791.4,
|
|
"valid_targets_min": 18
|
|
},
|
|
{
|
|
"epoch": 0.6511876167600748,
|
|
"grad_norm": 0.3777353953835305,
|
|
"learning_rate": 0.00012795884289779161,
|
|
"loss": 0.6545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6419693231582642,
|
|
"step": 2440,
|
|
"valid_targets_mean": 16404.4,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 0.6525220176140912,
|
|
"grad_norm": 0.45799644108503845,
|
|
"learning_rate": 0.00012783500364015233,
|
|
"loss": 0.6486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6675738096237183,
|
|
"step": 2445,
|
|
"valid_targets_mean": 15019.3,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.6538564184681078,
|
|
"grad_norm": 0.35174570280079337,
|
|
"learning_rate": 0.00012771087774952165,
|
|
"loss": 0.6638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6781587600708008,
|
|
"step": 2450,
|
|
"valid_targets_mean": 16235.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 0.6551908193221244,
|
|
"grad_norm": 0.29804159496964094,
|
|
"learning_rate": 0.00012758646589928975,
|
|
"loss": 0.6575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6292779445648193,
|
|
"step": 2455,
|
|
"valid_targets_mean": 15435.7,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.6565252201761409,
|
|
"grad_norm": 0.3673376413084568,
|
|
"learning_rate": 0.00012746176876439824,
|
|
"loss": 0.677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7069848775863647,
|
|
"step": 2460,
|
|
"valid_targets_mean": 14007.8,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 0.6578596210301575,
|
|
"grad_norm": 0.36509344991462184,
|
|
"learning_rate": 0.00012733678702133642,
|
|
"loss": 0.6493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6549676656723022,
|
|
"step": 2465,
|
|
"valid_targets_mean": 15895.7,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 0.659194021884174,
|
|
"grad_norm": 0.429418610209433,
|
|
"learning_rate": 0.00012721152134813755,
|
|
"loss": 0.6624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6349632740020752,
|
|
"step": 2470,
|
|
"valid_targets_mean": 16081.5,
|
|
"valid_targets_min": 98
|
|
},
|
|
{
|
|
"epoch": 0.6605284227381906,
|
|
"grad_norm": 0.7270900153753073,
|
|
"learning_rate": 0.00012708597242437524,
|
|
"loss": 0.6568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6852024793624878,
|
|
"step": 2475,
|
|
"valid_targets_mean": 14263.0,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 0.661862823592207,
|
|
"grad_norm": 0.7031091414429342,
|
|
"learning_rate": 0.00012696014093115974,
|
|
"loss": 0.6717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.665812611579895,
|
|
"step": 2480,
|
|
"valid_targets_mean": 16887.2,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 0.6631972244462236,
|
|
"grad_norm": 0.4374642051297583,
|
|
"learning_rate": 0.00012683402755113432,
|
|
"loss": 0.6475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6082494258880615,
|
|
"step": 2485,
|
|
"valid_targets_mean": 16537.8,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.6645316253002402,
|
|
"grad_norm": 0.6477682364856551,
|
|
"learning_rate": 0.0001267076329684714,
|
|
"loss": 0.6509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6187378168106079,
|
|
"step": 2490,
|
|
"valid_targets_mean": 16730.8,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 0.6658660261542567,
|
|
"grad_norm": 0.38481277870775876,
|
|
"learning_rate": 0.0001265809578688691,
|
|
"loss": 0.6578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6274208426475525,
|
|
"step": 2495,
|
|
"valid_targets_mean": 15341.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 0.6672004270082733,
|
|
"grad_norm": 0.3917460884697377,
|
|
"learning_rate": 0.00012645400293954714,
|
|
"loss": 0.664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.663731575012207,
|
|
"step": 2500,
|
|
"valid_targets_mean": 15888.5,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.6685348278622898,
|
|
"grad_norm": 0.9151888966738665,
|
|
"learning_rate": 0.0001263267688692435,
|
|
"loss": 0.6793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6852500438690186,
|
|
"step": 2505,
|
|
"valid_targets_mean": 15587.5,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 0.6698692287163064,
|
|
"grad_norm": 0.5979390813822053,
|
|
"learning_rate": 0.00012619925634821052,
|
|
"loss": 0.6613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6739305257797241,
|
|
"step": 2510,
|
|
"valid_targets_mean": 16353.5,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.671203629570323,
|
|
"grad_norm": 0.4449761729311656,
|
|
"learning_rate": 0.00012607146606821105,
|
|
"loss": 0.6525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6503389477729797,
|
|
"step": 2515,
|
|
"valid_targets_mean": 16451.6,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 0.6725380304243395,
|
|
"grad_norm": 0.49706212546796325,
|
|
"learning_rate": 0.00012594339872251483,
|
|
"loss": 0.6644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6610913872718811,
|
|
"step": 2520,
|
|
"valid_targets_mean": 16268.6,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 0.673872431278356,
|
|
"grad_norm": 0.5290075839543622,
|
|
"learning_rate": 0.00012581505500589474,
|
|
"loss": 0.6616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6627337336540222,
|
|
"step": 2525,
|
|
"valid_targets_mean": 15314.1,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.6752068321323725,
|
|
"grad_norm": 0.5250059480100745,
|
|
"learning_rate": 0.00012568643561462298,
|
|
"loss": 0.6539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6993048787117004,
|
|
"step": 2530,
|
|
"valid_targets_mean": 16188.7,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.6765412329863891,
|
|
"grad_norm": 0.4750525887774703,
|
|
"learning_rate": 0.00012555754124646729,
|
|
"loss": 0.6505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6581923365592957,
|
|
"step": 2535,
|
|
"valid_targets_mean": 15603.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 0.6778756338404056,
|
|
"grad_norm": 0.3684157475749975,
|
|
"learning_rate": 0.0001254283726006871,
|
|
"loss": 0.6487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6634360551834106,
|
|
"step": 2540,
|
|
"valid_targets_mean": 16575.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 0.6792100346944222,
|
|
"grad_norm": 0.5074329452000185,
|
|
"learning_rate": 0.00012529893037803,
|
|
"loss": 0.6621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6525907516479492,
|
|
"step": 2545,
|
|
"valid_targets_mean": 16449.4,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 0.6805444355484388,
|
|
"grad_norm": 0.5328389684013507,
|
|
"learning_rate": 0.00012516921528072752,
|
|
"loss": 0.6497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6763752698898315,
|
|
"step": 2550,
|
|
"valid_targets_mean": 16478.6,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 0.6818788364024553,
|
|
"grad_norm": 0.9065202004121011,
|
|
"learning_rate": 0.00012503922801249172,
|
|
"loss": 0.679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6522437334060669,
|
|
"step": 2555,
|
|
"valid_targets_mean": 16686.2,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 0.6832132372564719,
|
|
"grad_norm": 0.5445419222694561,
|
|
"learning_rate": 0.00012490896927851118,
|
|
"loss": 0.6606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6466104984283447,
|
|
"step": 2560,
|
|
"valid_targets_mean": 16886.7,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.6845476381104884,
|
|
"grad_norm": 0.709381810114747,
|
|
"learning_rate": 0.0001247784397854471,
|
|
"loss": 0.6505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6570961475372314,
|
|
"step": 2565,
|
|
"valid_targets_mean": 16608.6,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 0.685882038964505,
|
|
"grad_norm": 0.5686902000812523,
|
|
"learning_rate": 0.0001246476402414297,
|
|
"loss": 0.6612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.682182788848877,
|
|
"step": 2570,
|
|
"valid_targets_mean": 15198.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.6872164398185214,
|
|
"grad_norm": 0.6011052255291751,
|
|
"learning_rate": 0.0001245165713560541,
|
|
"loss": 0.6641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6695008277893066,
|
|
"step": 2575,
|
|
"valid_targets_mean": 16134.5,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 0.688550840672538,
|
|
"grad_norm": 0.5532645790999307,
|
|
"learning_rate": 0.00012438523384037675,
|
|
"loss": 0.6644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6632760763168335,
|
|
"step": 2580,
|
|
"valid_targets_mean": 15272.8,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 0.6898852415265546,
|
|
"grad_norm": 0.596489328498592,
|
|
"learning_rate": 0.0001242536284069113,
|
|
"loss": 0.6394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6107437610626221,
|
|
"step": 2585,
|
|
"valid_targets_mean": 16331.5,
|
|
"valid_targets_min": 160
|
|
},
|
|
{
|
|
"epoch": 0.6912196423805711,
|
|
"grad_norm": 0.5514444258245582,
|
|
"learning_rate": 0.00012412175576962504,
|
|
"loss": 0.6508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6393566727638245,
|
|
"step": 2590,
|
|
"valid_targets_mean": 16626.9,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 0.6925540432345877,
|
|
"grad_norm": 0.5190980664614784,
|
|
"learning_rate": 0.00012398961664393467,
|
|
"loss": 0.6638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6491864919662476,
|
|
"step": 2595,
|
|
"valid_targets_mean": 16259.0,
|
|
"valid_targets_min": 188
|
|
},
|
|
{
|
|
"epoch": 0.6938884440886042,
|
|
"grad_norm": 0.34768837243198264,
|
|
"learning_rate": 0.0001238572117467027,
|
|
"loss": 0.6659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6843166947364807,
|
|
"step": 2600,
|
|
"valid_targets_mean": 15492.4,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 0.6952228449426208,
|
|
"grad_norm": 0.5270278631943136,
|
|
"learning_rate": 0.0001237245417962335,
|
|
"loss": 0.6479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6631830930709839,
|
|
"step": 2605,
|
|
"valid_targets_mean": 16151.7,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 0.6965572457966374,
|
|
"grad_norm": 0.9404018373354717,
|
|
"learning_rate": 0.0001235916075122693,
|
|
"loss": 0.6692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6821975708007812,
|
|
"step": 2610,
|
|
"valid_targets_mean": 15369.8,
|
|
"valid_targets_min": 135
|
|
},
|
|
{
|
|
"epoch": 0.6978916466506538,
|
|
"grad_norm": 0.7210725325794161,
|
|
"learning_rate": 0.00012345840961598638,
|
|
"loss": 0.6831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.663686990737915,
|
|
"step": 2615,
|
|
"valid_targets_mean": 15961.8,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.6992260475046704,
|
|
"grad_norm": 0.41395719841660517,
|
|
"learning_rate": 0.00012332494882999113,
|
|
"loss": 0.6675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6907253265380859,
|
|
"step": 2620,
|
|
"valid_targets_mean": 15160.7,
|
|
"valid_targets_min": 16
|
|
},
|
|
{
|
|
"epoch": 0.7005604483586869,
|
|
"grad_norm": 0.3569886499787534,
|
|
"learning_rate": 0.00012319122587831614,
|
|
"loss": 0.6466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6753599643707275,
|
|
"step": 2625,
|
|
"valid_targets_mean": 15287.2,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 0.7018948492127035,
|
|
"grad_norm": 0.29934624084299416,
|
|
"learning_rate": 0.00012305724148641627,
|
|
"loss": 0.6563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6501826047897339,
|
|
"step": 2630,
|
|
"valid_targets_mean": 15727.1,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 0.70322925006672,
|
|
"grad_norm": 0.318425447077463,
|
|
"learning_rate": 0.00012292299638116472,
|
|
"loss": 0.6601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.665668249130249,
|
|
"step": 2635,
|
|
"valid_targets_mean": 16180.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.7045636509207366,
|
|
"grad_norm": 0.32493659582242096,
|
|
"learning_rate": 0.00012278849129084902,
|
|
"loss": 0.6451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6409593820571899,
|
|
"step": 2640,
|
|
"valid_targets_mean": 15696.4,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 0.7058980517747532,
|
|
"grad_norm": 0.3094955674403719,
|
|
"learning_rate": 0.00012265372694516714,
|
|
"loss": 0.6629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.657052218914032,
|
|
"step": 2645,
|
|
"valid_targets_mean": 15152.1,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.7072324526287697,
|
|
"grad_norm": 0.23050310147089786,
|
|
"learning_rate": 0.00012251870407522364,
|
|
"loss": 0.6626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6353529095649719,
|
|
"step": 2650,
|
|
"valid_targets_mean": 16007.0,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.7085668534827863,
|
|
"grad_norm": 0.2805492113192007,
|
|
"learning_rate": 0.00012238342341352546,
|
|
"loss": 0.6458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6158525943756104,
|
|
"step": 2655,
|
|
"valid_targets_mean": 16036.7,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 0.7099012543368027,
|
|
"grad_norm": 0.8419236025415093,
|
|
"learning_rate": 0.00012224788569397806,
|
|
"loss": 0.6685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6778745055198669,
|
|
"step": 2660,
|
|
"valid_targets_mean": 15884.4,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 0.7112356551908193,
|
|
"grad_norm": 0.49950512465543345,
|
|
"learning_rate": 0.0001221120916518816,
|
|
"loss": 0.6707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485254168510437,
|
|
"step": 2665,
|
|
"valid_targets_mean": 15734.8,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 0.7125700560448359,
|
|
"grad_norm": 0.3086314975104693,
|
|
"learning_rate": 0.0001219760420239267,
|
|
"loss": 0.6371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.640455424785614,
|
|
"step": 2670,
|
|
"valid_targets_mean": 16773.1,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 0.7139044568988524,
|
|
"grad_norm": 0.3616813061445184,
|
|
"learning_rate": 0.00012183973754819051,
|
|
"loss": 0.6376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6198325157165527,
|
|
"step": 2675,
|
|
"valid_targets_mean": 16633.2,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.715238857752869,
|
|
"grad_norm": 0.3766222345660372,
|
|
"learning_rate": 0.00012170317896413284,
|
|
"loss": 0.6458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6716166734695435,
|
|
"step": 2680,
|
|
"valid_targets_mean": 15397.1,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 0.7165732586068855,
|
|
"grad_norm": 0.2916119694175485,
|
|
"learning_rate": 0.00012156636701259202,
|
|
"loss": 0.6503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6420208215713501,
|
|
"step": 2685,
|
|
"valid_targets_mean": 15637.3,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.7179076594609021,
|
|
"grad_norm": 0.2775617851207647,
|
|
"learning_rate": 0.00012142930243578092,
|
|
"loss": 0.6447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6339491009712219,
|
|
"step": 2690,
|
|
"valid_targets_mean": 15479.5,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 0.7192420603149186,
|
|
"grad_norm": 0.37674025138489925,
|
|
"learning_rate": 0.00012129198597728285,
|
|
"loss": 0.6613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6560890674591064,
|
|
"step": 2695,
|
|
"valid_targets_mean": 14771.5,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.7205764611689351,
|
|
"grad_norm": 0.39622678418107,
|
|
"learning_rate": 0.00012115441838204767,
|
|
"loss": 0.662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6819006204605103,
|
|
"step": 2700,
|
|
"valid_targets_mean": 16697.3,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 0.7219108620229517,
|
|
"grad_norm": 0.46002020422176004,
|
|
"learning_rate": 0.00012101660039638766,
|
|
"loss": 0.6272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6191419363021851,
|
|
"step": 2705,
|
|
"valid_targets_mean": 15123.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 0.7232452628769682,
|
|
"grad_norm": 0.4054679246957719,
|
|
"learning_rate": 0.00012087853276797346,
|
|
"loss": 0.6594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6391434669494629,
|
|
"step": 2710,
|
|
"valid_targets_mean": 15248.1,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 0.7245796637309848,
|
|
"grad_norm": 0.4589545493169983,
|
|
"learning_rate": 0.00012074021624583005,
|
|
"loss": 0.6672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7053963541984558,
|
|
"step": 2715,
|
|
"valid_targets_mean": 15634.4,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.7259140645850013,
|
|
"grad_norm": 0.4100508001012324,
|
|
"learning_rate": 0.00012060165158033268,
|
|
"loss": 0.6445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6315319538116455,
|
|
"step": 2720,
|
|
"valid_targets_mean": 16354.7,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 0.7272484654390179,
|
|
"grad_norm": 0.449503597581273,
|
|
"learning_rate": 0.00012046283952320275,
|
|
"loss": 0.6596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6481472253799438,
|
|
"step": 2725,
|
|
"valid_targets_mean": 17060.3,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 0.7285828662930345,
|
|
"grad_norm": 0.5521352851054642,
|
|
"learning_rate": 0.00012032378082750382,
|
|
"loss": 0.641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6330423355102539,
|
|
"step": 2730,
|
|
"valid_targets_mean": 15964.4,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 0.729917267147051,
|
|
"grad_norm": 0.3662763277079348,
|
|
"learning_rate": 0.00012018447624763748,
|
|
"loss": 0.6664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6733106374740601,
|
|
"step": 2735,
|
|
"valid_targets_mean": 15451.8,
|
|
"valid_targets_min": 115
|
|
},
|
|
{
|
|
"epoch": 0.7312516680010676,
|
|
"grad_norm": 0.4442832003030609,
|
|
"learning_rate": 0.00012004492653933923,
|
|
"loss": 0.6544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.644192636013031,
|
|
"step": 2740,
|
|
"valid_targets_mean": 17145.9,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 0.732586068855084,
|
|
"grad_norm": 0.4146694850471416,
|
|
"learning_rate": 0.0001199051324596744,
|
|
"loss": 0.6548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6647549867630005,
|
|
"step": 2745,
|
|
"valid_targets_mean": 16118.1,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 0.7339204697091006,
|
|
"grad_norm": 0.4034483397987316,
|
|
"learning_rate": 0.00011976509476703408,
|
|
"loss": 0.6562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6489412784576416,
|
|
"step": 2750,
|
|
"valid_targets_mean": 16238.3,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 0.7352548705631171,
|
|
"grad_norm": 0.3858704887075872,
|
|
"learning_rate": 0.00011962481422113098,
|
|
"loss": 0.6641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6932016611099243,
|
|
"step": 2755,
|
|
"valid_targets_mean": 16186.4,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 0.7365892714171337,
|
|
"grad_norm": 0.3708392071660316,
|
|
"learning_rate": 0.00011948429158299523,
|
|
"loss": 0.6414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6387462019920349,
|
|
"step": 2760,
|
|
"valid_targets_mean": 15729.8,
|
|
"valid_targets_min": 87
|
|
},
|
|
{
|
|
"epoch": 0.7379236722711503,
|
|
"grad_norm": 0.32790785396891836,
|
|
"learning_rate": 0.00011934352761497043,
|
|
"loss": 0.6562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6276763677597046,
|
|
"step": 2765,
|
|
"valid_targets_mean": 15110.6,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.7392580731251668,
|
|
"grad_norm": 0.435520809779603,
|
|
"learning_rate": 0.00011920252308070936,
|
|
"loss": 0.654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6683900356292725,
|
|
"step": 2770,
|
|
"valid_targets_mean": 16605.6,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 0.7405924739791834,
|
|
"grad_norm": 0.429796127636718,
|
|
"learning_rate": 0.00011906127874516985,
|
|
"loss": 0.6794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6834566593170166,
|
|
"step": 2775,
|
|
"valid_targets_mean": 15583.3,
|
|
"valid_targets_min": 186
|
|
},
|
|
{
|
|
"epoch": 0.7419268748331999,
|
|
"grad_norm": 0.5630641955667017,
|
|
"learning_rate": 0.00011891979537461069,
|
|
"loss": 0.6482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6598657369613647,
|
|
"step": 2780,
|
|
"valid_targets_mean": 14949.2,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 0.7432612756872164,
|
|
"grad_norm": 0.42825593904406456,
|
|
"learning_rate": 0.00011877807373658751,
|
|
"loss": 0.6588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6864846348762512,
|
|
"step": 2785,
|
|
"valid_targets_mean": 15358.7,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 0.7445956765412329,
|
|
"grad_norm": 0.24764962892304368,
|
|
"learning_rate": 0.00011863611459994845,
|
|
"loss": 0.6355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6364130973815918,
|
|
"step": 2790,
|
|
"valid_targets_mean": 16195.6,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 0.7459300773952495,
|
|
"grad_norm": 0.3361856535909037,
|
|
"learning_rate": 0.00011849391873483016,
|
|
"loss": 0.6563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6433441638946533,
|
|
"step": 2795,
|
|
"valid_targets_mean": 17315.0,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 0.7472644782492661,
|
|
"grad_norm": 0.4839633607167184,
|
|
"learning_rate": 0.00011835148691265355,
|
|
"loss": 0.6423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6697137355804443,
|
|
"step": 2800,
|
|
"valid_targets_mean": 15119.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.7485988791032826,
|
|
"grad_norm": 0.48866740517865787,
|
|
"learning_rate": 0.00011820881990611963,
|
|
"loss": 0.6584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6491275429725647,
|
|
"step": 2805,
|
|
"valid_targets_mean": 16127.4,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 0.7499332799572992,
|
|
"grad_norm": 0.4639695977583032,
|
|
"learning_rate": 0.00011806591848920521,
|
|
"loss": 0.6543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6611607074737549,
|
|
"step": 2810,
|
|
"valid_targets_mean": 16535.5,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.7512676808113157,
|
|
"grad_norm": 0.5819254408562657,
|
|
"learning_rate": 0.00011792278343715892,
|
|
"loss": 0.6698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6676276922225952,
|
|
"step": 2815,
|
|
"valid_targets_mean": 15598.1,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.7526020816653323,
|
|
"grad_norm": 0.5386974712188372,
|
|
"learning_rate": 0.00011777941552649674,
|
|
"loss": 0.6507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6673504710197449,
|
|
"step": 2820,
|
|
"valid_targets_mean": 16666.5,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 0.7539364825193489,
|
|
"grad_norm": 0.5247082569671393,
|
|
"learning_rate": 0.00011763581553499803,
|
|
"loss": 0.6689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6670835018157959,
|
|
"step": 2825,
|
|
"valid_targets_mean": 15185.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.7552708833733653,
|
|
"grad_norm": 0.20057356755323014,
|
|
"learning_rate": 0.00011749198424170117,
|
|
"loss": 0.636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6377661228179932,
|
|
"step": 2830,
|
|
"valid_targets_mean": 16080.7,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 0.7566052842273819,
|
|
"grad_norm": 0.2717189631177877,
|
|
"learning_rate": 0.00011734792242689934,
|
|
"loss": 0.6503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6641967296600342,
|
|
"step": 2835,
|
|
"valid_targets_mean": 16038.2,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.7579396850813984,
|
|
"grad_norm": 0.3539872406346123,
|
|
"learning_rate": 0.00011720363087213629,
|
|
"loss": 0.6492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6535997986793518,
|
|
"step": 2840,
|
|
"valid_targets_mean": 15803.9,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 0.759274085935415,
|
|
"grad_norm": 0.3060505538467894,
|
|
"learning_rate": 0.00011705911036020222,
|
|
"loss": 0.6547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6756272912025452,
|
|
"step": 2845,
|
|
"valid_targets_mean": 16094.0,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 0.7606084867894315,
|
|
"grad_norm": 0.32979603532007673,
|
|
"learning_rate": 0.00011691436167512938,
|
|
"loss": 0.6558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6754165291786194,
|
|
"step": 2850,
|
|
"valid_targets_mean": 14935.3,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 0.7619428876434481,
|
|
"grad_norm": 0.4055952495934608,
|
|
"learning_rate": 0.00011676938560218781,
|
|
"loss": 0.6692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6552126407623291,
|
|
"step": 2855,
|
|
"valid_targets_mean": 15897.4,
|
|
"valid_targets_min": 36
|
|
},
|
|
{
|
|
"epoch": 0.7632772884974647,
|
|
"grad_norm": 0.24961533900790797,
|
|
"learning_rate": 0.00011662418292788127,
|
|
"loss": 0.6383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6132807731628418,
|
|
"step": 2860,
|
|
"valid_targets_mean": 16758.9,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 0.7646116893514812,
|
|
"grad_norm": 0.2912209439909631,
|
|
"learning_rate": 0.00011647875443994271,
|
|
"loss": 0.6536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6957015991210938,
|
|
"step": 2865,
|
|
"valid_targets_mean": 15787.5,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.7659460902054978,
|
|
"grad_norm": 0.3042265405636786,
|
|
"learning_rate": 0.00011633310092733027,
|
|
"loss": 0.654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6923919916152954,
|
|
"step": 2870,
|
|
"valid_targets_mean": 15682.3,
|
|
"valid_targets_min": 87
|
|
},
|
|
{
|
|
"epoch": 0.7672804910595142,
|
|
"grad_norm": 0.46422609082136285,
|
|
"learning_rate": 0.00011618722318022273,
|
|
"loss": 0.6469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6420409679412842,
|
|
"step": 2875,
|
|
"valid_targets_mean": 16307.2,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 0.7686148919135308,
|
|
"grad_norm": 0.2866231185689334,
|
|
"learning_rate": 0.00011604112199001546,
|
|
"loss": 0.6591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6567983627319336,
|
|
"step": 2880,
|
|
"valid_targets_mean": 16033.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.7699492927675474,
|
|
"grad_norm": 0.29511853808011,
|
|
"learning_rate": 0.00011589479814931598,
|
|
"loss": 0.6747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6823477745056152,
|
|
"step": 2885,
|
|
"valid_targets_mean": 15575.4,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 0.7712836936215639,
|
|
"grad_norm": 0.28271108085235624,
|
|
"learning_rate": 0.0001157482524519397,
|
|
"loss": 0.6484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6464595794677734,
|
|
"step": 2890,
|
|
"valid_targets_mean": 16116.9,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 0.7726180944755805,
|
|
"grad_norm": 0.2865267852616057,
|
|
"learning_rate": 0.00011560148569290558,
|
|
"loss": 0.6631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6816505789756775,
|
|
"step": 2895,
|
|
"valid_targets_mean": 15358.4,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.773952495329597,
|
|
"grad_norm": 0.30991511376313186,
|
|
"learning_rate": 0.00011545449866843194,
|
|
"loss": 0.6525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.63730788230896,
|
|
"step": 2900,
|
|
"valid_targets_mean": 15462.2,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.7752868961836136,
|
|
"grad_norm": 0.3893737799601431,
|
|
"learning_rate": 0.00011530729217593198,
|
|
"loss": 0.6611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6356149911880493,
|
|
"step": 2905,
|
|
"valid_targets_mean": 15372.1,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 0.77662129703763,
|
|
"grad_norm": 0.3726536887518834,
|
|
"learning_rate": 0.00011515986701400955,
|
|
"loss": 0.6486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6307750344276428,
|
|
"step": 2910,
|
|
"valid_targets_mean": 16776.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.7779556978916466,
|
|
"grad_norm": 0.5524398950886195,
|
|
"learning_rate": 0.00011501222398245478,
|
|
"loss": 0.6492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6876989603042603,
|
|
"step": 2915,
|
|
"valid_targets_mean": 15650.0,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 0.7792900987456632,
|
|
"grad_norm": 0.43053436499737224,
|
|
"learning_rate": 0.00011486436388223977,
|
|
"loss": 0.6506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6381161212921143,
|
|
"step": 2920,
|
|
"valid_targets_mean": 15313.3,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 0.7806244995996797,
|
|
"grad_norm": 0.519345484811449,
|
|
"learning_rate": 0.00011471628751551426,
|
|
"loss": 0.6476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6335276365280151,
|
|
"step": 2925,
|
|
"valid_targets_mean": 16039.5,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 0.7819589004536963,
|
|
"grad_norm": 0.40279485056898695,
|
|
"learning_rate": 0.0001145679956856012,
|
|
"loss": 0.6519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6481651067733765,
|
|
"step": 2930,
|
|
"valid_targets_mean": 17094.5,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.7832933013077128,
|
|
"grad_norm": 0.3694050878957853,
|
|
"learning_rate": 0.00011441948919699249,
|
|
"loss": 0.641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6418542861938477,
|
|
"step": 2935,
|
|
"valid_targets_mean": 16285.5,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.7846277021617294,
|
|
"grad_norm": 0.4735156964913278,
|
|
"learning_rate": 0.00011427076885534445,
|
|
"loss": 0.6586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6383504867553711,
|
|
"step": 2940,
|
|
"valid_targets_mean": 15567.9,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 0.785962103015746,
|
|
"grad_norm": 0.3844007153033924,
|
|
"learning_rate": 0.00011412183546747374,
|
|
"loss": 0.6717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6902410984039307,
|
|
"step": 2945,
|
|
"valid_targets_mean": 15343.8,
|
|
"valid_targets_min": 105
|
|
},
|
|
{
|
|
"epoch": 0.7872965038697625,
|
|
"grad_norm": 0.317269590749701,
|
|
"learning_rate": 0.00011397268984135266,
|
|
"loss": 0.6529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6304830312728882,
|
|
"step": 2950,
|
|
"valid_targets_mean": 16103.9,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 0.7886309047237791,
|
|
"grad_norm": 0.3120496775255097,
|
|
"learning_rate": 0.00011382333278610503,
|
|
"loss": 0.656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6566540598869324,
|
|
"step": 2955,
|
|
"valid_targets_mean": 15891.0,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 0.7899653055777955,
|
|
"grad_norm": 0.29363407283435133,
|
|
"learning_rate": 0.00011367376511200157,
|
|
"loss": 0.6344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6340903043746948,
|
|
"step": 2960,
|
|
"valid_targets_mean": 15561.6,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.7912997064318121,
|
|
"grad_norm": 0.27145633159261673,
|
|
"learning_rate": 0.00011352398763045569,
|
|
"loss": 0.6529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6175417304039001,
|
|
"step": 2965,
|
|
"valid_targets_mean": 15884.3,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 0.7926341072858286,
|
|
"grad_norm": 0.3177062342894978,
|
|
"learning_rate": 0.00011337400115401905,
|
|
"loss": 0.6375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6519104242324829,
|
|
"step": 2970,
|
|
"valid_targets_mean": 15654.3,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.7939685081398452,
|
|
"grad_norm": 0.3262337279748586,
|
|
"learning_rate": 0.00011322380649637704,
|
|
"loss": 0.6394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6559080481529236,
|
|
"step": 2975,
|
|
"valid_targets_mean": 16248.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.7953029089938618,
|
|
"grad_norm": 0.3231189128287473,
|
|
"learning_rate": 0.00011307340447234449,
|
|
"loss": 0.6561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6174107789993286,
|
|
"step": 2980,
|
|
"valid_targets_mean": 17327.3,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 0.7966373098478783,
|
|
"grad_norm": 0.2673123843052834,
|
|
"learning_rate": 0.00011292279589786115,
|
|
"loss": 0.6415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.674813985824585,
|
|
"step": 2985,
|
|
"valid_targets_mean": 16437.4,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.7979717107018949,
|
|
"grad_norm": 0.2628610801083876,
|
|
"learning_rate": 0.00011277198158998744,
|
|
"loss": 0.6521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6365718841552734,
|
|
"step": 2990,
|
|
"valid_targets_mean": 16253.4,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.7993061115559114,
|
|
"grad_norm": 0.41550078533066354,
|
|
"learning_rate": 0.00011262096236689978,
|
|
"loss": 0.6571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6576251983642578,
|
|
"step": 2995,
|
|
"valid_targets_mean": 16513.4,
|
|
"valid_targets_min": 135
|
|
},
|
|
{
|
|
"epoch": 0.800640512409928,
|
|
"grad_norm": 0.3088075857567339,
|
|
"learning_rate": 0.0001124697390478863,
|
|
"loss": 0.6353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6164828538894653,
|
|
"step": 3000,
|
|
"valid_targets_mean": 15918.2,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 0.8019749132639445,
|
|
"grad_norm": 0.25913707294388605,
|
|
"learning_rate": 0.00011231831245334238,
|
|
"loss": 0.6576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6322481632232666,
|
|
"step": 3005,
|
|
"valid_targets_mean": 17183.3,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 0.803309314117961,
|
|
"grad_norm": 0.2875117928032586,
|
|
"learning_rate": 0.00011216668340476618,
|
|
"loss": 0.6382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6693239808082581,
|
|
"step": 3010,
|
|
"valid_targets_mean": 15421.7,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 0.8046437149719776,
|
|
"grad_norm": 0.36214125629051713,
|
|
"learning_rate": 0.00011201485272475416,
|
|
"loss": 0.6626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.659346342086792,
|
|
"step": 3015,
|
|
"valid_targets_mean": 15835.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.8059781158259941,
|
|
"grad_norm": 0.46775768851706007,
|
|
"learning_rate": 0.00011186282123699664,
|
|
"loss": 0.6548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.636502742767334,
|
|
"step": 3020,
|
|
"valid_targets_mean": 14893.1,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 0.8073125166800107,
|
|
"grad_norm": 0.2738171869503716,
|
|
"learning_rate": 0.00011171058976627341,
|
|
"loss": 0.654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6290814876556396,
|
|
"step": 3025,
|
|
"valid_targets_mean": 15760.6,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 0.8086469175340272,
|
|
"grad_norm": 0.3647024223804018,
|
|
"learning_rate": 0.00011155815913844906,
|
|
"loss": 0.6462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6478596925735474,
|
|
"step": 3030,
|
|
"valid_targets_mean": 14951.4,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.8099813183880438,
|
|
"grad_norm": 0.27641270754591074,
|
|
"learning_rate": 0.00011140553018046872,
|
|
"loss": 0.6598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6753206849098206,
|
|
"step": 3035,
|
|
"valid_targets_mean": 14340.4,
|
|
"valid_targets_min": 115
|
|
},
|
|
{
|
|
"epoch": 0.8113157192420604,
|
|
"grad_norm": 0.27104302518571194,
|
|
"learning_rate": 0.00011125270372035342,
|
|
"loss": 0.6473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6414433121681213,
|
|
"step": 3040,
|
|
"valid_targets_mean": 16101.6,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.8126501200960768,
|
|
"grad_norm": 0.24831789534538956,
|
|
"learning_rate": 0.00011109968058719565,
|
|
"loss": 0.6451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6589419841766357,
|
|
"step": 3045,
|
|
"valid_targets_mean": 15459.0,
|
|
"valid_targets_min": 64
|
|
},
|
|
{
|
|
"epoch": 0.8139845209500934,
|
|
"grad_norm": 0.19532136745712955,
|
|
"learning_rate": 0.00011094646161115489,
|
|
"loss": 0.6412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6319899559020996,
|
|
"step": 3050,
|
|
"valid_targets_mean": 16658.7,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 0.8153189218041099,
|
|
"grad_norm": 0.3916604874994846,
|
|
"learning_rate": 0.00011079304762345307,
|
|
"loss": 0.6458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6420038938522339,
|
|
"step": 3055,
|
|
"valid_targets_mean": 16778.7,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.8166533226581265,
|
|
"grad_norm": 0.37864396364913205,
|
|
"learning_rate": 0.00011063943945637005,
|
|
"loss": 0.6355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6372954845428467,
|
|
"step": 3060,
|
|
"valid_targets_mean": 17428.5,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 0.817987723512143,
|
|
"grad_norm": 0.3870524602109993,
|
|
"learning_rate": 0.00011048563794323915,
|
|
"loss": 0.6463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6788116693496704,
|
|
"step": 3065,
|
|
"valid_targets_mean": 15602.4,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 0.8193221243661596,
|
|
"grad_norm": 0.2977983842070513,
|
|
"learning_rate": 0.00011033164391844259,
|
|
"loss": 0.6508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6552585363388062,
|
|
"step": 3070,
|
|
"valid_targets_mean": 15580.0,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 0.8206565252201762,
|
|
"grad_norm": 0.26537308607076826,
|
|
"learning_rate": 0.00011017745821740696,
|
|
"loss": 0.6507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6489561200141907,
|
|
"step": 3075,
|
|
"valid_targets_mean": 15627.2,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 0.8219909260741927,
|
|
"grad_norm": 0.3351573792494936,
|
|
"learning_rate": 0.00011002308167659877,
|
|
"loss": 0.6611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6333989500999451,
|
|
"step": 3080,
|
|
"valid_targets_mean": 16951.4,
|
|
"valid_targets_min": 129
|
|
},
|
|
{
|
|
"epoch": 0.8233253269282093,
|
|
"grad_norm": 0.3732147638583794,
|
|
"learning_rate": 0.00010986851513351976,
|
|
"loss": 0.6627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6664532423019409,
|
|
"step": 3085,
|
|
"valid_targets_mean": 15145.9,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 0.8246597277822257,
|
|
"grad_norm": 0.32793741970703777,
|
|
"learning_rate": 0.00010971375942670251,
|
|
"loss": 0.6897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7011292576789856,
|
|
"step": 3090,
|
|
"valid_targets_mean": 16805.0,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 0.8259941286362423,
|
|
"grad_norm": 0.37753399308849905,
|
|
"learning_rate": 0.00010955881539570581,
|
|
"loss": 0.6474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.661662757396698,
|
|
"step": 3095,
|
|
"valid_targets_mean": 15650.4,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.8273285294902589,
|
|
"grad_norm": 0.30408206326658377,
|
|
"learning_rate": 0.00010940368388111008,
|
|
"loss": 0.6332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6272863745689392,
|
|
"step": 3100,
|
|
"valid_targets_mean": 15785.5,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.8286629303442754,
|
|
"grad_norm": 0.2528560431432323,
|
|
"learning_rate": 0.00010924836572451287,
|
|
"loss": 0.6519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6672605276107788,
|
|
"step": 3105,
|
|
"valid_targets_mean": 16141.8,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 0.829997331198292,
|
|
"grad_norm": 0.2883692488906481,
|
|
"learning_rate": 0.00010909286176852432,
|
|
"loss": 0.6529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.664074182510376,
|
|
"step": 3110,
|
|
"valid_targets_mean": 16108.2,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 0.8313317320523085,
|
|
"grad_norm": 0.253752010702719,
|
|
"learning_rate": 0.0001089371728567625,
|
|
"loss": 0.644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6300142407417297,
|
|
"step": 3115,
|
|
"valid_targets_mean": 16448.3,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 0.8326661329063251,
|
|
"grad_norm": 0.19621076323735484,
|
|
"learning_rate": 0.00010878129983384886,
|
|
"loss": 0.6421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6365523338317871,
|
|
"step": 3120,
|
|
"valid_targets_mean": 16571.9,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.8340005337603416,
|
|
"grad_norm": 0.31043696213151706,
|
|
"learning_rate": 0.00010862524354540369,
|
|
"loss": 0.6392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6390799880027771,
|
|
"step": 3125,
|
|
"valid_targets_mean": 16295.1,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.8353349346143581,
|
|
"grad_norm": 0.2660416805962427,
|
|
"learning_rate": 0.00010846900483804152,
|
|
"loss": 0.6536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6644906997680664,
|
|
"step": 3130,
|
|
"valid_targets_mean": 16289.7,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 0.8366693354683747,
|
|
"grad_norm": 0.2860410130544462,
|
|
"learning_rate": 0.00010831258455936645,
|
|
"loss": 0.6523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6767973899841309,
|
|
"step": 3135,
|
|
"valid_targets_mean": 15392.7,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 0.8380037363223912,
|
|
"grad_norm": 0.5964225786420335,
|
|
"learning_rate": 0.00010815598355796771,
|
|
"loss": 0.653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6599225997924805,
|
|
"step": 3140,
|
|
"valid_targets_mean": 15163.7,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 0.8393381371764078,
|
|
"grad_norm": 0.40273457380132044,
|
|
"learning_rate": 0.0001079992026834149,
|
|
"loss": 0.6595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6746566295623779,
|
|
"step": 3145,
|
|
"valid_targets_mean": 16054.7,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 0.8406725380304243,
|
|
"grad_norm": 0.37269235274804263,
|
|
"learning_rate": 0.00010784224278625345,
|
|
"loss": 0.6499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6395473480224609,
|
|
"step": 3150,
|
|
"valid_targets_mean": 16422.7,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 0.8420069388844409,
|
|
"grad_norm": 0.33447721838177247,
|
|
"learning_rate": 0.00010768510471799996,
|
|
"loss": 0.6392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6642786264419556,
|
|
"step": 3155,
|
|
"valid_targets_mean": 16876.6,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 0.8433413397384575,
|
|
"grad_norm": 0.2952224423385826,
|
|
"learning_rate": 0.00010752778933113774,
|
|
"loss": 0.6496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6484600305557251,
|
|
"step": 3160,
|
|
"valid_targets_mean": 15536.5,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 0.844675740592474,
|
|
"grad_norm": 0.3861238288551174,
|
|
"learning_rate": 0.00010737029747911191,
|
|
"loss": 0.6658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.643525242805481,
|
|
"step": 3165,
|
|
"valid_targets_mean": 16309.5,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 0.8460101414464906,
|
|
"grad_norm": 0.3135056303201646,
|
|
"learning_rate": 0.00010721263001632503,
|
|
"loss": 0.6531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6588514447212219,
|
|
"step": 3170,
|
|
"valid_targets_mean": 16432.4,
|
|
"valid_targets_min": 94
|
|
},
|
|
{
|
|
"epoch": 0.847344542300507,
|
|
"grad_norm": 0.2817059242378593,
|
|
"learning_rate": 0.00010705478779813235,
|
|
"loss": 0.6551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.682015061378479,
|
|
"step": 3175,
|
|
"valid_targets_mean": 15666.3,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.8486789431545236,
|
|
"grad_norm": 0.3975996084790914,
|
|
"learning_rate": 0.00010689677168083711,
|
|
"loss": 0.6374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6828092932701111,
|
|
"step": 3180,
|
|
"valid_targets_mean": 14917.6,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.8500133440085401,
|
|
"grad_norm": 0.3058355653351926,
|
|
"learning_rate": 0.00010673858252168603,
|
|
"loss": 0.6542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6468585729598999,
|
|
"step": 3185,
|
|
"valid_targets_mean": 15833.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.8513477448625567,
|
|
"grad_norm": 0.4211216089487226,
|
|
"learning_rate": 0.00010658022117886457,
|
|
"loss": 0.6511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6917360424995422,
|
|
"step": 3190,
|
|
"valid_targets_mean": 16047.0,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.8526821457165733,
|
|
"grad_norm": 0.30431416270371453,
|
|
"learning_rate": 0.00010642168851149229,
|
|
"loss": 0.6588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6483701467514038,
|
|
"step": 3195,
|
|
"valid_targets_mean": 16088.1,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 0.8540165465705898,
|
|
"grad_norm": 0.37426473290512485,
|
|
"learning_rate": 0.00010626298537961821,
|
|
"loss": 0.6423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.663414478302002,
|
|
"step": 3200,
|
|
"valid_targets_mean": 16697.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.8553509474246064,
|
|
"grad_norm": 0.2570405060046422,
|
|
"learning_rate": 0.00010610411264421611,
|
|
"loss": 0.6507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6568108797073364,
|
|
"step": 3205,
|
|
"valid_targets_mean": 16679.7,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.8566853482786229,
|
|
"grad_norm": 0.37474591801273793,
|
|
"learning_rate": 0.0001059450711671799,
|
|
"loss": 0.6544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6513127088546753,
|
|
"step": 3210,
|
|
"valid_targets_mean": 15614.5,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.8580197491326395,
|
|
"grad_norm": 0.3141516713671115,
|
|
"learning_rate": 0.0001057858618113189,
|
|
"loss": 0.6644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6733075380325317,
|
|
"step": 3215,
|
|
"valid_targets_mean": 15210.8,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.859354149986656,
|
|
"grad_norm": 0.2974994549393474,
|
|
"learning_rate": 0.00010562648544035323,
|
|
"loss": 0.6449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6401640772819519,
|
|
"step": 3220,
|
|
"valid_targets_mean": 15541.6,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 0.8606885508406725,
|
|
"grad_norm": 0.3388673466882177,
|
|
"learning_rate": 0.00010546694291890902,
|
|
"loss": 0.6254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6491298675537109,
|
|
"step": 3225,
|
|
"valid_targets_mean": 14598.7,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 0.8620229516946891,
|
|
"grad_norm": 0.4371152887391013,
|
|
"learning_rate": 0.00010530723511251382,
|
|
"loss": 0.6544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6556539535522461,
|
|
"step": 3230,
|
|
"valid_targets_mean": 15699.9,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 0.8633573525487056,
|
|
"grad_norm": 0.5767723282397708,
|
|
"learning_rate": 0.0001051473628875918,
|
|
"loss": 0.6468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6518882513046265,
|
|
"step": 3235,
|
|
"valid_targets_mean": 15763.0,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 0.8646917534027222,
|
|
"grad_norm": 0.30426789707079194,
|
|
"learning_rate": 0.00010498732711145918,
|
|
"loss": 0.6601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6652976274490356,
|
|
"step": 3240,
|
|
"valid_targets_mean": 15535.9,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 0.8660261542567387,
|
|
"grad_norm": 0.4130220093019007,
|
|
"learning_rate": 0.00010482712865231942,
|
|
"loss": 0.623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6167925596237183,
|
|
"step": 3245,
|
|
"valid_targets_mean": 16325.2,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.8673605551107553,
|
|
"grad_norm": 0.36276171852808714,
|
|
"learning_rate": 0.00010466676837925857,
|
|
"loss": 0.6441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6520528793334961,
|
|
"step": 3250,
|
|
"valid_targets_mean": 16459.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.8686949559647719,
|
|
"grad_norm": 0.4067218367888341,
|
|
"learning_rate": 0.00010450624716224045,
|
|
"loss": 0.6573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6417218446731567,
|
|
"step": 3255,
|
|
"valid_targets_mean": 15986.3,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 0.8700293568187883,
|
|
"grad_norm": 0.24818738071714427,
|
|
"learning_rate": 0.00010434556587210214,
|
|
"loss": 0.6394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.64249587059021,
|
|
"step": 3260,
|
|
"valid_targets_mean": 17100.3,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 0.8713637576728049,
|
|
"grad_norm": 0.3850215570427052,
|
|
"learning_rate": 0.000104184725380549,
|
|
"loss": 0.6619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6386861801147461,
|
|
"step": 3265,
|
|
"valid_targets_mean": 16787.6,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 0.8726981585268214,
|
|
"grad_norm": 0.3584729041483362,
|
|
"learning_rate": 0.0001040237265601502,
|
|
"loss": 0.6511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.627690315246582,
|
|
"step": 3270,
|
|
"valid_targets_mean": 17029.8,
|
|
"valid_targets_min": 6
|
|
},
|
|
{
|
|
"epoch": 0.874032559380838,
|
|
"grad_norm": 0.4339765963953783,
|
|
"learning_rate": 0.00010386257028433366,
|
|
"loss": 0.6427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6622530817985535,
|
|
"step": 3275,
|
|
"valid_targets_mean": 14468.4,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 0.8753669602348545,
|
|
"grad_norm": 0.2825943811936514,
|
|
"learning_rate": 0.00010370125742738173,
|
|
"loss": 0.6424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6384086608886719,
|
|
"step": 3280,
|
|
"valid_targets_mean": 16163.9,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.8767013610888711,
|
|
"grad_norm": 0.29040496771866525,
|
|
"learning_rate": 0.00010353978886442605,
|
|
"loss": 0.6662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6597708463668823,
|
|
"step": 3285,
|
|
"valid_targets_mean": 15830.2,
|
|
"valid_targets_min": 133
|
|
},
|
|
{
|
|
"epoch": 0.8780357619428877,
|
|
"grad_norm": 0.41728095971133644,
|
|
"learning_rate": 0.00010337816547144308,
|
|
"loss": 0.6708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6693861484527588,
|
|
"step": 3290,
|
|
"valid_targets_mean": 15653.0,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 0.8793701627969042,
|
|
"grad_norm": 0.3651738291095255,
|
|
"learning_rate": 0.00010321638812524917,
|
|
"loss": 0.6547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.659409761428833,
|
|
"step": 3295,
|
|
"valid_targets_mean": 16814.6,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 0.8807045636509208,
|
|
"grad_norm": 0.3325618768861608,
|
|
"learning_rate": 0.00010305445770349593,
|
|
"loss": 0.6414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6621459722518921,
|
|
"step": 3300,
|
|
"valid_targets_mean": 16536.9,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 0.8820389645049372,
|
|
"grad_norm": 0.28918434164279605,
|
|
"learning_rate": 0.00010289237508466536,
|
|
"loss": 0.6448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6440975666046143,
|
|
"step": 3305,
|
|
"valid_targets_mean": 15207.3,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 0.8833733653589538,
|
|
"grad_norm": 0.4985808542613525,
|
|
"learning_rate": 0.00010273014114806517,
|
|
"loss": 0.6388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.642197847366333,
|
|
"step": 3310,
|
|
"valid_targets_mean": 15152.4,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 0.8847077662129704,
|
|
"grad_norm": 0.39850878457550165,
|
|
"learning_rate": 0.000102567756773824,
|
|
"loss": 0.6527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6053390502929688,
|
|
"step": 3315,
|
|
"valid_targets_mean": 16184.6,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 0.8860421670669869,
|
|
"grad_norm": 0.30925451590653463,
|
|
"learning_rate": 0.00010240522284288657,
|
|
"loss": 0.6404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6605707406997681,
|
|
"step": 3320,
|
|
"valid_targets_mean": 14017.9,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.8873765679210035,
|
|
"grad_norm": 0.3691668202190612,
|
|
"learning_rate": 0.00010224254023700899,
|
|
"loss": 0.652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6467193961143494,
|
|
"step": 3325,
|
|
"valid_targets_mean": 16063.3,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.88871096877502,
|
|
"grad_norm": 0.2674717558464169,
|
|
"learning_rate": 0.00010207970983875395,
|
|
"loss": 0.6522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6779950857162476,
|
|
"step": 3330,
|
|
"valid_targets_mean": 15231.2,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.8900453696290366,
|
|
"grad_norm": 0.20038688374080343,
|
|
"learning_rate": 0.00010191673253148589,
|
|
"loss": 0.6484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6465331315994263,
|
|
"step": 3335,
|
|
"valid_targets_mean": 15670.2,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 0.8913797704830531,
|
|
"grad_norm": 0.28330757770723614,
|
|
"learning_rate": 0.00010175360919936623,
|
|
"loss": 0.6506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6360025405883789,
|
|
"step": 3340,
|
|
"valid_targets_mean": 16725.9,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 0.8927141713370697,
|
|
"grad_norm": 0.3439479225547816,
|
|
"learning_rate": 0.00010159034072734865,
|
|
"loss": 0.63,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6294069886207581,
|
|
"step": 3345,
|
|
"valid_targets_mean": 16449.0,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 0.8940485721910862,
|
|
"grad_norm": 0.2700394489223394,
|
|
"learning_rate": 0.00010142692800117416,
|
|
"loss": 0.6418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6559712886810303,
|
|
"step": 3350,
|
|
"valid_targets_mean": 16355.4,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.8953829730451027,
|
|
"grad_norm": 0.3290237145673029,
|
|
"learning_rate": 0.00010126337190736636,
|
|
"loss": 0.65,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6721099615097046,
|
|
"step": 3355,
|
|
"valid_targets_mean": 15148.3,
|
|
"valid_targets_min": 67
|
|
},
|
|
{
|
|
"epoch": 0.8967173738991193,
|
|
"grad_norm": 0.24940716395634227,
|
|
"learning_rate": 0.00010109967333322669,
|
|
"loss": 0.6475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6641243696212769,
|
|
"step": 3360,
|
|
"valid_targets_mean": 15853.0,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 0.8980517747531358,
|
|
"grad_norm": 0.3767555898917068,
|
|
"learning_rate": 0.00010093583316682945,
|
|
"loss": 0.6475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6830979585647583,
|
|
"step": 3365,
|
|
"valid_targets_mean": 14323.9,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 0.8993861756071524,
|
|
"grad_norm": 0.31077570308089764,
|
|
"learning_rate": 0.00010077185229701722,
|
|
"loss": 0.6438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6360262632369995,
|
|
"step": 3370,
|
|
"valid_targets_mean": 15914.6,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 0.900720576461169,
|
|
"grad_norm": 0.35636066897001256,
|
|
"learning_rate": 0.00010060773161339574,
|
|
"loss": 0.6381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6645758152008057,
|
|
"step": 3375,
|
|
"valid_targets_mean": 15709.9,
|
|
"valid_targets_min": 78
|
|
},
|
|
{
|
|
"epoch": 0.9020549773151855,
|
|
"grad_norm": 0.3168169499045184,
|
|
"learning_rate": 0.00010044347200632943,
|
|
"loss": 0.638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6317712664604187,
|
|
"step": 3380,
|
|
"valid_targets_mean": 15377.5,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.9033893781692021,
|
|
"grad_norm": 0.34833612107690176,
|
|
"learning_rate": 0.00010027907436693623,
|
|
"loss": 0.6494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6437000036239624,
|
|
"step": 3385,
|
|
"valid_targets_mean": 14439.3,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.9047237790232185,
|
|
"grad_norm": 0.22139220732501388,
|
|
"learning_rate": 0.00010011453958708297,
|
|
"loss": 0.6508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6224488615989685,
|
|
"step": 3390,
|
|
"valid_targets_mean": 15715.7,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 0.9060581798772351,
|
|
"grad_norm": 0.27304939667752415,
|
|
"learning_rate": 9.994986855938047e-05,
|
|
"loss": 0.6501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6275122761726379,
|
|
"step": 3395,
|
|
"valid_targets_mean": 16695.6,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.9073925807312516,
|
|
"grad_norm": 0.22683624420794118,
|
|
"learning_rate": 9.978506217717874e-05,
|
|
"loss": 0.6419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6165873408317566,
|
|
"step": 3400,
|
|
"valid_targets_mean": 16133.7,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 0.9087269815852682,
|
|
"grad_norm": 0.3781598294210209,
|
|
"learning_rate": 9.962012133456204e-05,
|
|
"loss": 0.6531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6441546678543091,
|
|
"step": 3405,
|
|
"valid_targets_mean": 16254.3,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 0.9100613824392848,
|
|
"grad_norm": 0.3485691664629273,
|
|
"learning_rate": 9.945504692634409e-05,
|
|
"loss": 0.6384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6154640913009644,
|
|
"step": 3410,
|
|
"valid_targets_mean": 15014.2,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 0.9113957832933013,
|
|
"grad_norm": 0.2941331404988799,
|
|
"learning_rate": 9.928983984806326e-05,
|
|
"loss": 0.6405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6541690826416016,
|
|
"step": 3415,
|
|
"valid_targets_mean": 16053.0,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.9127301841473179,
|
|
"grad_norm": 0.2940870924679832,
|
|
"learning_rate": 9.912450099597765e-05,
|
|
"loss": 0.6429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6771601438522339,
|
|
"step": 3420,
|
|
"valid_targets_mean": 15410.8,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.9140645850013344,
|
|
"grad_norm": 0.2540267547781181,
|
|
"learning_rate": 9.895903126706019e-05,
|
|
"loss": 0.6478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6407514810562134,
|
|
"step": 3425,
|
|
"valid_targets_mean": 15978.2,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 0.915398985855351,
|
|
"grad_norm": 0.58827205007206,
|
|
"learning_rate": 9.879343155899382e-05,
|
|
"loss": 0.6342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6365908980369568,
|
|
"step": 3430,
|
|
"valid_targets_mean": 15551.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 0.9167333867093675,
|
|
"grad_norm": 0.48345445350401867,
|
|
"learning_rate": 9.862770277016676e-05,
|
|
"loss": 0.647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6549006700515747,
|
|
"step": 3435,
|
|
"valid_targets_mean": 16312.3,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.918067787563384,
|
|
"grad_norm": 0.6096917662718333,
|
|
"learning_rate": 9.846184579966733e-05,
|
|
"loss": 0.6416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6357414126396179,
|
|
"step": 3440,
|
|
"valid_targets_mean": 16554.4,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 0.9194021884174006,
|
|
"grad_norm": 0.5175036160963103,
|
|
"learning_rate": 9.829586154727933e-05,
|
|
"loss": 0.6507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6284055709838867,
|
|
"step": 3445,
|
|
"valid_targets_mean": 15700.8,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 0.9207365892714171,
|
|
"grad_norm": 0.38330154473288014,
|
|
"learning_rate": 9.812975091347706e-05,
|
|
"loss": 0.6359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6508110761642456,
|
|
"step": 3450,
|
|
"valid_targets_mean": 15062.5,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 0.9220709901254337,
|
|
"grad_norm": 0.4369589415188452,
|
|
"learning_rate": 9.796351479942047e-05,
|
|
"loss": 0.6491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6330595016479492,
|
|
"step": 3455,
|
|
"valid_targets_mean": 16589.2,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 0.9234053909794502,
|
|
"grad_norm": 0.428584499898361,
|
|
"learning_rate": 9.779715410695015e-05,
|
|
"loss": 0.6463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6327037811279297,
|
|
"step": 3460,
|
|
"valid_targets_mean": 16817.5,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.9247397918334668,
|
|
"grad_norm": 0.5171308232638616,
|
|
"learning_rate": 9.76306697385827e-05,
|
|
"loss": 0.6502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380854845046997,
|
|
"step": 3465,
|
|
"valid_targets_mean": 16331.6,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 0.9260741926874834,
|
|
"grad_norm": 0.5943448075228943,
|
|
"learning_rate": 9.746406259750552e-05,
|
|
"loss": 0.6614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6656478047370911,
|
|
"step": 3470,
|
|
"valid_targets_mean": 14692.4,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.9274085935414998,
|
|
"grad_norm": 0.45121493438958316,
|
|
"learning_rate": 9.729733358757213e-05,
|
|
"loss": 0.6312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6517891883850098,
|
|
"step": 3475,
|
|
"valid_targets_mean": 16139.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.9287429943955164,
|
|
"grad_norm": 0.2765978957054617,
|
|
"learning_rate": 9.713048361329715e-05,
|
|
"loss": 0.644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6355182528495789,
|
|
"step": 3480,
|
|
"valid_targets_mean": 16863.1,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.9300773952495329,
|
|
"grad_norm": 0.5155652814279078,
|
|
"learning_rate": 9.696351357985154e-05,
|
|
"loss": 0.6605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6674410104751587,
|
|
"step": 3485,
|
|
"valid_targets_mean": 14653.7,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.9314117961035495,
|
|
"grad_norm": 0.5064196677900263,
|
|
"learning_rate": 9.679642439305744e-05,
|
|
"loss": 0.6589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6703199148178101,
|
|
"step": 3490,
|
|
"valid_targets_mean": 16033.6,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.932746196957566,
|
|
"grad_norm": 0.5571820953695459,
|
|
"learning_rate": 9.662921695938354e-05,
|
|
"loss": 0.6379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6885676383972168,
|
|
"step": 3495,
|
|
"valid_targets_mean": 15294.4,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 0.9340805978115826,
|
|
"grad_norm": 0.39708045174193374,
|
|
"learning_rate": 9.646189218593992e-05,
|
|
"loss": 0.647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6267415881156921,
|
|
"step": 3500,
|
|
"valid_targets_mean": 15537.8,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 0.9354149986655992,
|
|
"grad_norm": 0.43893302118614175,
|
|
"learning_rate": 9.629445098047334e-05,
|
|
"loss": 0.6566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6632278561592102,
|
|
"step": 3505,
|
|
"valid_targets_mean": 16264.6,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 0.9367493995196157,
|
|
"grad_norm": 0.38062901384662845,
|
|
"learning_rate": 9.61268942513621e-05,
|
|
"loss": 0.6338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6326217651367188,
|
|
"step": 3510,
|
|
"valid_targets_mean": 14996.4,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 0.9380838003736323,
|
|
"grad_norm": 0.49266598679797097,
|
|
"learning_rate": 9.595922290761128e-05,
|
|
"loss": 0.6516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6653306484222412,
|
|
"step": 3515,
|
|
"valid_targets_mean": 16628.6,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 0.9394182012276487,
|
|
"grad_norm": 0.35614289136880733,
|
|
"learning_rate": 9.579143785884779e-05,
|
|
"loss": 0.6287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6084467768669128,
|
|
"step": 3520,
|
|
"valid_targets_mean": 16826.6,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 0.9407526020816653,
|
|
"grad_norm": 0.37113715768790856,
|
|
"learning_rate": 9.562354001531532e-05,
|
|
"loss": 0.655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6197367310523987,
|
|
"step": 3525,
|
|
"valid_targets_mean": 15503.0,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.9420870029356819,
|
|
"grad_norm": 0.41762638777021666,
|
|
"learning_rate": 9.545553028786952e-05,
|
|
"loss": 0.6315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234275102615356,
|
|
"step": 3530,
|
|
"valid_targets_mean": 15646.6,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.9434214037896984,
|
|
"grad_norm": 0.44380341857698374,
|
|
"learning_rate": 9.5287409587973e-05,
|
|
"loss": 0.6407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6393918395042419,
|
|
"step": 3535,
|
|
"valid_targets_mean": 15084.2,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.944755804643715,
|
|
"grad_norm": 0.3892234490208754,
|
|
"learning_rate": 9.511917882769042e-05,
|
|
"loss": 0.6391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6391283273696899,
|
|
"step": 3540,
|
|
"valid_targets_mean": 16098.1,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 0.9460902054977315,
|
|
"grad_norm": 0.43778742220118544,
|
|
"learning_rate": 9.495083891968351e-05,
|
|
"loss": 0.6486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234838962554932,
|
|
"step": 3545,
|
|
"valid_targets_mean": 15090.8,
|
|
"valid_targets_min": 159
|
|
},
|
|
{
|
|
"epoch": 0.9474246063517481,
|
|
"grad_norm": 0.381850694877453,
|
|
"learning_rate": 9.478239077720615e-05,
|
|
"loss": 0.6499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6469874978065491,
|
|
"step": 3550,
|
|
"valid_targets_mean": 16223.9,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.9487590072057646,
|
|
"grad_norm": 0.41317054151838634,
|
|
"learning_rate": 9.461383531409937e-05,
|
|
"loss": 0.6389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6466406583786011,
|
|
"step": 3555,
|
|
"valid_targets_mean": 16031.7,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 0.9500934080597812,
|
|
"grad_norm": 0.30576626565173615,
|
|
"learning_rate": 9.444517344478645e-05,
|
|
"loss": 0.6482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6074718236923218,
|
|
"step": 3560,
|
|
"valid_targets_mean": 16289.6,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 0.9514278089137977,
|
|
"grad_norm": 0.349699953608093,
|
|
"learning_rate": 9.427640608426789e-05,
|
|
"loss": 0.6425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6368805170059204,
|
|
"step": 3565,
|
|
"valid_targets_mean": 16044.6,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 0.9527622097678142,
|
|
"grad_norm": 0.34546045313356855,
|
|
"learning_rate": 9.410753414811654e-05,
|
|
"loss": 0.6377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6125216484069824,
|
|
"step": 3570,
|
|
"valid_targets_mean": 16506.6,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.9540966106218308,
|
|
"grad_norm": 0.3893315863348032,
|
|
"learning_rate": 9.393855855247254e-05,
|
|
"loss": 0.6687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6844526529312134,
|
|
"step": 3575,
|
|
"valid_targets_mean": 16149.5,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 0.9554310114758473,
|
|
"grad_norm": 0.2633863843990565,
|
|
"learning_rate": 9.376948021403838e-05,
|
|
"loss": 0.6567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6337841749191284,
|
|
"step": 3580,
|
|
"valid_targets_mean": 16207.8,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 0.9567654123298639,
|
|
"grad_norm": 0.29692519605292395,
|
|
"learning_rate": 9.360030005007399e-05,
|
|
"loss": 0.647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6229900121688843,
|
|
"step": 3585,
|
|
"valid_targets_mean": 15579.8,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.9580998131838805,
|
|
"grad_norm": 0.2757188736918943,
|
|
"learning_rate": 9.343101897839169e-05,
|
|
"loss": 0.6508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6774806976318359,
|
|
"step": 3590,
|
|
"valid_targets_mean": 15031.3,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 0.959434214037897,
|
|
"grad_norm": 0.31332494454541954,
|
|
"learning_rate": 9.326163791735116e-05,
|
|
"loss": 0.6292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6396254301071167,
|
|
"step": 3595,
|
|
"valid_targets_mean": 16527.8,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.9607686148919136,
|
|
"grad_norm": 0.36077294744202587,
|
|
"learning_rate": 9.309215778585461e-05,
|
|
"loss": 0.6457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6841853857040405,
|
|
"step": 3600,
|
|
"valid_targets_mean": 16090.1,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 0.96210301574593,
|
|
"grad_norm": 0.2672498287309418,
|
|
"learning_rate": 9.29225795033417e-05,
|
|
"loss": 0.6541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6412582397460938,
|
|
"step": 3605,
|
|
"valid_targets_mean": 16092.3,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.9634374165999466,
|
|
"grad_norm": 0.2540687238294275,
|
|
"learning_rate": 9.275290398978454e-05,
|
|
"loss": 0.6652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6630794405937195,
|
|
"step": 3610,
|
|
"valid_targets_mean": 14939.0,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 0.9647718174539631,
|
|
"grad_norm": 0.2131800114252568,
|
|
"learning_rate": 9.258313216568273e-05,
|
|
"loss": 0.6461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6630018949508667,
|
|
"step": 3615,
|
|
"valid_targets_mean": 16149.2,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.9661062183079797,
|
|
"grad_norm": 0.24902095013861963,
|
|
"learning_rate": 9.241326495205836e-05,
|
|
"loss": 0.6539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6484121084213257,
|
|
"step": 3620,
|
|
"valid_targets_mean": 15283.1,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.9674406191619963,
|
|
"grad_norm": 0.2524604516006915,
|
|
"learning_rate": 9.224330327045105e-05,
|
|
"loss": 0.6348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6242421269416809,
|
|
"step": 3625,
|
|
"valid_targets_mean": 15518.7,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 0.9687750200160128,
|
|
"grad_norm": 0.28238647446499715,
|
|
"learning_rate": 9.207324804291285e-05,
|
|
"loss": 0.6475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6292116641998291,
|
|
"step": 3630,
|
|
"valid_targets_mean": 16086.4,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.9701094208700294,
|
|
"grad_norm": 0.29096513969754145,
|
|
"learning_rate": 9.190310019200338e-05,
|
|
"loss": 0.6444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6281774044036865,
|
|
"step": 3635,
|
|
"valid_targets_mean": 16468.9,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 0.9714438217240459,
|
|
"grad_norm": 0.2546985499074181,
|
|
"learning_rate": 9.173286064078465e-05,
|
|
"loss": 0.6639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6918833255767822,
|
|
"step": 3640,
|
|
"valid_targets_mean": 15578.0,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 0.9727782225780625,
|
|
"grad_norm": 0.25499970964039437,
|
|
"learning_rate": 9.156253031281625e-05,
|
|
"loss": 0.6588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6605820059776306,
|
|
"step": 3645,
|
|
"valid_targets_mean": 14916.7,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 0.974112623432079,
|
|
"grad_norm": 0.3389180129292924,
|
|
"learning_rate": 9.139211013215013e-05,
|
|
"loss": 0.6547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6656877398490906,
|
|
"step": 3650,
|
|
"valid_targets_mean": 15895.6,
|
|
"valid_targets_min": 143
|
|
},
|
|
{
|
|
"epoch": 0.9754470242860955,
|
|
"grad_norm": 0.25053024078219016,
|
|
"learning_rate": 9.122160102332583e-05,
|
|
"loss": 0.6503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6228987574577332,
|
|
"step": 3655,
|
|
"valid_targets_mean": 16149.6,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 0.9767814251401121,
|
|
"grad_norm": 0.2178268898809574,
|
|
"learning_rate": 9.105100391136523e-05,
|
|
"loss": 0.6346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6444330215454102,
|
|
"step": 3660,
|
|
"valid_targets_mean": 15280.1,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 0.9781158259941286,
|
|
"grad_norm": 0.27511389097304567,
|
|
"learning_rate": 9.088031972176764e-05,
|
|
"loss": 0.6362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6305812001228333,
|
|
"step": 3665,
|
|
"valid_targets_mean": 16102.7,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 0.9794502268481452,
|
|
"grad_norm": 0.27125123958326863,
|
|
"learning_rate": 9.070954938050482e-05,
|
|
"loss": 0.6569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6606098413467407,
|
|
"step": 3670,
|
|
"valid_targets_mean": 15203.1,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 0.9807846277021617,
|
|
"grad_norm": 0.30513794835142677,
|
|
"learning_rate": 9.053869381401589e-05,
|
|
"loss": 0.6471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6514785885810852,
|
|
"step": 3675,
|
|
"valid_targets_mean": 15489.2,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.9821190285561783,
|
|
"grad_norm": 0.2674682260881522,
|
|
"learning_rate": 9.036775394920228e-05,
|
|
"loss": 0.6474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6139630079269409,
|
|
"step": 3680,
|
|
"valid_targets_mean": 17697.0,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 0.9834534294101949,
|
|
"grad_norm": 0.42011865968688955,
|
|
"learning_rate": 9.01967307134228e-05,
|
|
"loss": 0.643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6135485768318176,
|
|
"step": 3685,
|
|
"valid_targets_mean": 16937.3,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 0.9847878302642114,
|
|
"grad_norm": 0.25991299262090733,
|
|
"learning_rate": 9.00256250344885e-05,
|
|
"loss": 0.6352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6385388970375061,
|
|
"step": 3690,
|
|
"valid_targets_mean": 17042.7,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 0.9861222311182279,
|
|
"grad_norm": 0.3094872030920946,
|
|
"learning_rate": 8.985443784065774e-05,
|
|
"loss": 0.6347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6620912551879883,
|
|
"step": 3695,
|
|
"valid_targets_mean": 16172.8,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 0.9874566319722444,
|
|
"grad_norm": 0.2648905621560967,
|
|
"learning_rate": 8.968317006063107e-05,
|
|
"loss": 0.643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6437067985534668,
|
|
"step": 3700,
|
|
"valid_targets_mean": 15814.3,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 0.988791032826261,
|
|
"grad_norm": 0.2872339382049347,
|
|
"learning_rate": 8.951182262354624e-05,
|
|
"loss": 0.6226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.634140133857727,
|
|
"step": 3705,
|
|
"valid_targets_mean": 15975.5,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 0.9901254336802775,
|
|
"grad_norm": 0.22777511656801333,
|
|
"learning_rate": 8.934039645897316e-05,
|
|
"loss": 0.6508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6716867685317993,
|
|
"step": 3710,
|
|
"valid_targets_mean": 16354.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.9914598345342941,
|
|
"grad_norm": 0.2817686690598872,
|
|
"learning_rate": 8.916889249690877e-05,
|
|
"loss": 0.6598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6462900638580322,
|
|
"step": 3715,
|
|
"valid_targets_mean": 15771.1,
|
|
"valid_targets_min": 120
|
|
},
|
|
{
|
|
"epoch": 0.9927942353883107,
|
|
"grad_norm": 0.2206326896920367,
|
|
"learning_rate": 8.899731166777216e-05,
|
|
"loss": 0.6583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6339864134788513,
|
|
"step": 3720,
|
|
"valid_targets_mean": 16834.7,
|
|
"valid_targets_min": 98
|
|
},
|
|
{
|
|
"epoch": 0.9941286362423272,
|
|
"grad_norm": 0.5024525222279345,
|
|
"learning_rate": 8.882565490239935e-05,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6254690885543823,
|
|
"step": 3725,
|
|
"valid_targets_mean": 15646.8,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.9954630370963438,
|
|
"grad_norm": 0.3856087446468771,
|
|
"learning_rate": 8.865392313203839e-05,
|
|
"loss": 0.6402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6251246333122253,
|
|
"step": 3730,
|
|
"valid_targets_mean": 15004.9,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 0.9967974379503602,
|
|
"grad_norm": 0.40239943536134476,
|
|
"learning_rate": 8.848211728834415e-05,
|
|
"loss": 0.6633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6419751644134521,
|
|
"step": 3735,
|
|
"valid_targets_mean": 15467.9,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 0.9981318388043768,
|
|
"grad_norm": 0.31304776164504733,
|
|
"learning_rate": 8.831023830337348e-05,
|
|
"loss": 0.6602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6425862312316895,
|
|
"step": 3740,
|
|
"valid_targets_mean": 14812.8,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 0.9994662396583934,
|
|
"grad_norm": 0.35589672329273264,
|
|
"learning_rate": 8.813828710957987e-05,
|
|
"loss": 0.6247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6540701389312744,
|
|
"step": 3745,
|
|
"valid_targets_mean": 15883.5,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.00080064051241,
|
|
"grad_norm": 0.26760563380752905,
|
|
"learning_rate": 8.796626463980863e-05,
|
|
"loss": 0.6313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6551904678344727,
|
|
"step": 3750,
|
|
"valid_targets_mean": 17182.7,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 1.0021350413664265,
|
|
"grad_norm": 0.31015553596849776,
|
|
"learning_rate": 8.779417182729181e-05,
|
|
"loss": 0.6395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6176386475563049,
|
|
"step": 3755,
|
|
"valid_targets_mean": 16216.0,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 1.003469442220443,
|
|
"grad_norm": 0.19888626057576428,
|
|
"learning_rate": 8.762200960564295e-05,
|
|
"loss": 0.6428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6564364433288574,
|
|
"step": 3760,
|
|
"valid_targets_mean": 16117.9,
|
|
"valid_targets_min": 216
|
|
},
|
|
{
|
|
"epoch": 1.0048038430744595,
|
|
"grad_norm": 0.31576042269966875,
|
|
"learning_rate": 8.744977890885218e-05,
|
|
"loss": 0.6372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6488126516342163,
|
|
"step": 3765,
|
|
"valid_targets_mean": 15915.4,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 1.006138243928476,
|
|
"grad_norm": 0.37031993459449786,
|
|
"learning_rate": 8.727748067128116e-05,
|
|
"loss": 0.6441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6726912260055542,
|
|
"step": 3770,
|
|
"valid_targets_mean": 14668.3,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.0074726447824927,
|
|
"grad_norm": 0.25852458130057065,
|
|
"learning_rate": 8.71051158276579e-05,
|
|
"loss": 0.6408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.630450963973999,
|
|
"step": 3775,
|
|
"valid_targets_mean": 17935.9,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.0088070456365092,
|
|
"grad_norm": 0.2507167855310047,
|
|
"learning_rate": 8.693268531307182e-05,
|
|
"loss": 0.6495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6480845808982849,
|
|
"step": 3780,
|
|
"valid_targets_mean": 15486.5,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.0101414464905258,
|
|
"grad_norm": 0.25299995341948267,
|
|
"learning_rate": 8.676019006296851e-05,
|
|
"loss": 0.6424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6288371086120605,
|
|
"step": 3785,
|
|
"valid_targets_mean": 16877.4,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 1.0114758473445422,
|
|
"grad_norm": 0.4671268307313036,
|
|
"learning_rate": 8.658763101314484e-05,
|
|
"loss": 0.647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6182825565338135,
|
|
"step": 3790,
|
|
"valid_targets_mean": 15726.7,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 1.0128102481985588,
|
|
"grad_norm": 0.4394010424816238,
|
|
"learning_rate": 8.64150090997438e-05,
|
|
"loss": 0.6323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6484892964363098,
|
|
"step": 3795,
|
|
"valid_targets_mean": 15794.6,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 1.0141446490525754,
|
|
"grad_norm": 0.4487423936581022,
|
|
"learning_rate": 8.624232525924936e-05,
|
|
"loss": 0.6433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6359849572181702,
|
|
"step": 3800,
|
|
"valid_targets_mean": 16178.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.015479049906592,
|
|
"grad_norm": 0.41831834818461444,
|
|
"learning_rate": 8.606958042848145e-05,
|
|
"loss": 0.6422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6211769580841064,
|
|
"step": 3805,
|
|
"valid_targets_mean": 16291.6,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.0168134507606086,
|
|
"grad_norm": 0.45389931206375994,
|
|
"learning_rate": 8.589677554459094e-05,
|
|
"loss": 0.625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6307047605514526,
|
|
"step": 3810,
|
|
"valid_targets_mean": 17618.1,
|
|
"valid_targets_min": 87
|
|
},
|
|
{
|
|
"epoch": 1.018147851614625,
|
|
"grad_norm": 0.3268915481818196,
|
|
"learning_rate": 8.572391154505444e-05,
|
|
"loss": 0.6216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6273326873779297,
|
|
"step": 3815,
|
|
"valid_targets_mean": 15623.5,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 1.0194822524686415,
|
|
"grad_norm": 0.41308299425023076,
|
|
"learning_rate": 8.555098936766927e-05,
|
|
"loss": 0.6308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6591796278953552,
|
|
"step": 3820,
|
|
"valid_targets_mean": 14928.0,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.0208166533226581,
|
|
"grad_norm": 0.44942713175635873,
|
|
"learning_rate": 8.537800995054838e-05,
|
|
"loss": 0.6519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6796396374702454,
|
|
"step": 3825,
|
|
"valid_targets_mean": 15782.7,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 1.0221510541766747,
|
|
"grad_norm": 0.3653635630100857,
|
|
"learning_rate": 8.520497423211527e-05,
|
|
"loss": 0.6176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6224901080131531,
|
|
"step": 3830,
|
|
"valid_targets_mean": 15665.9,
|
|
"valid_targets_min": 110
|
|
},
|
|
{
|
|
"epoch": 1.0234854550306913,
|
|
"grad_norm": 0.3963653125859711,
|
|
"learning_rate": 8.503188315109881e-05,
|
|
"loss": 0.6472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6270386576652527,
|
|
"step": 3835,
|
|
"valid_targets_mean": 15768.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.0248198558847077,
|
|
"grad_norm": 0.45406483483665866,
|
|
"learning_rate": 8.485873764652832e-05,
|
|
"loss": 0.6325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6315596103668213,
|
|
"step": 3840,
|
|
"valid_targets_mean": 16419.9,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 1.0261542567387243,
|
|
"grad_norm": 0.44474720584480903,
|
|
"learning_rate": 8.468553865772826e-05,
|
|
"loss": 0.6202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6255053281784058,
|
|
"step": 3845,
|
|
"valid_targets_mean": 16084.4,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 1.0274886575927409,
|
|
"grad_norm": 0.39211860098514184,
|
|
"learning_rate": 8.451228712431332e-05,
|
|
"loss": 0.6344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6344472169876099,
|
|
"step": 3850,
|
|
"valid_targets_mean": 17080.2,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.0288230584467575,
|
|
"grad_norm": 0.39459268154462224,
|
|
"learning_rate": 8.433898398618319e-05,
|
|
"loss": 0.6433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6279346942901611,
|
|
"step": 3855,
|
|
"valid_targets_mean": 15763.7,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.0301574593007738,
|
|
"grad_norm": 0.45597533762198056,
|
|
"learning_rate": 8.416563018351758e-05,
|
|
"loss": 0.6209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6651809215545654,
|
|
"step": 3860,
|
|
"valid_targets_mean": 14995.1,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.0314918601547904,
|
|
"grad_norm": 0.36845803818318634,
|
|
"learning_rate": 8.399222665677105e-05,
|
|
"loss": 0.646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6424490213394165,
|
|
"step": 3865,
|
|
"valid_targets_mean": 16269.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.032826261008807,
|
|
"grad_norm": 0.3660998871304325,
|
|
"learning_rate": 8.381877434666784e-05,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6427116394042969,
|
|
"step": 3870,
|
|
"valid_targets_mean": 14471.5,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.0341606618628236,
|
|
"grad_norm": 0.35705162926889794,
|
|
"learning_rate": 8.364527419419696e-05,
|
|
"loss": 0.6268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6127498149871826,
|
|
"step": 3875,
|
|
"valid_targets_mean": 15339.7,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 1.0354950627168402,
|
|
"grad_norm": 0.3976760051887081,
|
|
"learning_rate": 8.347172714060686e-05,
|
|
"loss": 0.6366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6624436974525452,
|
|
"step": 3880,
|
|
"valid_targets_mean": 15717.8,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 1.0368294635708566,
|
|
"grad_norm": 0.3248289435744747,
|
|
"learning_rate": 8.32981341274005e-05,
|
|
"loss": 0.6228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6057945489883423,
|
|
"step": 3885,
|
|
"valid_targets_mean": 15022.4,
|
|
"valid_targets_min": 20
|
|
},
|
|
{
|
|
"epoch": 1.0381638644248732,
|
|
"grad_norm": 0.3799068194660505,
|
|
"learning_rate": 8.312449609633014e-05,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380169987678528,
|
|
"step": 3890,
|
|
"valid_targets_mean": 16899.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.0394982652788898,
|
|
"grad_norm": 0.27786217332276264,
|
|
"learning_rate": 8.295081398939227e-05,
|
|
"loss": 0.6264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6606595516204834,
|
|
"step": 3895,
|
|
"valid_targets_mean": 15556.7,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 1.0408326661329064,
|
|
"grad_norm": 0.3262178393145254,
|
|
"learning_rate": 8.277708874882252e-05,
|
|
"loss": 0.6278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.608328104019165,
|
|
"step": 3900,
|
|
"valid_targets_mean": 16115.2,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 1.042167066986923,
|
|
"grad_norm": 0.2967558099112656,
|
|
"learning_rate": 8.26033213170905e-05,
|
|
"loss": 0.6284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6086621284484863,
|
|
"step": 3905,
|
|
"valid_targets_mean": 16689.7,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 1.0435014678409393,
|
|
"grad_norm": 0.18378657045763086,
|
|
"learning_rate": 8.242951263689468e-05,
|
|
"loss": 0.6477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6364154815673828,
|
|
"step": 3910,
|
|
"valid_targets_mean": 16203.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.044835868694956,
|
|
"grad_norm": 0.2697854055357981,
|
|
"learning_rate": 8.225566365115738e-05,
|
|
"loss": 0.6357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6307680606842041,
|
|
"step": 3915,
|
|
"valid_targets_mean": 16268.8,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.0461702695489725,
|
|
"grad_norm": 0.32997967067160905,
|
|
"learning_rate": 8.20817753030195e-05,
|
|
"loss": 0.6354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6161054372787476,
|
|
"step": 3920,
|
|
"valid_targets_mean": 16692.8,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 1.0475046704029891,
|
|
"grad_norm": 0.2630706017357537,
|
|
"learning_rate": 8.190784853583554e-05,
|
|
"loss": 0.656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6609646081924438,
|
|
"step": 3925,
|
|
"valid_targets_mean": 15393.3,
|
|
"valid_targets_min": 87
|
|
},
|
|
{
|
|
"epoch": 1.0488390712570057,
|
|
"grad_norm": 0.25045695026175163,
|
|
"learning_rate": 8.17338842931684e-05,
|
|
"loss": 0.6417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6067150235176086,
|
|
"step": 3930,
|
|
"valid_targets_mean": 17500.9,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.050173472111022,
|
|
"grad_norm": 0.2177517219494181,
|
|
"learning_rate": 8.155988351878433e-05,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6083317995071411,
|
|
"step": 3935,
|
|
"valid_targets_mean": 15829.2,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 1.0515078729650387,
|
|
"grad_norm": 0.3013828015707749,
|
|
"learning_rate": 8.138584715664766e-05,
|
|
"loss": 0.6578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6143460273742676,
|
|
"step": 3940,
|
|
"valid_targets_mean": 15557.4,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 1.0528422738190553,
|
|
"grad_norm": 0.30390911254139075,
|
|
"learning_rate": 8.121177615091591e-05,
|
|
"loss": 0.6457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6318605542182922,
|
|
"step": 3945,
|
|
"valid_targets_mean": 15857.7,
|
|
"valid_targets_min": 36
|
|
},
|
|
{
|
|
"epoch": 1.0541766746730719,
|
|
"grad_norm": 0.3350524541259267,
|
|
"learning_rate": 8.103767144593445e-05,
|
|
"loss": 0.6341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6261539459228516,
|
|
"step": 3950,
|
|
"valid_targets_mean": 16744.7,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.0555110755270882,
|
|
"grad_norm": 0.30249343171639453,
|
|
"learning_rate": 8.086353398623154e-05,
|
|
"loss": 0.628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6331799030303955,
|
|
"step": 3955,
|
|
"valid_targets_mean": 16559.9,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.0568454763811048,
|
|
"grad_norm": 0.2546085452828038,
|
|
"learning_rate": 8.068936471651308e-05,
|
|
"loss": 0.6425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6100320219993591,
|
|
"step": 3960,
|
|
"valid_targets_mean": 15661.1,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 1.0581798772351214,
|
|
"grad_norm": 0.2646061268757464,
|
|
"learning_rate": 8.051516458165759e-05,
|
|
"loss": 0.6308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6500637531280518,
|
|
"step": 3965,
|
|
"valid_targets_mean": 15055.7,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 1.059514278089138,
|
|
"grad_norm": 0.19452961730319374,
|
|
"learning_rate": 8.0340934526711e-05,
|
|
"loss": 0.6433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6682567596435547,
|
|
"step": 3970,
|
|
"valid_targets_mean": 15522.8,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 1.0608486789431546,
|
|
"grad_norm": 0.2567508331337579,
|
|
"learning_rate": 8.016667549688157e-05,
|
|
"loss": 0.633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6242945194244385,
|
|
"step": 3975,
|
|
"valid_targets_mean": 16522.6,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 1.062183079797171,
|
|
"grad_norm": 0.21485742918403294,
|
|
"learning_rate": 7.999238843753474e-05,
|
|
"loss": 0.6357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6230143308639526,
|
|
"step": 3980,
|
|
"valid_targets_mean": 15910.7,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 1.0635174806511876,
|
|
"grad_norm": 0.22484131136152755,
|
|
"learning_rate": 7.981807429418803e-05,
|
|
"loss": 0.6644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6617487072944641,
|
|
"step": 3985,
|
|
"valid_targets_mean": 15697.9,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.0648518815052042,
|
|
"grad_norm": 0.20638645597298313,
|
|
"learning_rate": 7.96437340125059e-05,
|
|
"loss": 0.6214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6088519096374512,
|
|
"step": 3990,
|
|
"valid_targets_mean": 17400.2,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.0661862823592207,
|
|
"grad_norm": 0.27568755648856647,
|
|
"learning_rate": 7.946936853829458e-05,
|
|
"loss": 0.6405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6442372798919678,
|
|
"step": 3995,
|
|
"valid_targets_mean": 16478.1,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 1.0675206832132373,
|
|
"grad_norm": 0.3198359769041008,
|
|
"learning_rate": 7.929497881749699e-05,
|
|
"loss": 0.6363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.630896806716919,
|
|
"step": 4000,
|
|
"valid_targets_mean": 17061.9,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 1.0688550840672537,
|
|
"grad_norm": 0.3236960340782802,
|
|
"learning_rate": 7.912056579618759e-05,
|
|
"loss": 0.648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6238499879837036,
|
|
"step": 4005,
|
|
"valid_targets_mean": 16409.8,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 1.0701894849212703,
|
|
"grad_norm": 0.21317090795425317,
|
|
"learning_rate": 7.894613042056721e-05,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6377233266830444,
|
|
"step": 4010,
|
|
"valid_targets_mean": 15927.7,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.071523885775287,
|
|
"grad_norm": 0.3228179754734646,
|
|
"learning_rate": 7.877167363695805e-05,
|
|
"loss": 0.6405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6575232148170471,
|
|
"step": 4015,
|
|
"valid_targets_mean": 15166.5,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 1.0728582866293035,
|
|
"grad_norm": 0.22502051279544055,
|
|
"learning_rate": 7.859719639179834e-05,
|
|
"loss": 0.6227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6041486263275146,
|
|
"step": 4020,
|
|
"valid_targets_mean": 16201.1,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.07419268748332,
|
|
"grad_norm": 0.19018623600098714,
|
|
"learning_rate": 7.842269963163735e-05,
|
|
"loss": 0.6403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.615936279296875,
|
|
"step": 4025,
|
|
"valid_targets_mean": 17408.3,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 1.0755270883373365,
|
|
"grad_norm": 0.2633541076294458,
|
|
"learning_rate": 7.824818430313028e-05,
|
|
"loss": 0.635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6207270622253418,
|
|
"step": 4030,
|
|
"valid_targets_mean": 16066.5,
|
|
"valid_targets_min": 57
|
|
},
|
|
{
|
|
"epoch": 1.076861489191353,
|
|
"grad_norm": 0.19894714512203296,
|
|
"learning_rate": 7.807365135303299e-05,
|
|
"loss": 0.6517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6607016324996948,
|
|
"step": 4035,
|
|
"valid_targets_mean": 15828.8,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.0781958900453696,
|
|
"grad_norm": 0.2939348493891375,
|
|
"learning_rate": 7.789910172819693e-05,
|
|
"loss": 0.635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6290296316146851,
|
|
"step": 4040,
|
|
"valid_targets_mean": 16706.2,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.0795302908993862,
|
|
"grad_norm": 0.27275384805874286,
|
|
"learning_rate": 7.772453637556411e-05,
|
|
"loss": 0.6592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6724493503570557,
|
|
"step": 4045,
|
|
"valid_targets_mean": 16034.1,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 1.0808646917534026,
|
|
"grad_norm": 0.35278405634413196,
|
|
"learning_rate": 7.754995624216176e-05,
|
|
"loss": 0.6371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6188097596168518,
|
|
"step": 4050,
|
|
"valid_targets_mean": 16723.1,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.0821990926074192,
|
|
"grad_norm": 0.26278431869708246,
|
|
"learning_rate": 7.737536227509734e-05,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6200742721557617,
|
|
"step": 4055,
|
|
"valid_targets_mean": 15671.6,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 1.0835334934614358,
|
|
"grad_norm": 0.27084560921646084,
|
|
"learning_rate": 7.720075542155336e-05,
|
|
"loss": 0.6645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6747046709060669,
|
|
"step": 4060,
|
|
"valid_targets_mean": 15743.4,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 1.0848678943154524,
|
|
"grad_norm": 0.22793996132664027,
|
|
"learning_rate": 7.702613662878223e-05,
|
|
"loss": 0.6523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6548327803611755,
|
|
"step": 4065,
|
|
"valid_targets_mean": 15993.7,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 1.086202295169469,
|
|
"grad_norm": 0.28041981538949434,
|
|
"learning_rate": 7.685150684410114e-05,
|
|
"loss": 0.6336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6257905960083008,
|
|
"step": 4070,
|
|
"valid_targets_mean": 15352.7,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.0875366960234856,
|
|
"grad_norm": 0.2253117839055684,
|
|
"learning_rate": 7.667686701488688e-05,
|
|
"loss": 0.6369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5900462865829468,
|
|
"step": 4075,
|
|
"valid_targets_mean": 17873.3,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 1.088871096877502,
|
|
"grad_norm": 0.23298394292558355,
|
|
"learning_rate": 7.650221808857081e-05,
|
|
"loss": 0.6425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6310970187187195,
|
|
"step": 4080,
|
|
"valid_targets_mean": 16328.8,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.0902054977315185,
|
|
"grad_norm": 0.20823478267229803,
|
|
"learning_rate": 7.632756101263358e-05,
|
|
"loss": 0.6231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5811946392059326,
|
|
"step": 4085,
|
|
"valid_targets_mean": 17497.8,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 1.0915398985855351,
|
|
"grad_norm": 0.28914326624880793,
|
|
"learning_rate": 7.615289673460003e-05,
|
|
"loss": 0.6376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6378998756408691,
|
|
"step": 4090,
|
|
"valid_targets_mean": 14718.8,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 1.0928742994395517,
|
|
"grad_norm": 0.29919918167845155,
|
|
"learning_rate": 7.59782262020341e-05,
|
|
"loss": 0.6268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6432034373283386,
|
|
"step": 4095,
|
|
"valid_targets_mean": 16114.1,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.094208700293568,
|
|
"grad_norm": 0.18872135872551207,
|
|
"learning_rate": 7.580355036253372e-05,
|
|
"loss": 0.6381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6325193643569946,
|
|
"step": 4100,
|
|
"valid_targets_mean": 14879.7,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.0955431011475847,
|
|
"grad_norm": 0.2170776845688349,
|
|
"learning_rate": 7.562887016372551e-05,
|
|
"loss": 0.6436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6410015225410461,
|
|
"step": 4105,
|
|
"valid_targets_mean": 15726.7,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 1.0968775020016013,
|
|
"grad_norm": 0.3454607950583436,
|
|
"learning_rate": 7.54541865532598e-05,
|
|
"loss": 0.6389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6285821199417114,
|
|
"step": 4110,
|
|
"valid_targets_mean": 16712.9,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.0982119028556179,
|
|
"grad_norm": 0.256231016783552,
|
|
"learning_rate": 7.527950047880543e-05,
|
|
"loss": 0.6378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6538169980049133,
|
|
"step": 4115,
|
|
"valid_targets_mean": 15445.9,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 1.0995463037096345,
|
|
"grad_norm": 0.2564298714151834,
|
|
"learning_rate": 7.51048128880446e-05,
|
|
"loss": 0.644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6618050336837769,
|
|
"step": 4120,
|
|
"valid_targets_mean": 14654.4,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.1008807045636508,
|
|
"grad_norm": 0.2391895949624863,
|
|
"learning_rate": 7.493012472866769e-05,
|
|
"loss": 0.6383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6317932605743408,
|
|
"step": 4125,
|
|
"valid_targets_mean": 16660.1,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.1022151054176674,
|
|
"grad_norm": 0.2307958569944933,
|
|
"learning_rate": 7.47554369483682e-05,
|
|
"loss": 0.6371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.656184732913971,
|
|
"step": 4130,
|
|
"valid_targets_mean": 15621.7,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.103549506271684,
|
|
"grad_norm": 0.24380743638684044,
|
|
"learning_rate": 7.458075049483762e-05,
|
|
"loss": 0.6299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6304770112037659,
|
|
"step": 4135,
|
|
"valid_targets_mean": 16798.2,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 1.1048839071257006,
|
|
"grad_norm": 0.28731780160095854,
|
|
"learning_rate": 7.44060663157602e-05,
|
|
"loss": 0.6378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6511474251747131,
|
|
"step": 4140,
|
|
"valid_targets_mean": 16375.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.1062183079797172,
|
|
"grad_norm": 0.2513566937786385,
|
|
"learning_rate": 7.423138535880779e-05,
|
|
"loss": 0.638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6225339770317078,
|
|
"step": 4145,
|
|
"valid_targets_mean": 16275.2,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 1.1075527088337336,
|
|
"grad_norm": 0.40907374807887503,
|
|
"learning_rate": 7.405670857163489e-05,
|
|
"loss": 0.6476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6531241536140442,
|
|
"step": 4150,
|
|
"valid_targets_mean": 16403.7,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.1088871096877502,
|
|
"grad_norm": 0.41217346744153255,
|
|
"learning_rate": 7.388203690187325e-05,
|
|
"loss": 0.6384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6631238460540771,
|
|
"step": 4155,
|
|
"valid_targets_mean": 14551.2,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 1.1102215105417668,
|
|
"grad_norm": 0.33374581286984195,
|
|
"learning_rate": 7.370737129712695e-05,
|
|
"loss": 0.6242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6131796836853027,
|
|
"step": 4160,
|
|
"valid_targets_mean": 16204.9,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 1.1115559113957834,
|
|
"grad_norm": 0.45881666367462687,
|
|
"learning_rate": 7.353271270496713e-05,
|
|
"loss": 0.6458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6824424862861633,
|
|
"step": 4165,
|
|
"valid_targets_mean": 15262.4,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.1128903122498,
|
|
"grad_norm": 0.3249252539125189,
|
|
"learning_rate": 7.335806207292687e-05,
|
|
"loss": 0.6408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6273507475852966,
|
|
"step": 4170,
|
|
"valid_targets_mean": 16026.1,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 1.1142247131038163,
|
|
"grad_norm": 0.41682632170083284,
|
|
"learning_rate": 7.31834203484961e-05,
|
|
"loss": 0.6408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6524085402488708,
|
|
"step": 4175,
|
|
"valid_targets_mean": 14977.0,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.115559113957833,
|
|
"grad_norm": 0.30109531205332285,
|
|
"learning_rate": 7.300878847911642e-05,
|
|
"loss": 0.6432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6498684883117676,
|
|
"step": 4180,
|
|
"valid_targets_mean": 15246.6,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 1.1168935148118495,
|
|
"grad_norm": 0.3671309009672698,
|
|
"learning_rate": 7.283416741217595e-05,
|
|
"loss": 0.6211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6055911779403687,
|
|
"step": 4185,
|
|
"valid_targets_mean": 17311.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 1.118227915665866,
|
|
"grad_norm": 0.3153704556403042,
|
|
"learning_rate": 7.26595580950042e-05,
|
|
"loss": 0.6513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.687164306640625,
|
|
"step": 4190,
|
|
"valid_targets_mean": 15870.5,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 1.1195623165198825,
|
|
"grad_norm": 0.3666269955347262,
|
|
"learning_rate": 7.248496147486695e-05,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6103506088256836,
|
|
"step": 4195,
|
|
"valid_targets_mean": 17057.8,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.120896717373899,
|
|
"grad_norm": 0.3438879047175211,
|
|
"learning_rate": 7.23103784989611e-05,
|
|
"loss": 0.6451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6561911106109619,
|
|
"step": 4200,
|
|
"valid_targets_mean": 15884.4,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 1.1222311182279157,
|
|
"grad_norm": 0.2833414402565143,
|
|
"learning_rate": 7.213581011440954e-05,
|
|
"loss": 0.6671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6505024433135986,
|
|
"step": 4205,
|
|
"valid_targets_mean": 16336.9,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 1.1235655190819323,
|
|
"grad_norm": 0.34970656377825804,
|
|
"learning_rate": 7.1961257268256e-05,
|
|
"loss": 0.6377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6430806517601013,
|
|
"step": 4210,
|
|
"valid_targets_mean": 14787.7,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 1.1248999199359488,
|
|
"grad_norm": 0.3823019646687883,
|
|
"learning_rate": 7.178672090745986e-05,
|
|
"loss": 0.6286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6347396373748779,
|
|
"step": 4215,
|
|
"valid_targets_mean": 16203.0,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 1.1262343207899652,
|
|
"grad_norm": 0.3278569894429977,
|
|
"learning_rate": 7.161220197889114e-05,
|
|
"loss": 0.6409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6750344634056091,
|
|
"step": 4220,
|
|
"valid_targets_mean": 16876.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.1275687216439818,
|
|
"grad_norm": 0.3283760936090551,
|
|
"learning_rate": 7.143770142932524e-05,
|
|
"loss": 0.6389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6138772964477539,
|
|
"step": 4225,
|
|
"valid_targets_mean": 17061.0,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.1289031224979984,
|
|
"grad_norm": 0.2751031909904081,
|
|
"learning_rate": 7.126322020543785e-05,
|
|
"loss": 0.6336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.62000572681427,
|
|
"step": 4230,
|
|
"valid_targets_mean": 15323.3,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.130237523352015,
|
|
"grad_norm": 0.3125927012088527,
|
|
"learning_rate": 7.108875925379991e-05,
|
|
"loss": 0.6333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6249172687530518,
|
|
"step": 4235,
|
|
"valid_targets_mean": 14841.3,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.1315719242060316,
|
|
"grad_norm": 0.2353496713324635,
|
|
"learning_rate": 7.091431952087221e-05,
|
|
"loss": 0.6535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6486538648605347,
|
|
"step": 4240,
|
|
"valid_targets_mean": 15914.3,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.132906325060048,
|
|
"grad_norm": 0.28818023845326846,
|
|
"learning_rate": 7.073990195300058e-05,
|
|
"loss": 0.6308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.616797685623169,
|
|
"step": 4245,
|
|
"valid_targets_mean": 15263.0,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 1.1342407259140646,
|
|
"grad_norm": 0.30990451144746395,
|
|
"learning_rate": 7.056550749641056e-05,
|
|
"loss": 0.6361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6597074270248413,
|
|
"step": 4250,
|
|
"valid_targets_mean": 15883.9,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.1355751267680811,
|
|
"grad_norm": 0.29519057375689733,
|
|
"learning_rate": 7.039113709720227e-05,
|
|
"loss": 0.6367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6335593461990356,
|
|
"step": 4255,
|
|
"valid_targets_mean": 15723.1,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 1.1369095276220977,
|
|
"grad_norm": 0.23344528739206696,
|
|
"learning_rate": 7.021679170134533e-05,
|
|
"loss": 0.6264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6216973662376404,
|
|
"step": 4260,
|
|
"valid_targets_mean": 16099.4,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 1.1382439284761143,
|
|
"grad_norm": 0.28822907095570005,
|
|
"learning_rate": 7.004247225467381e-05,
|
|
"loss": 0.6317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6207790970802307,
|
|
"step": 4265,
|
|
"valid_targets_mean": 16510.5,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.1395783293301307,
|
|
"grad_norm": 0.22615557292653501,
|
|
"learning_rate": 6.986817970288082e-05,
|
|
"loss": 0.6356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6095227003097534,
|
|
"step": 4270,
|
|
"valid_targets_mean": 16566.2,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 1.1409127301841473,
|
|
"grad_norm": 0.2789627702109262,
|
|
"learning_rate": 6.969391499151371e-05,
|
|
"loss": 0.6208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.612389326095581,
|
|
"step": 4275,
|
|
"valid_targets_mean": 15183.5,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 1.1422471310381639,
|
|
"grad_norm": 0.19826473078863655,
|
|
"learning_rate": 6.95196790659688e-05,
|
|
"loss": 0.6332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6624350547790527,
|
|
"step": 4280,
|
|
"valid_targets_mean": 15083.0,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 1.1435815318921805,
|
|
"grad_norm": 0.22431913852918883,
|
|
"learning_rate": 6.934547287148614e-05,
|
|
"loss": 0.6453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6406052112579346,
|
|
"step": 4285,
|
|
"valid_targets_mean": 15643.4,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 1.1449159327461969,
|
|
"grad_norm": 0.16484255463144135,
|
|
"learning_rate": 6.917129735314455e-05,
|
|
"loss": 0.6393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6505135297775269,
|
|
"step": 4290,
|
|
"valid_targets_mean": 15640.7,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.1462503336002134,
|
|
"grad_norm": 0.18808915828082584,
|
|
"learning_rate": 6.899715345585649e-05,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6421415209770203,
|
|
"step": 4295,
|
|
"valid_targets_mean": 16454.7,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.14758473445423,
|
|
"grad_norm": 0.20215904565478326,
|
|
"learning_rate": 6.882304212436272e-05,
|
|
"loss": 0.6413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6256978511810303,
|
|
"step": 4300,
|
|
"valid_targets_mean": 16380.1,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.1489191353082466,
|
|
"grad_norm": 0.48019910123488907,
|
|
"learning_rate": 6.864896430322745e-05,
|
|
"loss": 0.6659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7197443246841431,
|
|
"step": 4305,
|
|
"valid_targets_mean": 15080.7,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.1502535361622632,
|
|
"grad_norm": 0.18749183197740524,
|
|
"learning_rate": 6.84749209368331e-05,
|
|
"loss": 0.6176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5742081999778748,
|
|
"step": 4310,
|
|
"valid_targets_mean": 17511.4,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 1.1515879370162796,
|
|
"grad_norm": 0.2364048097934658,
|
|
"learning_rate": 6.830091296937509e-05,
|
|
"loss": 0.6325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6291307806968689,
|
|
"step": 4315,
|
|
"valid_targets_mean": 15929.1,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 1.1529223378702962,
|
|
"grad_norm": 0.2601345422541887,
|
|
"learning_rate": 6.812694134485686e-05,
|
|
"loss": 0.6368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6046054363250732,
|
|
"step": 4320,
|
|
"valid_targets_mean": 16293.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 1.1542567387243128,
|
|
"grad_norm": 0.2605636939487043,
|
|
"learning_rate": 6.795300700708466e-05,
|
|
"loss": 0.6403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6342687606811523,
|
|
"step": 4325,
|
|
"valid_targets_mean": 16742.1,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 1.1555911395783294,
|
|
"grad_norm": 0.2702252161459794,
|
|
"learning_rate": 6.777911089966245e-05,
|
|
"loss": 0.6278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6475258469581604,
|
|
"step": 4330,
|
|
"valid_targets_mean": 15410.6,
|
|
"valid_targets_min": 59
|
|
},
|
|
{
|
|
"epoch": 1.156925540432346,
|
|
"grad_norm": 0.21623367735516652,
|
|
"learning_rate": 6.760525396598686e-05,
|
|
"loss": 0.6498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6857297420501709,
|
|
"step": 4335,
|
|
"valid_targets_mean": 15698.7,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 1.1582599412863623,
|
|
"grad_norm": 0.2524994678636068,
|
|
"learning_rate": 6.74314371492419e-05,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.655139684677124,
|
|
"step": 4340,
|
|
"valid_targets_mean": 15753.3,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 1.159594342140379,
|
|
"grad_norm": 0.35038263626958716,
|
|
"learning_rate": 6.725766139239399e-05,
|
|
"loss": 0.6372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6007696986198425,
|
|
"step": 4345,
|
|
"valid_targets_mean": 15674.3,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.1609287429943955,
|
|
"grad_norm": 0.27171891513965785,
|
|
"learning_rate": 6.708392763818681e-05,
|
|
"loss": 0.6347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6097182035446167,
|
|
"step": 4350,
|
|
"valid_targets_mean": 15697.2,
|
|
"valid_targets_min": 186
|
|
},
|
|
{
|
|
"epoch": 1.1622631438484121,
|
|
"grad_norm": 0.3963280716536439,
|
|
"learning_rate": 6.691023682913616e-05,
|
|
"loss": 0.6488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.665363609790802,
|
|
"step": 4355,
|
|
"valid_targets_mean": 17297.8,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 1.1635975447024287,
|
|
"grad_norm": 0.3304867835821895,
|
|
"learning_rate": 6.673658990752484e-05,
|
|
"loss": 0.6301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6157478094100952,
|
|
"step": 4360,
|
|
"valid_targets_mean": 16411.7,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.164931945556445,
|
|
"grad_norm": 0.36259027834719193,
|
|
"learning_rate": 6.656298781539764e-05,
|
|
"loss": 0.6268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6261616945266724,
|
|
"step": 4365,
|
|
"valid_targets_mean": 16823.3,
|
|
"valid_targets_min": 129
|
|
},
|
|
{
|
|
"epoch": 1.1662663464104617,
|
|
"grad_norm": 0.29117128189517166,
|
|
"learning_rate": 6.638943149455598e-05,
|
|
"loss": 0.6417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6324571371078491,
|
|
"step": 4370,
|
|
"valid_targets_mean": 15005.9,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.1676007472644783,
|
|
"grad_norm": 0.2521032334673322,
|
|
"learning_rate": 6.621592188655314e-05,
|
|
"loss": 0.6298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6089852452278137,
|
|
"step": 4375,
|
|
"valid_targets_mean": 15565.0,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 1.1689351481184949,
|
|
"grad_norm": 0.3063978273693239,
|
|
"learning_rate": 6.604245993268893e-05,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6192407608032227,
|
|
"step": 4380,
|
|
"valid_targets_mean": 16533.9,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.1702695489725112,
|
|
"grad_norm": 0.2919718403743578,
|
|
"learning_rate": 6.586904657400457e-05,
|
|
"loss": 0.635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6104006171226501,
|
|
"step": 4385,
|
|
"valid_targets_mean": 15465.3,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 1.1716039498265278,
|
|
"grad_norm": 0.27954703339928455,
|
|
"learning_rate": 6.569568275127769e-05,
|
|
"loss": 0.6164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6153366565704346,
|
|
"step": 4390,
|
|
"valid_targets_mean": 16239.0,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 1.1729383506805444,
|
|
"grad_norm": 0.2577231730281383,
|
|
"learning_rate": 6.552236940501725e-05,
|
|
"loss": 0.6168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6046464443206787,
|
|
"step": 4395,
|
|
"valid_targets_mean": 14669.8,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.174272751534561,
|
|
"grad_norm": 0.27404691900004263,
|
|
"learning_rate": 6.534910747545825e-05,
|
|
"loss": 0.6199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6306606531143188,
|
|
"step": 4400,
|
|
"valid_targets_mean": 15363.8,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 1.1756071523885776,
|
|
"grad_norm": 0.1798445084889397,
|
|
"learning_rate": 6.517589790255686e-05,
|
|
"loss": 0.6488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6698160171508789,
|
|
"step": 4405,
|
|
"valid_targets_mean": 14588.3,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 1.1769415532425942,
|
|
"grad_norm": 0.30087020212970406,
|
|
"learning_rate": 6.500274162598512e-05,
|
|
"loss": 0.6411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6415399312973022,
|
|
"step": 4410,
|
|
"valid_targets_mean": 15331.5,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.1782759540966106,
|
|
"grad_norm": 0.22891062443425242,
|
|
"learning_rate": 6.482963958512599e-05,
|
|
"loss": 0.6342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6426824927330017,
|
|
"step": 4415,
|
|
"valid_targets_mean": 15908.3,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 1.1796103549506272,
|
|
"grad_norm": 0.25552739097544924,
|
|
"learning_rate": 6.465659271906823e-05,
|
|
"loss": 0.6277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185193061828613,
|
|
"step": 4420,
|
|
"valid_targets_mean": 16322.4,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 1.1809447558046438,
|
|
"grad_norm": 0.25044588493176134,
|
|
"learning_rate": 6.448360196660116e-05,
|
|
"loss": 0.6371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6124670505523682,
|
|
"step": 4425,
|
|
"valid_targets_mean": 16749.6,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.1822791566586603,
|
|
"grad_norm": 0.19454645206623475,
|
|
"learning_rate": 6.43106682662098e-05,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6282304525375366,
|
|
"step": 4430,
|
|
"valid_targets_mean": 15491.6,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.1836135575126767,
|
|
"grad_norm": 0.2540826353003291,
|
|
"learning_rate": 6.413779255606961e-05,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6295236349105835,
|
|
"step": 4435,
|
|
"valid_targets_mean": 15087.2,
|
|
"valid_targets_min": 167
|
|
},
|
|
{
|
|
"epoch": 1.1849479583666933,
|
|
"grad_norm": 0.3649774351284495,
|
|
"learning_rate": 6.396497577404143e-05,
|
|
"loss": 0.6455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.64961838722229,
|
|
"step": 4440,
|
|
"valid_targets_mean": 16165.2,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 1.18628235922071,
|
|
"grad_norm": 0.3090907649636376,
|
|
"learning_rate": 6.379221885766643e-05,
|
|
"loss": 0.6293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6002676486968994,
|
|
"step": 4445,
|
|
"valid_targets_mean": 16184.6,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 1.1876167600747265,
|
|
"grad_norm": 0.2670827791758813,
|
|
"learning_rate": 6.361952274416106e-05,
|
|
"loss": 0.6231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6144086122512817,
|
|
"step": 4450,
|
|
"valid_targets_mean": 17209.1,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.188951160928743,
|
|
"grad_norm": 0.2762036773882695,
|
|
"learning_rate": 6.344688837041177e-05,
|
|
"loss": 0.6262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.61281418800354,
|
|
"step": 4455,
|
|
"valid_targets_mean": 18342.9,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 1.1902855617827595,
|
|
"grad_norm": 0.36065831326273906,
|
|
"learning_rate": 6.32743166729702e-05,
|
|
"loss": 0.6359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6284192204475403,
|
|
"step": 4460,
|
|
"valid_targets_mean": 16182.9,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.191619962636776,
|
|
"grad_norm": 0.23353933817304148,
|
|
"learning_rate": 6.310180858804794e-05,
|
|
"loss": 0.6485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6543843150138855,
|
|
"step": 4465,
|
|
"valid_targets_mean": 15583.2,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 1.1929543634907926,
|
|
"grad_norm": 0.3570787968656111,
|
|
"learning_rate": 6.292936505151145e-05,
|
|
"loss": 0.6412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6564721465110779,
|
|
"step": 4470,
|
|
"valid_targets_mean": 14519.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.1942887643448092,
|
|
"grad_norm": 0.36008419141471126,
|
|
"learning_rate": 6.275698699887699e-05,
|
|
"loss": 0.6161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6520239114761353,
|
|
"step": 4475,
|
|
"valid_targets_mean": 16250.3,
|
|
"valid_targets_min": 53
|
|
},
|
|
{
|
|
"epoch": 1.1956231651988256,
|
|
"grad_norm": 0.3207534581185475,
|
|
"learning_rate": 6.258467536530565e-05,
|
|
"loss": 0.6344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185310482978821,
|
|
"step": 4480,
|
|
"valid_targets_mean": 15550.9,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.1969575660528422,
|
|
"grad_norm": 0.2775970978135452,
|
|
"learning_rate": 6.241243108559811e-05,
|
|
"loss": 0.6354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6161234378814697,
|
|
"step": 4485,
|
|
"valid_targets_mean": 16199.4,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 1.1982919669068588,
|
|
"grad_norm": 0.3128052037762757,
|
|
"learning_rate": 6.224025509418971e-05,
|
|
"loss": 0.6491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6437747478485107,
|
|
"step": 4490,
|
|
"valid_targets_mean": 14918.8,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.1996263677608754,
|
|
"grad_norm": 0.3206313524507226,
|
|
"learning_rate": 6.206814832514525e-05,
|
|
"loss": 0.6343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6561208963394165,
|
|
"step": 4495,
|
|
"valid_targets_mean": 15631.4,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.200960768614892,
|
|
"grad_norm": 0.29463076747279565,
|
|
"learning_rate": 6.189611171215405e-05,
|
|
"loss": 0.6299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6470526456832886,
|
|
"step": 4500,
|
|
"valid_targets_mean": 16196.3,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.2022951694689086,
|
|
"grad_norm": 0.21321940478714968,
|
|
"learning_rate": 6.172414618852488e-05,
|
|
"loss": 0.6394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6385337114334106,
|
|
"step": 4505,
|
|
"valid_targets_mean": 17155.1,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 1.203629570322925,
|
|
"grad_norm": 0.33018462000086,
|
|
"learning_rate": 6.155225268718069e-05,
|
|
"loss": 0.6193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6546652913093567,
|
|
"step": 4510,
|
|
"valid_targets_mean": 15543.9,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 1.2049639711769415,
|
|
"grad_norm": 0.2191433153569452,
|
|
"learning_rate": 6.138043214065385e-05,
|
|
"loss": 0.6274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6190072298049927,
|
|
"step": 4515,
|
|
"valid_targets_mean": 16248.8,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.2062983720309581,
|
|
"grad_norm": 0.2941732327678985,
|
|
"learning_rate": 6.120868548108093e-05,
|
|
"loss": 0.6194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6256512403488159,
|
|
"step": 4520,
|
|
"valid_targets_mean": 15419.4,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 1.2076327728849747,
|
|
"grad_norm": 0.27394641869706676,
|
|
"learning_rate": 6.103701364019754e-05,
|
|
"loss": 0.6327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.625571608543396,
|
|
"step": 4525,
|
|
"valid_targets_mean": 16626.2,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.208967173738991,
|
|
"grad_norm": 0.3562152679188747,
|
|
"learning_rate": 6.0865417549333536e-05,
|
|
"loss": 0.6431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6560186147689819,
|
|
"step": 4530,
|
|
"valid_targets_mean": 15384.8,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 1.2103015745930077,
|
|
"grad_norm": 0.2864401824703139,
|
|
"learning_rate": 6.0693898139407786e-05,
|
|
"loss": 0.6433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6474707126617432,
|
|
"step": 4535,
|
|
"valid_targets_mean": 15037.6,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 1.2116359754470243,
|
|
"grad_norm": 0.3635249627954564,
|
|
"learning_rate": 6.0522456340923085e-05,
|
|
"loss": 0.6261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6389481425285339,
|
|
"step": 4540,
|
|
"valid_targets_mean": 15964.4,
|
|
"valid_targets_min": 150
|
|
},
|
|
{
|
|
"epoch": 1.2129703763010409,
|
|
"grad_norm": 0.25685540368852067,
|
|
"learning_rate": 6.0351093083961267e-05,
|
|
"loss": 0.6203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6290313005447388,
|
|
"step": 4545,
|
|
"valid_targets_mean": 15779.2,
|
|
"valid_targets_min": 26
|
|
},
|
|
{
|
|
"epoch": 1.2143047771550575,
|
|
"grad_norm": 0.29619248753111965,
|
|
"learning_rate": 6.017980929817807e-05,
|
|
"loss": 0.6292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6584930419921875,
|
|
"step": 4550,
|
|
"valid_targets_mean": 16471.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.2156391780090738,
|
|
"grad_norm": 0.28564040063006574,
|
|
"learning_rate": 6.000860591279801e-05,
|
|
"loss": 0.6325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6416076421737671,
|
|
"step": 4555,
|
|
"valid_targets_mean": 14785.7,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 1.2169735788630904,
|
|
"grad_norm": 0.3510476897469252,
|
|
"learning_rate": 5.9837483856609527e-05,
|
|
"loss": 0.6353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6294372081756592,
|
|
"step": 4560,
|
|
"valid_targets_mean": 16263.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.218307979717107,
|
|
"grad_norm": 0.3394073281836747,
|
|
"learning_rate": 5.966644405795982e-05,
|
|
"loss": 0.631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6662023067474365,
|
|
"step": 4565,
|
|
"valid_targets_mean": 16064.6,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.2196423805711236,
|
|
"grad_norm": 0.30884667299242735,
|
|
"learning_rate": 5.949548744474978e-05,
|
|
"loss": 0.6214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6019168496131897,
|
|
"step": 4570,
|
|
"valid_targets_mean": 15504.7,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.22097678142514,
|
|
"grad_norm": 0.26457608857420895,
|
|
"learning_rate": 5.9324614944429095e-05,
|
|
"loss": 0.6387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6279822587966919,
|
|
"step": 4575,
|
|
"valid_targets_mean": 15599.3,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 1.2223111822791566,
|
|
"grad_norm": 0.2621594312135588,
|
|
"learning_rate": 5.915382748399105e-05,
|
|
"loss": 0.6431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.625961184501648,
|
|
"step": 4580,
|
|
"valid_targets_mean": 16323.6,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 1.2236455831331732,
|
|
"grad_norm": 0.28013919204235443,
|
|
"learning_rate": 5.8983125989967646e-05,
|
|
"loss": 0.6493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6862317323684692,
|
|
"step": 4585,
|
|
"valid_targets_mean": 16439.2,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 1.2249799839871898,
|
|
"grad_norm": 0.2811201149430358,
|
|
"learning_rate": 5.881251138842453e-05,
|
|
"loss": 0.6277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5923159122467041,
|
|
"step": 4590,
|
|
"valid_targets_mean": 16460.1,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.2263143848412064,
|
|
"grad_norm": 0.22967843281129904,
|
|
"learning_rate": 5.864198460495585e-05,
|
|
"loss": 0.6351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6291022300720215,
|
|
"step": 4595,
|
|
"valid_targets_mean": 15312.0,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 1.227648785695223,
|
|
"grad_norm": 0.23520044247290123,
|
|
"learning_rate": 5.847154656467944e-05,
|
|
"loss": 0.6319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6028156280517578,
|
|
"step": 4600,
|
|
"valid_targets_mean": 16651.4,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.2289831865492393,
|
|
"grad_norm": 0.26946665323032615,
|
|
"learning_rate": 5.830119819223166e-05,
|
|
"loss": 0.6267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6300636529922485,
|
|
"step": 4605,
|
|
"valid_targets_mean": 16018.6,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.230317587403256,
|
|
"grad_norm": 0.32477605566846685,
|
|
"learning_rate": 5.8130940411762406e-05,
|
|
"loss": 0.6249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6440907716751099,
|
|
"step": 4610,
|
|
"valid_targets_mean": 17033.9,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 1.2316519882572725,
|
|
"grad_norm": 0.23081030208781445,
|
|
"learning_rate": 5.79607741469301e-05,
|
|
"loss": 0.6445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6172423958778381,
|
|
"step": 4615,
|
|
"valid_targets_mean": 16389.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.232986389111289,
|
|
"grad_norm": 0.31649508316647457,
|
|
"learning_rate": 5.779070032089674e-05,
|
|
"loss": 0.6387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6402013301849365,
|
|
"step": 4620,
|
|
"valid_targets_mean": 15918.7,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 1.2343207899653055,
|
|
"grad_norm": 0.19452765805479708,
|
|
"learning_rate": 5.762071985632276e-05,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6300052404403687,
|
|
"step": 4625,
|
|
"valid_targets_mean": 16835.9,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.235655190819322,
|
|
"grad_norm": 0.2823409706822507,
|
|
"learning_rate": 5.745083367536217e-05,
|
|
"loss": 0.6245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.620025634765625,
|
|
"step": 4630,
|
|
"valid_targets_mean": 15694.7,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.2369895916733387,
|
|
"grad_norm": 0.26476187073764795,
|
|
"learning_rate": 5.7281042699657465e-05,
|
|
"loss": 0.6388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380904912948608,
|
|
"step": 4635,
|
|
"valid_targets_mean": 16004.7,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 1.2383239925273553,
|
|
"grad_norm": 0.3134755420445105,
|
|
"learning_rate": 5.71113478503346e-05,
|
|
"loss": 0.633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6498226523399353,
|
|
"step": 4640,
|
|
"valid_targets_mean": 16175.9,
|
|
"valid_targets_min": 27
|
|
},
|
|
{
|
|
"epoch": 1.2396583933813718,
|
|
"grad_norm": 0.20770097652952002,
|
|
"learning_rate": 5.694175004799814e-05,
|
|
"loss": 0.6361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6252698302268982,
|
|
"step": 4645,
|
|
"valid_targets_mean": 16053.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.2409927942353882,
|
|
"grad_norm": 0.3212021535054955,
|
|
"learning_rate": 5.6772250212726025e-05,
|
|
"loss": 0.6229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6281235218048096,
|
|
"step": 4650,
|
|
"valid_targets_mean": 14775.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.2423271950894048,
|
|
"grad_norm": 0.2152007420515824,
|
|
"learning_rate": 5.660284926406484e-05,
|
|
"loss": 0.6241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6370538473129272,
|
|
"step": 4655,
|
|
"valid_targets_mean": 16842.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.2436615959434214,
|
|
"grad_norm": 0.31336835020726617,
|
|
"learning_rate": 5.64335481210247e-05,
|
|
"loss": 0.6395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6438559293746948,
|
|
"step": 4660,
|
|
"valid_targets_mean": 17387.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 1.244995996797438,
|
|
"grad_norm": 0.23760951337333752,
|
|
"learning_rate": 5.626434770207414e-05,
|
|
"loss": 0.6264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6267757415771484,
|
|
"step": 4665,
|
|
"valid_targets_mean": 16263.9,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 1.2463303976514546,
|
|
"grad_norm": 0.23155203486515635,
|
|
"learning_rate": 5.60952489251354e-05,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6151365041732788,
|
|
"step": 4670,
|
|
"valid_targets_mean": 17165.5,
|
|
"valid_targets_min": 39
|
|
},
|
|
{
|
|
"epoch": 1.247664798505471,
|
|
"grad_norm": 0.22217206648146606,
|
|
"learning_rate": 5.592625270757928e-05,
|
|
"loss": 0.6383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6513705253601074,
|
|
"step": 4675,
|
|
"valid_targets_mean": 14673.6,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.2489991993594876,
|
|
"grad_norm": 0.22140652238088304,
|
|
"learning_rate": 5.575735996622011e-05,
|
|
"loss": 0.6178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6008556485176086,
|
|
"step": 4680,
|
|
"valid_targets_mean": 17656.0,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.2503336002135041,
|
|
"grad_norm": 0.19959940963850556,
|
|
"learning_rate": 5.558857161731093e-05,
|
|
"loss": 0.6328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6737499833106995,
|
|
"step": 4685,
|
|
"valid_targets_mean": 15595.3,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 1.2516680010675207,
|
|
"grad_norm": 0.2504724859467704,
|
|
"learning_rate": 5.541988857653847e-05,
|
|
"loss": 0.6305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380608081817627,
|
|
"step": 4690,
|
|
"valid_targets_mean": 15766.5,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.2530024019215373,
|
|
"grad_norm": 0.2672212181599046,
|
|
"learning_rate": 5.5251311759018046e-05,
|
|
"loss": 0.6289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6470935344696045,
|
|
"step": 4695,
|
|
"valid_targets_mean": 15265.7,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.2543368027755537,
|
|
"grad_norm": 0.17364708553119368,
|
|
"learning_rate": 5.508284207928879e-05,
|
|
"loss": 0.6305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6420101523399353,
|
|
"step": 4700,
|
|
"valid_targets_mean": 15472.1,
|
|
"valid_targets_min": 29
|
|
},
|
|
{
|
|
"epoch": 1.2556712036295703,
|
|
"grad_norm": 0.1975931861065749,
|
|
"learning_rate": 5.491448045130865e-05,
|
|
"loss": 0.6244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6059800386428833,
|
|
"step": 4705,
|
|
"valid_targets_mean": 17265.4,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 1.257005604483587,
|
|
"grad_norm": 0.19654584278709405,
|
|
"learning_rate": 5.4746227788449236e-05,
|
|
"loss": 0.6192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5940755605697632,
|
|
"step": 4710,
|
|
"valid_targets_mean": 17243.3,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 1.2583400053376035,
|
|
"grad_norm": 0.17652417356658778,
|
|
"learning_rate": 5.457808500349115e-05,
|
|
"loss": 0.633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6395894289016724,
|
|
"step": 4715,
|
|
"valid_targets_mean": 15693.4,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.2596744061916199,
|
|
"grad_norm": 0.21081149707273059,
|
|
"learning_rate": 5.441005300861887e-05,
|
|
"loss": 0.6271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.645363986492157,
|
|
"step": 4720,
|
|
"valid_targets_mean": 15345.9,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 1.2610088070456364,
|
|
"grad_norm": 0.21219622352906828,
|
|
"learning_rate": 5.424213271541578e-05,
|
|
"loss": 0.6518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6449532508850098,
|
|
"step": 4725,
|
|
"valid_targets_mean": 16548.9,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.262343207899653,
|
|
"grad_norm": 0.20776927168613465,
|
|
"learning_rate": 5.4074325034859336e-05,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6154712438583374,
|
|
"step": 4730,
|
|
"valid_targets_mean": 15414.3,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 1.2636776087536696,
|
|
"grad_norm": 0.14797261024718483,
|
|
"learning_rate": 5.390663087731605e-05,
|
|
"loss": 0.641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6244392395019531,
|
|
"step": 4735,
|
|
"valid_targets_mean": 16484.8,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 1.2650120096076862,
|
|
"grad_norm": 0.23990355060984186,
|
|
"learning_rate": 5.3739051152536506e-05,
|
|
"loss": 0.6199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6142117977142334,
|
|
"step": 4740,
|
|
"valid_targets_mean": 16228.0,
|
|
"valid_targets_min": 56
|
|
},
|
|
{
|
|
"epoch": 1.2663464104617028,
|
|
"grad_norm": 0.22640906977866915,
|
|
"learning_rate": 5.357158676965059e-05,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6549844741821289,
|
|
"step": 4745,
|
|
"valid_targets_mean": 16698.5,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 1.2676808113157192,
|
|
"grad_norm": 0.2735033998795201,
|
|
"learning_rate": 5.3404238637162364e-05,
|
|
"loss": 0.6364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6428447961807251,
|
|
"step": 4750,
|
|
"valid_targets_mean": 16238.8,
|
|
"valid_targets_min": 118
|
|
},
|
|
{
|
|
"epoch": 1.2690152121697358,
|
|
"grad_norm": 0.2462400805867784,
|
|
"learning_rate": 5.323700766294526e-05,
|
|
"loss": 0.6269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6167312264442444,
|
|
"step": 4755,
|
|
"valid_targets_mean": 15054.7,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 1.2703496130237524,
|
|
"grad_norm": 0.18587242356920117,
|
|
"learning_rate": 5.306989475423712e-05,
|
|
"loss": 0.625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6305737495422363,
|
|
"step": 4760,
|
|
"valid_targets_mean": 16805.6,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 1.2716840138777687,
|
|
"grad_norm": 0.2021984168637943,
|
|
"learning_rate": 5.2902900817635264e-05,
|
|
"loss": 0.6395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6341131925582886,
|
|
"step": 4765,
|
|
"valid_targets_mean": 14891.0,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 1.2730184147317853,
|
|
"grad_norm": 0.1721371921090052,
|
|
"learning_rate": 5.273602675909159e-05,
|
|
"loss": 0.6528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6565139293670654,
|
|
"step": 4770,
|
|
"valid_targets_mean": 16145.6,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 1.274352815585802,
|
|
"grad_norm": 0.1607326214209887,
|
|
"learning_rate": 5.256927348390765e-05,
|
|
"loss": 0.6384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6334860920906067,
|
|
"step": 4775,
|
|
"valid_targets_mean": 16294.3,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 1.2756872164398185,
|
|
"grad_norm": 0.16235131734879868,
|
|
"learning_rate": 5.24026418967297e-05,
|
|
"loss": 0.6329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6610786318778992,
|
|
"step": 4780,
|
|
"valid_targets_mean": 15935.6,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.2770216172938351,
|
|
"grad_norm": 0.16419031382936092,
|
|
"learning_rate": 5.2236132901543895e-05,
|
|
"loss": 0.6168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6397436261177063,
|
|
"step": 4785,
|
|
"valid_targets_mean": 15607.6,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 1.2783560181478517,
|
|
"grad_norm": 0.1987148988456433,
|
|
"learning_rate": 5.2069747401671334e-05,
|
|
"loss": 0.6283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6338160037994385,
|
|
"step": 4790,
|
|
"valid_targets_mean": 15817.7,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 1.279690419001868,
|
|
"grad_norm": 0.17343024441593377,
|
|
"learning_rate": 5.1903486299763026e-05,
|
|
"loss": 0.6343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.607448935508728,
|
|
"step": 4795,
|
|
"valid_targets_mean": 16366.8,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.2810248198558847,
|
|
"grad_norm": 0.16366990841190826,
|
|
"learning_rate": 5.173735049779523e-05,
|
|
"loss": 0.6427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623712956905365,
|
|
"step": 4800,
|
|
"valid_targets_mean": 16776.7,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.2823592207099013,
|
|
"grad_norm": 0.19917720907875183,
|
|
"learning_rate": 5.1571340897064454e-05,
|
|
"loss": 0.6323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6857748627662659,
|
|
"step": 4805,
|
|
"valid_targets_mean": 14956.1,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 1.2836936215639179,
|
|
"grad_norm": 0.3813637628640378,
|
|
"learning_rate": 5.140545839818242e-05,
|
|
"loss": 0.6201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.605983316898346,
|
|
"step": 4810,
|
|
"valid_targets_mean": 16493.7,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 1.2850280224179342,
|
|
"grad_norm": 0.18836472575490848,
|
|
"learning_rate": 5.1239703901071506e-05,
|
|
"loss": 0.6119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6219553351402283,
|
|
"step": 4815,
|
|
"valid_targets_mean": 16133.3,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 1.2863624232719508,
|
|
"grad_norm": 0.1893647179336856,
|
|
"learning_rate": 5.1074078304959474e-05,
|
|
"loss": 0.6274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6246172785758972,
|
|
"step": 4820,
|
|
"valid_targets_mean": 15314.3,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.2876968241259674,
|
|
"grad_norm": 0.1584138984615297,
|
|
"learning_rate": 5.090858250837495e-05,
|
|
"loss": 0.6358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6141932606697083,
|
|
"step": 4825,
|
|
"valid_targets_mean": 15665.5,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.289031224979984,
|
|
"grad_norm": 0.18886991213724974,
|
|
"learning_rate": 5.0743217409142344e-05,
|
|
"loss": 0.6267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5984083414077759,
|
|
"step": 4830,
|
|
"valid_targets_mean": 17682.3,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 1.2903656258340006,
|
|
"grad_norm": 0.1787055361455221,
|
|
"learning_rate": 5.057798390437696e-05,
|
|
"loss": 0.6363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6204890012741089,
|
|
"step": 4835,
|
|
"valid_targets_mean": 17329.2,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 1.2917000266880172,
|
|
"grad_norm": 0.2697400634607623,
|
|
"learning_rate": 5.0412882890480266e-05,
|
|
"loss": 0.6312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6120416522026062,
|
|
"step": 4840,
|
|
"valid_targets_mean": 16662.8,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 1.2930344275420336,
|
|
"grad_norm": 0.18808204647018414,
|
|
"learning_rate": 5.0247915263134984e-05,
|
|
"loss": 0.6364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6301390528678894,
|
|
"step": 4845,
|
|
"valid_targets_mean": 16133.6,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.2943688283960502,
|
|
"grad_norm": 0.18255274583932535,
|
|
"learning_rate": 5.0083081917300086e-05,
|
|
"loss": 0.6501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6858794689178467,
|
|
"step": 4850,
|
|
"valid_targets_mean": 16014.1,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 1.2957032292500668,
|
|
"grad_norm": 0.1655884067769675,
|
|
"learning_rate": 4.991838374720618e-05,
|
|
"loss": 0.6462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6166006326675415,
|
|
"step": 4855,
|
|
"valid_targets_mean": 15985.5,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.2970376301040831,
|
|
"grad_norm": 0.17322275617538,
|
|
"learning_rate": 4.975382164635051e-05,
|
|
"loss": 0.6126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6046770215034485,
|
|
"step": 4860,
|
|
"valid_targets_mean": 15465.7,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 1.2983720309580997,
|
|
"grad_norm": 0.15974272488697086,
|
|
"learning_rate": 4.9589396507492044e-05,
|
|
"loss": 0.6373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6578046083450317,
|
|
"step": 4865,
|
|
"valid_targets_mean": 16058.8,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.2997064318121163,
|
|
"grad_norm": 0.18475723438811187,
|
|
"learning_rate": 4.942510922264686e-05,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5904414057731628,
|
|
"step": 4870,
|
|
"valid_targets_mean": 16700.1,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 1.301040832666133,
|
|
"grad_norm": 0.1698843941112048,
|
|
"learning_rate": 4.926096068308312e-05,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6341725587844849,
|
|
"step": 4875,
|
|
"valid_targets_mean": 15507.0,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 1.3023752335201495,
|
|
"grad_norm": 0.14543667801391244,
|
|
"learning_rate": 4.90969517793162e-05,
|
|
"loss": 0.6388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6560980081558228,
|
|
"step": 4880,
|
|
"valid_targets_mean": 16731.6,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.303709634374166,
|
|
"grad_norm": 0.14532183493811077,
|
|
"learning_rate": 4.893308340110407e-05,
|
|
"loss": 0.6356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6323143243789673,
|
|
"step": 4885,
|
|
"valid_targets_mean": 15907.5,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.3050440352281825,
|
|
"grad_norm": 0.2876475131773128,
|
|
"learning_rate": 4.876935643744229e-05,
|
|
"loss": 0.6411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6975796818733215,
|
|
"step": 4890,
|
|
"valid_targets_mean": 15803.8,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 1.306378436082199,
|
|
"grad_norm": 0.2102077296249055,
|
|
"learning_rate": 4.860577177655922e-05,
|
|
"loss": 0.6362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6357290148735046,
|
|
"step": 4895,
|
|
"valid_targets_mean": 16207.7,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.3077128369362157,
|
|
"grad_norm": 0.18759868555822456,
|
|
"learning_rate": 4.844233030591122e-05,
|
|
"loss": 0.6321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6458548307418823,
|
|
"step": 4900,
|
|
"valid_targets_mean": 16728.7,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 1.3090472377902322,
|
|
"grad_norm": 0.18943513623219738,
|
|
"learning_rate": 4.827903291217785e-05,
|
|
"loss": 0.6428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6549099087715149,
|
|
"step": 4905,
|
|
"valid_targets_mean": 16717.1,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 1.3103816386442486,
|
|
"grad_norm": 0.14584218135147162,
|
|
"learning_rate": 4.8115880481257066e-05,
|
|
"loss": 0.6271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.604077935218811,
|
|
"step": 4910,
|
|
"valid_targets_mean": 16537.5,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 1.3117160394982652,
|
|
"grad_norm": 0.15614884433868975,
|
|
"learning_rate": 4.795287389826035e-05,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6058860421180725,
|
|
"step": 4915,
|
|
"valid_targets_mean": 15564.1,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.3130504403522818,
|
|
"grad_norm": 0.19647445810549968,
|
|
"learning_rate": 4.779001404750797e-05,
|
|
"loss": 0.6234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6127307415008545,
|
|
"step": 4920,
|
|
"valid_targets_mean": 16563.2,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 1.3143848412062984,
|
|
"grad_norm": 0.15351376101927297,
|
|
"learning_rate": 4.762730181252415e-05,
|
|
"loss": 0.6248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6400808095932007,
|
|
"step": 4925,
|
|
"valid_targets_mean": 15953.7,
|
|
"valid_targets_min": 25
|
|
},
|
|
{
|
|
"epoch": 1.315719242060315,
|
|
"grad_norm": 0.19129910797630822,
|
|
"learning_rate": 4.746473807603235e-05,
|
|
"loss": 0.6481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6250849962234497,
|
|
"step": 4930,
|
|
"valid_targets_mean": 15565.2,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 1.3170536429143316,
|
|
"grad_norm": 0.24069633672786006,
|
|
"learning_rate": 4.730232371995029e-05,
|
|
"loss": 0.6273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6520248651504517,
|
|
"step": 4935,
|
|
"valid_targets_mean": 16213.8,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 1.318388043768348,
|
|
"grad_norm": 0.206513608573115,
|
|
"learning_rate": 4.714005962538543e-05,
|
|
"loss": 0.6367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.634706974029541,
|
|
"step": 4940,
|
|
"valid_targets_mean": 17679.6,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.3197224446223645,
|
|
"grad_norm": 0.24134880808451342,
|
|
"learning_rate": 4.6977946672630004e-05,
|
|
"loss": 0.63,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.607745885848999,
|
|
"step": 4945,
|
|
"valid_targets_mean": 17591.5,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.3210568454763811,
|
|
"grad_norm": 0.1801320898077436,
|
|
"learning_rate": 4.681598574115622e-05,
|
|
"loss": 0.6287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6350492835044861,
|
|
"step": 4950,
|
|
"valid_targets_mean": 16330.3,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 1.3223912463303977,
|
|
"grad_norm": 0.1738518510160165,
|
|
"learning_rate": 4.665417770961166e-05,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6333613395690918,
|
|
"step": 4955,
|
|
"valid_targets_mean": 15356.5,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.323725647184414,
|
|
"grad_norm": 0.15642467135401047,
|
|
"learning_rate": 4.6492523455814415e-05,
|
|
"loss": 0.6094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5787743330001831,
|
|
"step": 4960,
|
|
"valid_targets_mean": 15801.2,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 1.3250600480384307,
|
|
"grad_norm": 0.149988243905769,
|
|
"learning_rate": 4.633102385674821e-05,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6080968379974365,
|
|
"step": 4965,
|
|
"valid_targets_mean": 16495.6,
|
|
"valid_targets_min": 123
|
|
},
|
|
{
|
|
"epoch": 1.3263944488924473,
|
|
"grad_norm": 0.1564258726734769,
|
|
"learning_rate": 4.616967978855788e-05,
|
|
"loss": 0.6144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6178469657897949,
|
|
"step": 4970,
|
|
"valid_targets_mean": 15740.2,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.3277288497464639,
|
|
"grad_norm": 0.15096598858177918,
|
|
"learning_rate": 4.6008492126544476e-05,
|
|
"loss": 0.6229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6312127113342285,
|
|
"step": 4975,
|
|
"valid_targets_mean": 15290.4,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 1.3290632506004805,
|
|
"grad_norm": 0.16745460403329684,
|
|
"learning_rate": 4.584746174516045e-05,
|
|
"loss": 0.6172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5870546102523804,
|
|
"step": 4980,
|
|
"valid_targets_mean": 17124.3,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.330397651454497,
|
|
"grad_norm": 0.1986477705300295,
|
|
"learning_rate": 4.568658951800512e-05,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6241950988769531,
|
|
"step": 4985,
|
|
"valid_targets_mean": 14701.8,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 1.3317320523085134,
|
|
"grad_norm": 0.2503093418138642,
|
|
"learning_rate": 4.552587631781969e-05,
|
|
"loss": 0.6104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6214447617530823,
|
|
"step": 4990,
|
|
"valid_targets_mean": 17417.6,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 1.33306645316253,
|
|
"grad_norm": 0.16628560708840986,
|
|
"learning_rate": 4.536532301648271e-05,
|
|
"loss": 0.6313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6199056506156921,
|
|
"step": 4995,
|
|
"valid_targets_mean": 16565.2,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.3344008540165466,
|
|
"grad_norm": 0.25358311504745584,
|
|
"learning_rate": 4.5204930485005306e-05,
|
|
"loss": 0.6297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6275741457939148,
|
|
"step": 5000,
|
|
"valid_targets_mean": 16034.8,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.335735254870563,
|
|
"grad_norm": 0.22096396426353762,
|
|
"learning_rate": 4.504469959352627e-05,
|
|
"loss": 0.6221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6337085366249084,
|
|
"step": 5005,
|
|
"valid_targets_mean": 15851.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.3370696557245796,
|
|
"grad_norm": 0.17905523831113826,
|
|
"learning_rate": 4.488463121130762e-05,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6013862490653992,
|
|
"step": 5010,
|
|
"valid_targets_mean": 16295.4,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 1.3384040565785962,
|
|
"grad_norm": 0.18892903740727368,
|
|
"learning_rate": 4.472472620672976e-05,
|
|
"loss": 0.621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6233558654785156,
|
|
"step": 5015,
|
|
"valid_targets_mean": 14884.6,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 1.3397384574326128,
|
|
"grad_norm": 0.1960202408759845,
|
|
"learning_rate": 4.4564985447286614e-05,
|
|
"loss": 0.6421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6817205548286438,
|
|
"step": 5020,
|
|
"valid_targets_mean": 13823.1,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.3410728582866294,
|
|
"grad_norm": 0.16756027953019842,
|
|
"learning_rate": 4.440540979958124e-05,
|
|
"loss": 0.6355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6471720337867737,
|
|
"step": 5025,
|
|
"valid_targets_mean": 15493.6,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 1.342407259140646,
|
|
"grad_norm": 0.17529206291663108,
|
|
"learning_rate": 4.4246000129320867e-05,
|
|
"loss": 0.6356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6711809635162354,
|
|
"step": 5030,
|
|
"valid_targets_mean": 14982.9,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 1.3437416599946623,
|
|
"grad_norm": 0.19955952977785812,
|
|
"learning_rate": 4.408675730131227e-05,
|
|
"loss": 0.6303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6456050872802734,
|
|
"step": 5035,
|
|
"valid_targets_mean": 15447.3,
|
|
"valid_targets_min": 157
|
|
},
|
|
{
|
|
"epoch": 1.345076060848679,
|
|
"grad_norm": 0.16163297867636223,
|
|
"learning_rate": 4.3927682179457144e-05,
|
|
"loss": 0.6183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6260631084442139,
|
|
"step": 5040,
|
|
"valid_targets_mean": 15513.2,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 1.3464104617026955,
|
|
"grad_norm": 0.18520629633766658,
|
|
"learning_rate": 4.376877562674737e-05,
|
|
"loss": 0.6267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6137248277664185,
|
|
"step": 5045,
|
|
"valid_targets_mean": 16198.9,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.347744862556712,
|
|
"grad_norm": 0.2500265725529047,
|
|
"learning_rate": 4.3610038505260264e-05,
|
|
"loss": 0.6469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6569816470146179,
|
|
"step": 5050,
|
|
"valid_targets_mean": 15119.1,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 1.3490792634107285,
|
|
"grad_norm": 0.1694413784545287,
|
|
"learning_rate": 4.3451471676154035e-05,
|
|
"loss": 0.6172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6165503263473511,
|
|
"step": 5055,
|
|
"valid_targets_mean": 17920.4,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.350413664264745,
|
|
"grad_norm": 0.15935104068927747,
|
|
"learning_rate": 4.329307599966306e-05,
|
|
"loss": 0.6398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6341271996498108,
|
|
"step": 5060,
|
|
"valid_targets_mean": 16518.5,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 1.3517480651187617,
|
|
"grad_norm": 0.20247588720231224,
|
|
"learning_rate": 4.313485233509309e-05,
|
|
"loss": 0.641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6382253170013428,
|
|
"step": 5065,
|
|
"valid_targets_mean": 14843.2,
|
|
"valid_targets_min": 147
|
|
},
|
|
{
|
|
"epoch": 1.3530824659727783,
|
|
"grad_norm": 0.17668341881020452,
|
|
"learning_rate": 4.297680154081686e-05,
|
|
"loss": 0.628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6434556245803833,
|
|
"step": 5070,
|
|
"valid_targets_mean": 16067.9,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.3544168668267949,
|
|
"grad_norm": 0.23015809453586503,
|
|
"learning_rate": 4.2818924474269126e-05,
|
|
"loss": 0.6307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6627788543701172,
|
|
"step": 5075,
|
|
"valid_targets_mean": 15679.1,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.3557512676808114,
|
|
"grad_norm": 0.19363015963806815,
|
|
"learning_rate": 4.266122199194226e-05,
|
|
"loss": 0.6294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6344991326332092,
|
|
"step": 5080,
|
|
"valid_targets_mean": 15946.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.3570856685348278,
|
|
"grad_norm": 0.1696018155620583,
|
|
"learning_rate": 4.250369494938146e-05,
|
|
"loss": 0.6314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6520615816116333,
|
|
"step": 5085,
|
|
"valid_targets_mean": 14841.5,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 1.3584200693888444,
|
|
"grad_norm": 0.16023070867785383,
|
|
"learning_rate": 4.234634420118021e-05,
|
|
"loss": 0.6288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6024264097213745,
|
|
"step": 5090,
|
|
"valid_targets_mean": 16656.5,
|
|
"valid_targets_min": 142
|
|
},
|
|
{
|
|
"epoch": 1.359754470242861,
|
|
"grad_norm": 0.27425959242038045,
|
|
"learning_rate": 4.218917060097547e-05,
|
|
"loss": 0.6506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6734393835067749,
|
|
"step": 5095,
|
|
"valid_targets_mean": 13772.1,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 1.3610888710968774,
|
|
"grad_norm": 0.2609294240114705,
|
|
"learning_rate": 4.203217500144326e-05,
|
|
"loss": 0.6207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6329338550567627,
|
|
"step": 5100,
|
|
"valid_targets_mean": 15875.0,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.362423271950894,
|
|
"grad_norm": 0.21382203340370412,
|
|
"learning_rate": 4.187535825429396e-05,
|
|
"loss": 0.6311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6623399257659912,
|
|
"step": 5105,
|
|
"valid_targets_mean": 15016.3,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.3637576728049106,
|
|
"grad_norm": 0.186234246475179,
|
|
"learning_rate": 4.171872121026753e-05,
|
|
"loss": 0.6431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6502887010574341,
|
|
"step": 5110,
|
|
"valid_targets_mean": 15757.6,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 1.3650920736589272,
|
|
"grad_norm": 0.14765918505153394,
|
|
"learning_rate": 4.1562264719129165e-05,
|
|
"loss": 0.6172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5981555581092834,
|
|
"step": 5115,
|
|
"valid_targets_mean": 17159.0,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 1.3664264745129437,
|
|
"grad_norm": 0.17270382952270566,
|
|
"learning_rate": 4.140598962966447e-05,
|
|
"loss": 0.636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6030063033103943,
|
|
"step": 5120,
|
|
"valid_targets_mean": 16237.5,
|
|
"valid_targets_min": 94
|
|
},
|
|
{
|
|
"epoch": 1.3677608753669603,
|
|
"grad_norm": 0.16978609229491592,
|
|
"learning_rate": 4.124989678967503e-05,
|
|
"loss": 0.6253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6567881107330322,
|
|
"step": 5125,
|
|
"valid_targets_mean": 14794.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.3690952762209767,
|
|
"grad_norm": 0.14802313718879107,
|
|
"learning_rate": 4.109398704597357e-05,
|
|
"loss": 0.6248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6337324976921082,
|
|
"step": 5130,
|
|
"valid_targets_mean": 14994.8,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 1.3704296770749933,
|
|
"grad_norm": 0.14832414023378732,
|
|
"learning_rate": 4.093826124437962e-05,
|
|
"loss": 0.6198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6228146553039551,
|
|
"step": 5135,
|
|
"valid_targets_mean": 16929.2,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 1.37176407792901,
|
|
"grad_norm": 0.14911468961970115,
|
|
"learning_rate": 4.078272022971481e-05,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6165578961372375,
|
|
"step": 5140,
|
|
"valid_targets_mean": 15223.8,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 1.3730984787830265,
|
|
"grad_norm": 0.1296469390132913,
|
|
"learning_rate": 4.06273648457982e-05,
|
|
"loss": 0.6272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6289052367210388,
|
|
"step": 5145,
|
|
"valid_targets_mean": 16206.2,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 1.3744328796370429,
|
|
"grad_norm": 0.20771349174880657,
|
|
"learning_rate": 4.0472195935441904e-05,
|
|
"loss": 0.6371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.635810375213623,
|
|
"step": 5150,
|
|
"valid_targets_mean": 15276.2,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.3757672804910595,
|
|
"grad_norm": 0.1426689571733511,
|
|
"learning_rate": 4.031721434044635e-05,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6031745076179504,
|
|
"step": 5155,
|
|
"valid_targets_mean": 16678.9,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 1.377101681345076,
|
|
"grad_norm": 0.1736397062698139,
|
|
"learning_rate": 4.016242090159574e-05,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6417535543441772,
|
|
"step": 5160,
|
|
"valid_targets_mean": 14546.7,
|
|
"valid_targets_min": 99
|
|
},
|
|
{
|
|
"epoch": 1.3784360821990926,
|
|
"grad_norm": 0.15597388264000142,
|
|
"learning_rate": 4.0007816458653566e-05,
|
|
"loss": 0.6104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5856055617332458,
|
|
"step": 5165,
|
|
"valid_targets_mean": 15726.9,
|
|
"valid_targets_min": 85
|
|
},
|
|
{
|
|
"epoch": 1.3797704830531092,
|
|
"grad_norm": 0.15334672011634948,
|
|
"learning_rate": 3.9853401850358036e-05,
|
|
"loss": 0.6257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6129058003425598,
|
|
"step": 5170,
|
|
"valid_targets_mean": 17155.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.3811048839071258,
|
|
"grad_norm": 0.1631981195786689,
|
|
"learning_rate": 3.969917791441739e-05,
|
|
"loss": 0.6298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6201703548431396,
|
|
"step": 5175,
|
|
"valid_targets_mean": 16696.0,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 1.3824392847611422,
|
|
"grad_norm": 0.15249718456766068,
|
|
"learning_rate": 3.954514548750553e-05,
|
|
"loss": 0.6251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6350277662277222,
|
|
"step": 5180,
|
|
"valid_targets_mean": 15671.8,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.3837736856151588,
|
|
"grad_norm": 0.13970025285922955,
|
|
"learning_rate": 3.939130540525746e-05,
|
|
"loss": 0.623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961894989013672,
|
|
"step": 5185,
|
|
"valid_targets_mean": 16183.0,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.3851080864691754,
|
|
"grad_norm": 0.16541443655951255,
|
|
"learning_rate": 3.923765850226456e-05,
|
|
"loss": 0.6362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.605224609375,
|
|
"step": 5190,
|
|
"valid_targets_mean": 15942.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.3864424873231918,
|
|
"grad_norm": 0.20505065015451213,
|
|
"learning_rate": 3.908420561207032e-05,
|
|
"loss": 0.6226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6064752340316772,
|
|
"step": 5195,
|
|
"valid_targets_mean": 16724.2,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 1.3877768881772083,
|
|
"grad_norm": 0.1625840801923186,
|
|
"learning_rate": 3.893094756716569e-05,
|
|
"loss": 0.6415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6216104030609131,
|
|
"step": 5200,
|
|
"valid_targets_mean": 15669.0,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 1.389111289031225,
|
|
"grad_norm": 0.15262810603720672,
|
|
"learning_rate": 3.87778851989845e-05,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6247696876525879,
|
|
"step": 5205,
|
|
"valid_targets_mean": 15331.0,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 1.3904456898852415,
|
|
"grad_norm": 0.17970027336562808,
|
|
"learning_rate": 3.862501933789908e-05,
|
|
"loss": 0.6235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6287873387336731,
|
|
"step": 5210,
|
|
"valid_targets_mean": 15491.3,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.3917800907392581,
|
|
"grad_norm": 0.17621270663724736,
|
|
"learning_rate": 3.847235081321573e-05,
|
|
"loss": 0.643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6243505477905273,
|
|
"step": 5215,
|
|
"valid_targets_mean": 15494.3,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.3931144915932747,
|
|
"grad_norm": 0.1434464905938825,
|
|
"learning_rate": 3.831988045317007e-05,
|
|
"loss": 0.614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6041746139526367,
|
|
"step": 5220,
|
|
"valid_targets_mean": 17664.0,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.394448892447291,
|
|
"grad_norm": 0.15414481962946985,
|
|
"learning_rate": 3.816760908492282e-05,
|
|
"loss": 0.6127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6363335847854614,
|
|
"step": 5225,
|
|
"valid_targets_mean": 15424.6,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.3957832933013077,
|
|
"grad_norm": 0.12673670646454216,
|
|
"learning_rate": 3.8015537534555e-05,
|
|
"loss": 0.6554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6517373323440552,
|
|
"step": 5230,
|
|
"valid_targets_mean": 15626.9,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.3971176941553243,
|
|
"grad_norm": 0.15606926008374739,
|
|
"learning_rate": 3.786366662706372e-05,
|
|
"loss": 0.635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6305726766586304,
|
|
"step": 5235,
|
|
"valid_targets_mean": 15393.1,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.3984520950093409,
|
|
"grad_norm": 0.17304182022885428,
|
|
"learning_rate": 3.771199718635758e-05,
|
|
"loss": 0.6295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6062044501304626,
|
|
"step": 5240,
|
|
"valid_targets_mean": 15592.8,
|
|
"valid_targets_min": 123
|
|
},
|
|
{
|
|
"epoch": 1.3997864958633572,
|
|
"grad_norm": 0.14937846564123805,
|
|
"learning_rate": 3.756053003525213e-05,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961400270462036,
|
|
"step": 5245,
|
|
"valid_targets_mean": 17602.9,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 1.4011208967173738,
|
|
"grad_norm": 0.14330067972015584,
|
|
"learning_rate": 3.7409265995465577e-05,
|
|
"loss": 0.6433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.636821985244751,
|
|
"step": 5250,
|
|
"valid_targets_mean": 15189.9,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.4024552975713904,
|
|
"grad_norm": 0.15326204532333548,
|
|
"learning_rate": 3.725820588761422e-05,
|
|
"loss": 0.6332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6448045969009399,
|
|
"step": 5255,
|
|
"valid_targets_mean": 15761.1,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.403789698425407,
|
|
"grad_norm": 0.1911081730773287,
|
|
"learning_rate": 3.7107350531207944e-05,
|
|
"loss": 0.6228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6157747507095337,
|
|
"step": 5260,
|
|
"valid_targets_mean": 16320.1,
|
|
"valid_targets_min": 122
|
|
},
|
|
{
|
|
"epoch": 1.4051240992794236,
|
|
"grad_norm": 0.15975441609751267,
|
|
"learning_rate": 3.6956700744645934e-05,
|
|
"loss": 0.6378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.664246141910553,
|
|
"step": 5265,
|
|
"valid_targets_mean": 15061.1,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 1.4064585001334402,
|
|
"grad_norm": 0.17030194394230258,
|
|
"learning_rate": 3.6806257345212136e-05,
|
|
"loss": 0.63,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6642188429832458,
|
|
"step": 5270,
|
|
"valid_targets_mean": 15093.4,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.4077929009874566,
|
|
"grad_norm": 0.2071399958210783,
|
|
"learning_rate": 3.665602114907075e-05,
|
|
"loss": 0.6247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6460654735565186,
|
|
"step": 5275,
|
|
"valid_targets_mean": 15851.9,
|
|
"valid_targets_min": 153
|
|
},
|
|
{
|
|
"epoch": 1.4091273018414732,
|
|
"grad_norm": 0.20186223724820232,
|
|
"learning_rate": 3.650599297126198e-05,
|
|
"loss": 0.6138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6479173898696899,
|
|
"step": 5280,
|
|
"valid_targets_mean": 15088.5,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 1.4104617026954898,
|
|
"grad_norm": 0.1715220156560148,
|
|
"learning_rate": 3.63561736256975e-05,
|
|
"loss": 0.6428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6402706503868103,
|
|
"step": 5285,
|
|
"valid_targets_mean": 16333.2,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 1.4117961035495064,
|
|
"grad_norm": 0.18866956062784462,
|
|
"learning_rate": 3.6206563925156e-05,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6222355365753174,
|
|
"step": 5290,
|
|
"valid_targets_mean": 15949.2,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 1.4131305044035227,
|
|
"grad_norm": 0.1692810813069157,
|
|
"learning_rate": 3.605716468127889e-05,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6255773305892944,
|
|
"step": 5295,
|
|
"valid_targets_mean": 15020.3,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.4144649052575393,
|
|
"grad_norm": 0.1761368719849253,
|
|
"learning_rate": 3.590797670456586e-05,
|
|
"loss": 0.6345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6473125219345093,
|
|
"step": 5300,
|
|
"valid_targets_mean": 15610.2,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 1.415799306111556,
|
|
"grad_norm": 0.15287688398886665,
|
|
"learning_rate": 3.575900080437036e-05,
|
|
"loss": 0.6368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6090598702430725,
|
|
"step": 5305,
|
|
"valid_targets_mean": 17042.0,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 1.4171337069655725,
|
|
"grad_norm": 0.20294069550913238,
|
|
"learning_rate": 3.561023778889545e-05,
|
|
"loss": 0.6175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6067546010017395,
|
|
"step": 5310,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.418468107819589,
|
|
"grad_norm": 0.17411375489959932,
|
|
"learning_rate": 3.546168846518915e-05,
|
|
"loss": 0.6221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6281801462173462,
|
|
"step": 5315,
|
|
"valid_targets_mean": 16322.3,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 1.4198025086736055,
|
|
"grad_norm": 0.1992434771552196,
|
|
"learning_rate": 3.531335363914027e-05,
|
|
"loss": 0.6201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.652416467666626,
|
|
"step": 5320,
|
|
"valid_targets_mean": 15787.6,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.421136909527622,
|
|
"grad_norm": 0.2804856175246008,
|
|
"learning_rate": 3.516523411547397e-05,
|
|
"loss": 0.6347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.601277232170105,
|
|
"step": 5325,
|
|
"valid_targets_mean": 15735.6,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 1.4224713103816387,
|
|
"grad_norm": 0.2331782119792056,
|
|
"learning_rate": 3.5017330697747276e-05,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6436649560928345,
|
|
"step": 5330,
|
|
"valid_targets_mean": 15940.2,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 1.4238057112356552,
|
|
"grad_norm": 0.20092372413155127,
|
|
"learning_rate": 3.486964418834495e-05,
|
|
"loss": 0.6299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6183117628097534,
|
|
"step": 5335,
|
|
"valid_targets_mean": 14376.1,
|
|
"valid_targets_min": 28
|
|
},
|
|
{
|
|
"epoch": 1.4251401120896716,
|
|
"grad_norm": 0.18482644972327308,
|
|
"learning_rate": 3.472217538847496e-05,
|
|
"loss": 0.6272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6374001502990723,
|
|
"step": 5340,
|
|
"valid_targets_mean": 16009.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.4264745129436882,
|
|
"grad_norm": 0.18657467619893198,
|
|
"learning_rate": 3.457492509816416e-05,
|
|
"loss": 0.6195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5956864953041077,
|
|
"step": 5345,
|
|
"valid_targets_mean": 15606.7,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.4278089137977048,
|
|
"grad_norm": 0.17376513299379437,
|
|
"learning_rate": 3.442789411625402e-05,
|
|
"loss": 0.6365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6099961996078491,
|
|
"step": 5350,
|
|
"valid_targets_mean": 15835.6,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 1.4291433146517214,
|
|
"grad_norm": 0.16282284195478497,
|
|
"learning_rate": 3.428108324039626e-05,
|
|
"loss": 0.6354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623703122138977,
|
|
"step": 5355,
|
|
"valid_targets_mean": 17537.7,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.430477715505738,
|
|
"grad_norm": 0.15783998047918987,
|
|
"learning_rate": 3.413449326704843e-05,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5866317749023438,
|
|
"step": 5360,
|
|
"valid_targets_mean": 16450.6,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.4318121163597546,
|
|
"grad_norm": 0.16225480021708807,
|
|
"learning_rate": 3.3988124991469764e-05,
|
|
"loss": 0.6477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6615272760391235,
|
|
"step": 5365,
|
|
"valid_targets_mean": 15123.1,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 1.433146517213771,
|
|
"grad_norm": 0.1319022511747964,
|
|
"learning_rate": 3.384197920771676e-05,
|
|
"loss": 0.6156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.585268497467041,
|
|
"step": 5370,
|
|
"valid_targets_mean": 18510.1,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 1.4344809180677875,
|
|
"grad_norm": 0.22534083880885686,
|
|
"learning_rate": 3.36960567086388e-05,
|
|
"loss": 0.6454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6688559651374817,
|
|
"step": 5375,
|
|
"valid_targets_mean": 15660.4,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.4358153189218041,
|
|
"grad_norm": 0.1504794415732279,
|
|
"learning_rate": 3.355035828587403e-05,
|
|
"loss": 0.6236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6434562802314758,
|
|
"step": 5380,
|
|
"valid_targets_mean": 15358.2,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 1.4371497197758207,
|
|
"grad_norm": 0.14914466776523827,
|
|
"learning_rate": 3.340488472984493e-05,
|
|
"loss": 0.6216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6424883604049683,
|
|
"step": 5385,
|
|
"valid_targets_mean": 15463.3,
|
|
"valid_targets_min": 167
|
|
},
|
|
{
|
|
"epoch": 1.438484120629837,
|
|
"grad_norm": 0.17701115124674544,
|
|
"learning_rate": 3.3259636829754086e-05,
|
|
"loss": 0.6291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6274256706237793,
|
|
"step": 5390,
|
|
"valid_targets_mean": 15954.8,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 1.4398185214838537,
|
|
"grad_norm": 0.16413150057095138,
|
|
"learning_rate": 3.3114615373579827e-05,
|
|
"loss": 0.6155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6335543990135193,
|
|
"step": 5395,
|
|
"valid_targets_mean": 16068.7,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 1.4411529223378703,
|
|
"grad_norm": 0.20083715478632508,
|
|
"learning_rate": 3.296982114807207e-05,
|
|
"loss": 0.6366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6472524404525757,
|
|
"step": 5400,
|
|
"valid_targets_mean": 15599.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.4424873231918869,
|
|
"grad_norm": 0.18175668651315993,
|
|
"learning_rate": 3.282525493874798e-05,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6218005418777466,
|
|
"step": 5405,
|
|
"valid_targets_mean": 15867.5,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 1.4438217240459035,
|
|
"grad_norm": 0.15353302359402962,
|
|
"learning_rate": 3.2680917529887746e-05,
|
|
"loss": 0.6339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6132454872131348,
|
|
"step": 5410,
|
|
"valid_targets_mean": 17124.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.44515612489992,
|
|
"grad_norm": 0.16417620846628184,
|
|
"learning_rate": 3.2536809704530206e-05,
|
|
"loss": 0.6117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6290859580039978,
|
|
"step": 5415,
|
|
"valid_targets_mean": 16340.5,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 1.4464905257539364,
|
|
"grad_norm": 0.17678199877547868,
|
|
"learning_rate": 3.239293224446879e-05,
|
|
"loss": 0.6285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6204519271850586,
|
|
"step": 5420,
|
|
"valid_targets_mean": 16516.2,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 1.447824926607953,
|
|
"grad_norm": 0.1742844233875428,
|
|
"learning_rate": 3.224928593024719e-05,
|
|
"loss": 0.6161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6010374426841736,
|
|
"step": 5425,
|
|
"valid_targets_mean": 16831.8,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.4491593274619696,
|
|
"grad_norm": 0.13521384271171266,
|
|
"learning_rate": 3.210587154115501e-05,
|
|
"loss": 0.6377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6182513236999512,
|
|
"step": 5430,
|
|
"valid_targets_mean": 16220.3,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.450493728315986,
|
|
"grad_norm": 0.1358185671226795,
|
|
"learning_rate": 3.196268985522376e-05,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6077415943145752,
|
|
"step": 5435,
|
|
"valid_targets_mean": 15695.3,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.4518281291700026,
|
|
"grad_norm": 0.1711167885783527,
|
|
"learning_rate": 3.1819741649222485e-05,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.624676525592804,
|
|
"step": 5440,
|
|
"valid_targets_mean": 15729.9,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 1.4531625300240192,
|
|
"grad_norm": 0.12948194931451742,
|
|
"learning_rate": 3.167702769865354e-05,
|
|
"loss": 0.649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6523416638374329,
|
|
"step": 5445,
|
|
"valid_targets_mean": 16590.8,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 1.4544969308780358,
|
|
"grad_norm": 0.12902218339041557,
|
|
"learning_rate": 3.153454877774849e-05,
|
|
"loss": 0.6273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6242078542709351,
|
|
"step": 5450,
|
|
"valid_targets_mean": 16365.0,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.4558313317320524,
|
|
"grad_norm": 0.1432691221891138,
|
|
"learning_rate": 3.139230565946387e-05,
|
|
"loss": 0.6218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6167353987693787,
|
|
"step": 5455,
|
|
"valid_targets_mean": 15632.9,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.457165732586069,
|
|
"grad_norm": 0.1276182225412851,
|
|
"learning_rate": 3.1250299115476874e-05,
|
|
"loss": 0.6399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6298602223396301,
|
|
"step": 5460,
|
|
"valid_targets_mean": 15770.1,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 1.4585001334400853,
|
|
"grad_norm": 0.13430459310454113,
|
|
"learning_rate": 3.110852991618135e-05,
|
|
"loss": 0.6288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6262118816375732,
|
|
"step": 5465,
|
|
"valid_targets_mean": 15365.5,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.459834534294102,
|
|
"grad_norm": 0.1599431741046989,
|
|
"learning_rate": 3.0966998830683536e-05,
|
|
"loss": 0.626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6742227673530579,
|
|
"step": 5470,
|
|
"valid_targets_mean": 15713.1,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.4611689351481185,
|
|
"grad_norm": 0.15944593752909758,
|
|
"learning_rate": 3.082570662679782e-05,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6399157047271729,
|
|
"step": 5475,
|
|
"valid_targets_mean": 15939.1,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.4625033360021351,
|
|
"grad_norm": 0.15541531945278628,
|
|
"learning_rate": 3.068465407104275e-05,
|
|
"loss": 0.6261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6311227679252625,
|
|
"step": 5480,
|
|
"valid_targets_mean": 16185.3,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.4638377368561515,
|
|
"grad_norm": 0.14751704087209916,
|
|
"learning_rate": 3.054384192863664e-05,
|
|
"loss": 0.61,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6270675659179688,
|
|
"step": 5485,
|
|
"valid_targets_mean": 15642.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.465172137710168,
|
|
"grad_norm": 0.15132906174249833,
|
|
"learning_rate": 3.0403270963493657e-05,
|
|
"loss": 0.6209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6355879306793213,
|
|
"step": 5490,
|
|
"valid_targets_mean": 15252.9,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 1.4665065385641847,
|
|
"grad_norm": 0.16247058056010277,
|
|
"learning_rate": 3.026294193821954e-05,
|
|
"loss": 0.6313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6449081897735596,
|
|
"step": 5495,
|
|
"valid_targets_mean": 14411.7,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.4678409394182013,
|
|
"grad_norm": 0.13192412620555163,
|
|
"learning_rate": 3.012285561410742e-05,
|
|
"loss": 0.6244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6533715724945068,
|
|
"step": 5500,
|
|
"valid_targets_mean": 16055.3,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 1.4691753402722179,
|
|
"grad_norm": 0.15751341118940315,
|
|
"learning_rate": 2.9983012751133852e-05,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6110358238220215,
|
|
"step": 5505,
|
|
"valid_targets_mean": 16525.5,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.4705097411262344,
|
|
"grad_norm": 0.1393812435209019,
|
|
"learning_rate": 2.9843414107954588e-05,
|
|
"loss": 0.6175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.600792646408081,
|
|
"step": 5510,
|
|
"valid_targets_mean": 17548.1,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 1.4718441419802508,
|
|
"grad_norm": 0.19223152973983335,
|
|
"learning_rate": 2.9704060441900402e-05,
|
|
"loss": 0.6382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6561647653579712,
|
|
"step": 5515,
|
|
"valid_targets_mean": 15718.0,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.4731785428342674,
|
|
"grad_norm": 0.17235258312296173,
|
|
"learning_rate": 2.956495250897311e-05,
|
|
"loss": 0.6234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6213047504425049,
|
|
"step": 5520,
|
|
"valid_targets_mean": 15810.1,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.474512943688284,
|
|
"grad_norm": 0.13984398266172365,
|
|
"learning_rate": 2.9426091063841444e-05,
|
|
"loss": 0.6013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.622529149055481,
|
|
"step": 5525,
|
|
"valid_targets_mean": 16110.9,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.4758473445423004,
|
|
"grad_norm": 0.23048960361106072,
|
|
"learning_rate": 2.9287476859836817e-05,
|
|
"loss": 0.6269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6269211769104004,
|
|
"step": 5530,
|
|
"valid_targets_mean": 15144.9,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 1.477181745396317,
|
|
"grad_norm": 0.15808842147469523,
|
|
"learning_rate": 2.9149110648949447e-05,
|
|
"loss": 0.6364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6489793658256531,
|
|
"step": 5535,
|
|
"valid_targets_mean": 15504.8,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 1.4785161462503336,
|
|
"grad_norm": 0.1773982287753316,
|
|
"learning_rate": 2.9010993181824158e-05,
|
|
"loss": 0.6278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6429401636123657,
|
|
"step": 5540,
|
|
"valid_targets_mean": 17764.1,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.4798505471043502,
|
|
"grad_norm": 0.15296864635389418,
|
|
"learning_rate": 2.8873125207756255e-05,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6262995004653931,
|
|
"step": 5545,
|
|
"valid_targets_mean": 15969.9,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 1.4811849479583667,
|
|
"grad_norm": 0.16954670356160156,
|
|
"learning_rate": 2.8735507474687603e-05,
|
|
"loss": 0.6286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5944501161575317,
|
|
"step": 5550,
|
|
"valid_targets_mean": 16193.0,
|
|
"valid_targets_min": 155
|
|
},
|
|
{
|
|
"epoch": 1.4825193488123833,
|
|
"grad_norm": 0.1733352867343545,
|
|
"learning_rate": 2.859814072920249e-05,
|
|
"loss": 0.6152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5982875823974609,
|
|
"step": 5555,
|
|
"valid_targets_mean": 16719.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.4838537496663997,
|
|
"grad_norm": 0.1474432157642333,
|
|
"learning_rate": 2.846102571652352e-05,
|
|
"loss": 0.6297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6284036636352539,
|
|
"step": 5560,
|
|
"valid_targets_mean": 15888.5,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.4851881505204163,
|
|
"grad_norm": 0.1547180714905254,
|
|
"learning_rate": 2.8324163180507716e-05,
|
|
"loss": 0.6269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.606930673122406,
|
|
"step": 5565,
|
|
"valid_targets_mean": 16781.9,
|
|
"valid_targets_min": 133
|
|
},
|
|
{
|
|
"epoch": 1.486522551374433,
|
|
"grad_norm": 0.1544643190284509,
|
|
"learning_rate": 2.8187553863642314e-05,
|
|
"loss": 0.6191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6283329725265503,
|
|
"step": 5570,
|
|
"valid_targets_mean": 16809.4,
|
|
"valid_targets_min": 62
|
|
},
|
|
{
|
|
"epoch": 1.4878569522284495,
|
|
"grad_norm": 0.13276430690247742,
|
|
"learning_rate": 2.8051198507040876e-05,
|
|
"loss": 0.6375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6261632442474365,
|
|
"step": 5575,
|
|
"valid_targets_mean": 16062.8,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.4891913530824659,
|
|
"grad_norm": 0.1433474702905804,
|
|
"learning_rate": 2.7915097850439238e-05,
|
|
"loss": 0.6257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.633348822593689,
|
|
"step": 5580,
|
|
"valid_targets_mean": 17175.4,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 1.4905257539364825,
|
|
"grad_norm": 0.16163825754953928,
|
|
"learning_rate": 2.7779252632191394e-05,
|
|
"loss": 0.622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6284037828445435,
|
|
"step": 5585,
|
|
"valid_targets_mean": 16106.5,
|
|
"valid_targets_min": 128
|
|
},
|
|
{
|
|
"epoch": 1.491860154790499,
|
|
"grad_norm": 0.1398100737015759,
|
|
"learning_rate": 2.7643663589265642e-05,
|
|
"loss": 0.6309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6473536491394043,
|
|
"step": 5590,
|
|
"valid_targets_mean": 15708.0,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 1.4931945556445156,
|
|
"grad_norm": 0.14418286876980943,
|
|
"learning_rate": 2.750833145724049e-05,
|
|
"loss": 0.6368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6269103288650513,
|
|
"step": 5595,
|
|
"valid_targets_mean": 16005.2,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.4945289564985322,
|
|
"grad_norm": 0.13891282326030407,
|
|
"learning_rate": 2.7373256970300663e-05,
|
|
"loss": 0.6319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6326860785484314,
|
|
"step": 5600,
|
|
"valid_targets_mean": 15055.2,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 1.4958633573525488,
|
|
"grad_norm": 0.1493646883274745,
|
|
"learning_rate": 2.7238440861233176e-05,
|
|
"loss": 0.6427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6493257284164429,
|
|
"step": 5605,
|
|
"valid_targets_mean": 15705.0,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.4971977582065652,
|
|
"grad_norm": 0.1431354744300022,
|
|
"learning_rate": 2.710388386142335e-05,
|
|
"loss": 0.6366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6588246822357178,
|
|
"step": 5610,
|
|
"valid_targets_mean": 16106.0,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.4985321590605818,
|
|
"grad_norm": 0.15838659749919395,
|
|
"learning_rate": 2.6969586700850753e-05,
|
|
"loss": 0.6328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6347640752792358,
|
|
"step": 5615,
|
|
"valid_targets_mean": 16459.5,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 1.4998665599145984,
|
|
"grad_norm": 0.14202052693469316,
|
|
"learning_rate": 2.6835550108085373e-05,
|
|
"loss": 0.6234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6258725523948669,
|
|
"step": 5620,
|
|
"valid_targets_mean": 15989.1,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.5012009607686148,
|
|
"grad_norm": 0.1439483870521881,
|
|
"learning_rate": 2.67017748102836e-05,
|
|
"loss": 0.6214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6074327230453491,
|
|
"step": 5625,
|
|
"valid_targets_mean": 16374.8,
|
|
"valid_targets_min": 116
|
|
},
|
|
{
|
|
"epoch": 1.5025353616226313,
|
|
"grad_norm": 0.15289582914743377,
|
|
"learning_rate": 2.6568261533184233e-05,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6085629463195801,
|
|
"step": 5630,
|
|
"valid_targets_mean": 16198.2,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 1.503869762476648,
|
|
"grad_norm": 0.14157557834741372,
|
|
"learning_rate": 2.643501100110463e-05,
|
|
"loss": 0.6228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.602777361869812,
|
|
"step": 5635,
|
|
"valid_targets_mean": 15891.9,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 1.5052041633306645,
|
|
"grad_norm": 0.14813942268202115,
|
|
"learning_rate": 2.6302023936936776e-05,
|
|
"loss": 0.6309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6272621154785156,
|
|
"step": 5640,
|
|
"valid_targets_mean": 15252.1,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 1.5065385641846811,
|
|
"grad_norm": 0.12366778416433712,
|
|
"learning_rate": 2.616930106214323e-05,
|
|
"loss": 0.6271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6292228698730469,
|
|
"step": 5645,
|
|
"valid_targets_mean": 14449.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.5078729650386977,
|
|
"grad_norm": 0.13723249797592718,
|
|
"learning_rate": 2.6036843096753394e-05,
|
|
"loss": 0.6201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6192312836647034,
|
|
"step": 5650,
|
|
"valid_targets_mean": 16084.3,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 1.5092073658927143,
|
|
"grad_norm": 0.14013442644306484,
|
|
"learning_rate": 2.5904650759359528e-05,
|
|
"loss": 0.6359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6109635233879089,
|
|
"step": 5655,
|
|
"valid_targets_mean": 15055.7,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.5105417667467307,
|
|
"grad_norm": 0.16452772456933798,
|
|
"learning_rate": 2.5772724767112753e-05,
|
|
"loss": 0.6231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6206374764442444,
|
|
"step": 5660,
|
|
"valid_targets_mean": 14545.7,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.5118761676007473,
|
|
"grad_norm": 0.12885976269120666,
|
|
"learning_rate": 2.564106583571933e-05,
|
|
"loss": 0.6435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6476463079452515,
|
|
"step": 5665,
|
|
"valid_targets_mean": 14707.1,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.5132105684547636,
|
|
"grad_norm": 0.14333944608867255,
|
|
"learning_rate": 2.550967467943668e-05,
|
|
"loss": 0.6233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5939033627510071,
|
|
"step": 5670,
|
|
"valid_targets_mean": 16372.6,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.5145449693087802,
|
|
"grad_norm": 0.14271168882303076,
|
|
"learning_rate": 2.537855201106955e-05,
|
|
"loss": 0.6281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6289101839065552,
|
|
"step": 5675,
|
|
"valid_targets_mean": 17221.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.5158793701627968,
|
|
"grad_norm": 0.12587353338371615,
|
|
"learning_rate": 2.5247698541966066e-05,
|
|
"loss": 0.6366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6356736421585083,
|
|
"step": 5680,
|
|
"valid_targets_mean": 15807.4,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 1.5172137710168134,
|
|
"grad_norm": 0.135942169633523,
|
|
"learning_rate": 2.511711498201397e-05,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6369425058364868,
|
|
"step": 5685,
|
|
"valid_targets_mean": 16512.2,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 1.51854817187083,
|
|
"grad_norm": 0.1300809788916359,
|
|
"learning_rate": 2.4986802039636773e-05,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5703003406524658,
|
|
"step": 5690,
|
|
"valid_targets_mean": 16038.2,
|
|
"valid_targets_min": 100
|
|
},
|
|
{
|
|
"epoch": 1.5198825727248466,
|
|
"grad_norm": 0.13972459116404298,
|
|
"learning_rate": 2.485676042178976e-05,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6317213773727417,
|
|
"step": 5695,
|
|
"valid_targets_mean": 15872.5,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 1.5212169735788632,
|
|
"grad_norm": 0.13552004006518586,
|
|
"learning_rate": 2.4726990833956363e-05,
|
|
"loss": 0.6313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6211484670639038,
|
|
"step": 5700,
|
|
"valid_targets_mean": 14656.1,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 1.5225513744328798,
|
|
"grad_norm": 0.14386227146441763,
|
|
"learning_rate": 2.45974939801442e-05,
|
|
"loss": 0.6199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5977884531021118,
|
|
"step": 5705,
|
|
"valid_targets_mean": 16085.2,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.5238857752868962,
|
|
"grad_norm": 0.14276055446610306,
|
|
"learning_rate": 2.446827056288131e-05,
|
|
"loss": 0.6134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6355063915252686,
|
|
"step": 5710,
|
|
"valid_targets_mean": 16763.1,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.5252201761409128,
|
|
"grad_norm": 0.1579645574450176,
|
|
"learning_rate": 2.4339321283212276e-05,
|
|
"loss": 0.628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.639201283454895,
|
|
"step": 5715,
|
|
"valid_targets_mean": 15428.9,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 1.5265545769949291,
|
|
"grad_norm": 0.143331740428986,
|
|
"learning_rate": 2.421064684069453e-05,
|
|
"loss": 0.6343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6173781752586365,
|
|
"step": 5720,
|
|
"valid_targets_mean": 16842.1,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.5278889778489457,
|
|
"grad_norm": 0.1473926456593145,
|
|
"learning_rate": 2.4082247933394414e-05,
|
|
"loss": 0.6317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6531630754470825,
|
|
"step": 5725,
|
|
"valid_targets_mean": 15427.3,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 1.5292233787029623,
|
|
"grad_norm": 0.12416220263906289,
|
|
"learning_rate": 2.3954125257883558e-05,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5931440591812134,
|
|
"step": 5730,
|
|
"valid_targets_mean": 16346.6,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 1.530557779556979,
|
|
"grad_norm": 0.14830788556227337,
|
|
"learning_rate": 2.382627950923501e-05,
|
|
"loss": 0.6268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6504865884780884,
|
|
"step": 5735,
|
|
"valid_targets_mean": 15952.8,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 1.5318921804109955,
|
|
"grad_norm": 0.12532641548964424,
|
|
"learning_rate": 2.3698711381019398e-05,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6455780863761902,
|
|
"step": 5740,
|
|
"valid_targets_mean": 15303.9,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 1.533226581265012,
|
|
"grad_norm": 0.13403566036637113,
|
|
"learning_rate": 2.3571421565301315e-05,
|
|
"loss": 0.6159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6332656145095825,
|
|
"step": 5745,
|
|
"valid_targets_mean": 16482.3,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 1.5345609821190287,
|
|
"grad_norm": 0.12054379160148655,
|
|
"learning_rate": 2.3444410752635512e-05,
|
|
"loss": 0.6324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6107887029647827,
|
|
"step": 5750,
|
|
"valid_targets_mean": 15134.1,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 1.535895382973045,
|
|
"grad_norm": 0.1230026498193065,
|
|
"learning_rate": 2.331767963206302e-05,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6055983304977417,
|
|
"step": 5755,
|
|
"valid_targets_mean": 15497.7,
|
|
"valid_targets_min": 31
|
|
},
|
|
{
|
|
"epoch": 1.5372297838270617,
|
|
"grad_norm": 0.11103447026365328,
|
|
"learning_rate": 2.319122889110763e-05,
|
|
"loss": 0.6219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.597805917263031,
|
|
"step": 5760,
|
|
"valid_targets_mean": 16754.6,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.5385641846810783,
|
|
"grad_norm": 0.12465669790762561,
|
|
"learning_rate": 2.3065059215772057e-05,
|
|
"loss": 0.6176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6329092383384705,
|
|
"step": 5765,
|
|
"valid_targets_mean": 16111.2,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 1.5398985855350946,
|
|
"grad_norm": 0.12713411167158647,
|
|
"learning_rate": 2.2939171290534127e-05,
|
|
"loss": 0.6152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6023916006088257,
|
|
"step": 5770,
|
|
"valid_targets_mean": 15803.7,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.5412329863891112,
|
|
"grad_norm": 0.15179356801369148,
|
|
"learning_rate": 2.281356579834324e-05,
|
|
"loss": 0.6249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6431618332862854,
|
|
"step": 5775,
|
|
"valid_targets_mean": 16487.9,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.5425673872431278,
|
|
"grad_norm": 0.1691329261460599,
|
|
"learning_rate": 2.2688243420616573e-05,
|
|
"loss": 0.6162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6375676393508911,
|
|
"step": 5780,
|
|
"valid_targets_mean": 14028.1,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.5439017880971444,
|
|
"grad_norm": 0.12370458237505477,
|
|
"learning_rate": 2.2563204837235323e-05,
|
|
"loss": 0.637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.660173237323761,
|
|
"step": 5785,
|
|
"valid_targets_mean": 16358.3,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.545236188951161,
|
|
"grad_norm": 0.13657774631918929,
|
|
"learning_rate": 2.243845072654115e-05,
|
|
"loss": 0.6082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6270567178726196,
|
|
"step": 5790,
|
|
"valid_targets_mean": 15915.3,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 1.5465705898051776,
|
|
"grad_norm": 0.12029933244132973,
|
|
"learning_rate": 2.2313981765332464e-05,
|
|
"loss": 0.6252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6249598264694214,
|
|
"step": 5795,
|
|
"valid_targets_mean": 15742.1,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.5479049906591942,
|
|
"grad_norm": 0.14750127845581507,
|
|
"learning_rate": 2.2189798628860604e-05,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6229920387268066,
|
|
"step": 5800,
|
|
"valid_targets_mean": 15027.0,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 1.5492393915132106,
|
|
"grad_norm": 0.13344401129734557,
|
|
"learning_rate": 2.206590199082642e-05,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5828033089637756,
|
|
"step": 5805,
|
|
"valid_targets_mean": 16073.2,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.5505737923672271,
|
|
"grad_norm": 0.13797418177317824,
|
|
"learning_rate": 2.194229252337639e-05,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.627313494682312,
|
|
"step": 5810,
|
|
"valid_targets_mean": 15657.3,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 1.5519081932212435,
|
|
"grad_norm": 0.1307355779013856,
|
|
"learning_rate": 2.181897089709913e-05,
|
|
"loss": 0.6169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6516543626785278,
|
|
"step": 5815,
|
|
"valid_targets_mean": 16195.4,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 1.55324259407526,
|
|
"grad_norm": 4.503023293741496,
|
|
"learning_rate": 2.1695937781021736e-05,
|
|
"loss": 0.6162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6091921925544739,
|
|
"step": 5820,
|
|
"valid_targets_mean": 17610.6,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 1.5545769949292767,
|
|
"grad_norm": 0.11458451257446015,
|
|
"learning_rate": 2.1573193842606007e-05,
|
|
"loss": 0.6212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6066598296165466,
|
|
"step": 5825,
|
|
"valid_targets_mean": 15284.7,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.5559113957832933,
|
|
"grad_norm": 0.1322682207343315,
|
|
"learning_rate": 2.1450739747745034e-05,
|
|
"loss": 0.6232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.631981611251831,
|
|
"step": 5830,
|
|
"valid_targets_mean": 15703.0,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.5572457966373099,
|
|
"grad_norm": 0.13382475468535615,
|
|
"learning_rate": 2.1328576160759486e-05,
|
|
"loss": 0.6184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.608359694480896,
|
|
"step": 5835,
|
|
"valid_targets_mean": 15746.4,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 1.5585801974913265,
|
|
"grad_norm": 0.13395960709287713,
|
|
"learning_rate": 2.1206703744393936e-05,
|
|
"loss": 0.6477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6912551522254944,
|
|
"step": 5840,
|
|
"valid_targets_mean": 15902.0,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 1.559914598345343,
|
|
"grad_norm": 0.14083845588341448,
|
|
"learning_rate": 2.1085123159813398e-05,
|
|
"loss": 0.6249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6276473999023438,
|
|
"step": 5845,
|
|
"valid_targets_mean": 15508.2,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 1.5612489991993594,
|
|
"grad_norm": 0.12300532252725843,
|
|
"learning_rate": 2.0963835066599703e-05,
|
|
"loss": 0.6153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6046478748321533,
|
|
"step": 5850,
|
|
"valid_targets_mean": 16468.6,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 1.562583400053376,
|
|
"grad_norm": 0.16509827475279112,
|
|
"learning_rate": 2.084284012274781e-05,
|
|
"loss": 0.6387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6476929783821106,
|
|
"step": 5855,
|
|
"valid_targets_mean": 15581.5,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.5639178009073926,
|
|
"grad_norm": 0.11987186265018815,
|
|
"learning_rate": 2.0722138984662415e-05,
|
|
"loss": 0.6134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5942327380180359,
|
|
"step": 5860,
|
|
"valid_targets_mean": 15606.3,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.565252201761409,
|
|
"grad_norm": 0.11365970803386147,
|
|
"learning_rate": 2.0601732307154283e-05,
|
|
"loss": 0.6241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6330580711364746,
|
|
"step": 5865,
|
|
"valid_targets_mean": 16879.3,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 1.5665866026154256,
|
|
"grad_norm": 0.11422402035884933,
|
|
"learning_rate": 2.048162074343665e-05,
|
|
"loss": 0.6079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6077611446380615,
|
|
"step": 5870,
|
|
"valid_targets_mean": 15804.7,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.5679210034694422,
|
|
"grad_norm": 0.13577381742940237,
|
|
"learning_rate": 2.036180494512181e-05,
|
|
"loss": 0.6194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.605219304561615,
|
|
"step": 5875,
|
|
"valid_targets_mean": 15895.9,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.5692554043234588,
|
|
"grad_norm": 0.12183516763109287,
|
|
"learning_rate": 2.024228556221752e-05,
|
|
"loss": 0.6357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6044329404830933,
|
|
"step": 5880,
|
|
"valid_targets_mean": 16786.0,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.5705898051774754,
|
|
"grad_norm": 0.11449228550751389,
|
|
"learning_rate": 2.0123063243123395e-05,
|
|
"loss": 0.6309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6163490414619446,
|
|
"step": 5885,
|
|
"valid_targets_mean": 16912.6,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.571924206031492,
|
|
"grad_norm": 0.12515999124031796,
|
|
"learning_rate": 2.000413863462754e-05,
|
|
"loss": 0.6178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6148951053619385,
|
|
"step": 5890,
|
|
"valid_targets_mean": 17588.8,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 1.5732586068855086,
|
|
"grad_norm": 0.142567993474962,
|
|
"learning_rate": 1.988551238190288e-05,
|
|
"loss": 0.6099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5837486982345581,
|
|
"step": 5895,
|
|
"valid_targets_mean": 15626.2,
|
|
"valid_targets_min": 160
|
|
},
|
|
{
|
|
"epoch": 1.574593007739525,
|
|
"grad_norm": 0.13447486127424788,
|
|
"learning_rate": 1.9767185128503817e-05,
|
|
"loss": 0.6257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6562528610229492,
|
|
"step": 5900,
|
|
"valid_targets_mean": 16196.6,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.5759274085935415,
|
|
"grad_norm": 0.14137687867755716,
|
|
"learning_rate": 1.9649157516362663e-05,
|
|
"loss": 0.6298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5984712839126587,
|
|
"step": 5905,
|
|
"valid_targets_mean": 17495.9,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 1.577261809447558,
|
|
"grad_norm": 0.1667576470540979,
|
|
"learning_rate": 1.953143018578607e-05,
|
|
"loss": 0.6245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6378732919692993,
|
|
"step": 5910,
|
|
"valid_targets_mean": 15191.1,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 1.5785962103015745,
|
|
"grad_norm": 0.12107450822688537,
|
|
"learning_rate": 1.9414003775451754e-05,
|
|
"loss": 0.6281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6455228328704834,
|
|
"step": 5915,
|
|
"valid_targets_mean": 15451.3,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 1.579930611155591,
|
|
"grad_norm": 0.110173869196171,
|
|
"learning_rate": 1.9296878922404868e-05,
|
|
"loss": 0.6295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6453032493591309,
|
|
"step": 5920,
|
|
"valid_targets_mean": 15667.5,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.5812650120096077,
|
|
"grad_norm": 0.1599541540200817,
|
|
"learning_rate": 1.9180056262054575e-05,
|
|
"loss": 0.6268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6145048141479492,
|
|
"step": 5925,
|
|
"valid_targets_mean": 16622.8,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.5825994128636243,
|
|
"grad_norm": 0.13096133851618708,
|
|
"learning_rate": 1.9063536428170682e-05,
|
|
"loss": 0.6261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6367355585098267,
|
|
"step": 5930,
|
|
"valid_targets_mean": 15805.8,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.5839338137176409,
|
|
"grad_norm": 0.13026012485411959,
|
|
"learning_rate": 1.8947320052880106e-05,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6729874610900879,
|
|
"step": 5935,
|
|
"valid_targets_mean": 14871.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.5852682145716575,
|
|
"grad_norm": 0.14375365982896932,
|
|
"learning_rate": 1.8831407766663513e-05,
|
|
"loss": 0.633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6526641249656677,
|
|
"step": 5940,
|
|
"valid_targets_mean": 16618.8,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 1.5866026154256738,
|
|
"grad_norm": 0.13475996262830925,
|
|
"learning_rate": 1.8715800198351824e-05,
|
|
"loss": 0.6283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6065152287483215,
|
|
"step": 5945,
|
|
"valid_targets_mean": 15873.9,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.5879370162796904,
|
|
"grad_norm": 0.11567212712524262,
|
|
"learning_rate": 1.8600497975122877e-05,
|
|
"loss": 0.609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6047914028167725,
|
|
"step": 5950,
|
|
"valid_targets_mean": 15512.6,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 1.589271417133707,
|
|
"grad_norm": 0.11457752537575618,
|
|
"learning_rate": 1.8485501722498024e-05,
|
|
"loss": 0.6252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6193585395812988,
|
|
"step": 5955,
|
|
"valid_targets_mean": 15587.5,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.5906058179877234,
|
|
"grad_norm": 0.11153565821354677,
|
|
"learning_rate": 1.8370812064338624e-05,
|
|
"loss": 0.6192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6255379915237427,
|
|
"step": 5960,
|
|
"valid_targets_mean": 17222.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 1.59194021884174,
|
|
"grad_norm": 0.13306138930387013,
|
|
"learning_rate": 1.8256429622842818e-05,
|
|
"loss": 0.6184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6517558693885803,
|
|
"step": 5965,
|
|
"valid_targets_mean": 16220.2,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.5932746196957566,
|
|
"grad_norm": 0.1232840132144764,
|
|
"learning_rate": 1.814235501854206e-05,
|
|
"loss": 0.6188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6157183647155762,
|
|
"step": 5970,
|
|
"valid_targets_mean": 16222.9,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 1.5946090205497732,
|
|
"grad_norm": 0.1297889203000817,
|
|
"learning_rate": 1.8028588870297774e-05,
|
|
"loss": 0.6289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6302587985992432,
|
|
"step": 5975,
|
|
"valid_targets_mean": 14871.2,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.5959434214037898,
|
|
"grad_norm": 0.12199596817565361,
|
|
"learning_rate": 1.7915131795297956e-05,
|
|
"loss": 0.6275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6188775897026062,
|
|
"step": 5980,
|
|
"valid_targets_mean": 17172.1,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 1.5972778222578063,
|
|
"grad_norm": 0.10827901865386277,
|
|
"learning_rate": 1.7801984409053897e-05,
|
|
"loss": 0.6195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6051005125045776,
|
|
"step": 5985,
|
|
"valid_targets_mean": 16912.6,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 1.598612223111823,
|
|
"grad_norm": 0.1317375536272242,
|
|
"learning_rate": 1.7689147325396822e-05,
|
|
"loss": 0.6195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6553663015365601,
|
|
"step": 5990,
|
|
"valid_targets_mean": 15914.1,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 1.5999466239658393,
|
|
"grad_norm": 0.1552078070629989,
|
|
"learning_rate": 1.757662115647448e-05,
|
|
"loss": 0.6315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6441587209701538,
|
|
"step": 5995,
|
|
"valid_targets_mean": 15740.9,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 1.601281024819856,
|
|
"grad_norm": 0.11280524840766815,
|
|
"learning_rate": 1.7464406512747964e-05,
|
|
"loss": 0.6244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6006412506103516,
|
|
"step": 6000,
|
|
"valid_targets_mean": 15848.0,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 1.6026154256738723,
|
|
"grad_norm": 0.12471678249023585,
|
|
"learning_rate": 1.7352504002988303e-05,
|
|
"loss": 0.6293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6565764546394348,
|
|
"step": 6005,
|
|
"valid_targets_mean": 14849.7,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 1.6039498265278889,
|
|
"grad_norm": 0.1302767791335395,
|
|
"learning_rate": 1.7240914234273126e-05,
|
|
"loss": 0.64,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6389578580856323,
|
|
"step": 6010,
|
|
"valid_targets_mean": 16321.4,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.6052842273819055,
|
|
"grad_norm": 0.1418301646406482,
|
|
"learning_rate": 1.7129637811983507e-05,
|
|
"loss": 0.6195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6155678033828735,
|
|
"step": 6015,
|
|
"valid_targets_mean": 15113.6,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 1.606618628235922,
|
|
"grad_norm": 0.12063202066398034,
|
|
"learning_rate": 1.7018675339800557e-05,
|
|
"loss": 0.6288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6044756174087524,
|
|
"step": 6020,
|
|
"valid_targets_mean": 16629.7,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.6079530290899386,
|
|
"grad_norm": 0.11807271642364235,
|
|
"learning_rate": 1.690802741970217e-05,
|
|
"loss": 0.6217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5868763327598572,
|
|
"step": 6025,
|
|
"valid_targets_mean": 16365.8,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.6092874299439552,
|
|
"grad_norm": 0.12548250813804548,
|
|
"learning_rate": 1.6797694651959806e-05,
|
|
"loss": 0.6146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6389151811599731,
|
|
"step": 6030,
|
|
"valid_targets_mean": 16226.5,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 1.6106218307979718,
|
|
"grad_norm": 0.1155585728721283,
|
|
"learning_rate": 1.6687677635135218e-05,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380968689918518,
|
|
"step": 6035,
|
|
"valid_targets_mean": 14740.4,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 1.6119562316519882,
|
|
"grad_norm": 0.10679030044235797,
|
|
"learning_rate": 1.657797696607714e-05,
|
|
"loss": 0.631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6434690356254578,
|
|
"step": 6040,
|
|
"valid_targets_mean": 15633.7,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 1.6132906325060048,
|
|
"grad_norm": 0.1318356171263497,
|
|
"learning_rate": 1.6468593239918136e-05,
|
|
"loss": 0.6325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6549495458602905,
|
|
"step": 6045,
|
|
"valid_targets_mean": 15992.0,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 1.6146250333600214,
|
|
"grad_norm": 0.12706740457026952,
|
|
"learning_rate": 1.635952705007136e-05,
|
|
"loss": 0.6177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6158535480499268,
|
|
"step": 6050,
|
|
"valid_targets_mean": 17493.1,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 1.6159594342140378,
|
|
"grad_norm": 0.11346395785230665,
|
|
"learning_rate": 1.6250778988227248e-05,
|
|
"loss": 0.6375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6460615396499634,
|
|
"step": 6055,
|
|
"valid_targets_mean": 16660.3,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.6172938350680544,
|
|
"grad_norm": 0.10806654813730997,
|
|
"learning_rate": 1.614234964435044e-05,
|
|
"loss": 0.6128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.620269238948822,
|
|
"step": 6060,
|
|
"valid_targets_mean": 17277.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.618628235922071,
|
|
"grad_norm": 0.11771772078819821,
|
|
"learning_rate": 1.603423960667645e-05,
|
|
"loss": 0.6282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6567904949188232,
|
|
"step": 6065,
|
|
"valid_targets_mean": 16147.1,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.6199626367760875,
|
|
"grad_norm": 0.11055707030855468,
|
|
"learning_rate": 1.5926449461708577e-05,
|
|
"loss": 0.6325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6463902592658997,
|
|
"step": 6070,
|
|
"valid_targets_mean": 16450.0,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 1.6212970376301041,
|
|
"grad_norm": 0.12058597331639832,
|
|
"learning_rate": 1.581897979421471e-05,
|
|
"loss": 0.6296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6483221650123596,
|
|
"step": 6075,
|
|
"valid_targets_mean": 15915.4,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 1.6226314384841207,
|
|
"grad_norm": 0.11426474258314467,
|
|
"learning_rate": 1.571183118722405e-05,
|
|
"loss": 0.6261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623265266418457,
|
|
"step": 6080,
|
|
"valid_targets_mean": 15442.9,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 1.6239658393381373,
|
|
"grad_norm": 0.1167146873257186,
|
|
"learning_rate": 1.5605004222024074e-05,
|
|
"loss": 0.6316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6657377481460571,
|
|
"step": 6085,
|
|
"valid_targets_mean": 15991.5,
|
|
"valid_targets_min": 110
|
|
},
|
|
{
|
|
"epoch": 1.6253002401921537,
|
|
"grad_norm": 0.13662401295294158,
|
|
"learning_rate": 1.549849947815737e-05,
|
|
"loss": 0.6307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6083266735076904,
|
|
"step": 6090,
|
|
"valid_targets_mean": 15308.7,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 1.6266346410461703,
|
|
"grad_norm": 0.12981339267319347,
|
|
"learning_rate": 1.5392317533418366e-05,
|
|
"loss": 0.6336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6244166493415833,
|
|
"step": 6095,
|
|
"valid_targets_mean": 16245.6,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.6279690419001867,
|
|
"grad_norm": 0.11868066092095313,
|
|
"learning_rate": 1.5286458963850363e-05,
|
|
"loss": 0.6164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6081252098083496,
|
|
"step": 6100,
|
|
"valid_targets_mean": 15854.4,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 1.6293034427542032,
|
|
"grad_norm": 0.09876253127857607,
|
|
"learning_rate": 1.5180924343742316e-05,
|
|
"loss": 0.6209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6011303067207336,
|
|
"step": 6105,
|
|
"valid_targets_mean": 16851.0,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.6306378436082198,
|
|
"grad_norm": 0.1075861225035157,
|
|
"learning_rate": 1.5075714245625689e-05,
|
|
"loss": 0.6094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6410889029502869,
|
|
"step": 6110,
|
|
"valid_targets_mean": 15197.6,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.6319722444622364,
|
|
"grad_norm": 0.10809641783904705,
|
|
"learning_rate": 1.4970829240271448e-05,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6171035766601562,
|
|
"step": 6115,
|
|
"valid_targets_mean": 16254.8,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.633306645316253,
|
|
"grad_norm": 0.11734497353593293,
|
|
"learning_rate": 1.4866269896686917e-05,
|
|
"loss": 0.6557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6341985464096069,
|
|
"step": 6120,
|
|
"valid_targets_mean": 16048.5,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 1.6346410461702696,
|
|
"grad_norm": 0.11884394081671218,
|
|
"learning_rate": 1.4762036782112624e-05,
|
|
"loss": 0.6305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.629259467124939,
|
|
"step": 6125,
|
|
"valid_targets_mean": 16594.1,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 1.6359754470242862,
|
|
"grad_norm": 0.12452586757341529,
|
|
"learning_rate": 1.465813046201934e-05,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5998879671096802,
|
|
"step": 6130,
|
|
"valid_targets_mean": 16457.3,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 1.6373098478783028,
|
|
"grad_norm": 0.11193146868364372,
|
|
"learning_rate": 1.4554551500104971e-05,
|
|
"loss": 0.621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6431502103805542,
|
|
"step": 6135,
|
|
"valid_targets_mean": 16036.7,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 1.6386442487323192,
|
|
"grad_norm": 0.11208089126063975,
|
|
"learning_rate": 1.4451300458291401e-05,
|
|
"loss": 0.625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6364864706993103,
|
|
"step": 6140,
|
|
"valid_targets_mean": 16052.0,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.6399786495863358,
|
|
"grad_norm": 0.11237804561337364,
|
|
"learning_rate": 1.4348377896721635e-05,
|
|
"loss": 0.6175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6063425540924072,
|
|
"step": 6145,
|
|
"valid_targets_mean": 16655.0,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 1.6413130504403521,
|
|
"grad_norm": 0.12681529743407421,
|
|
"learning_rate": 1.4245784373756566e-05,
|
|
"loss": 0.6243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6388323307037354,
|
|
"step": 6150,
|
|
"valid_targets_mean": 14902.1,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 1.6426474512943687,
|
|
"grad_norm": 0.10827991259037892,
|
|
"learning_rate": 1.4143520445972078e-05,
|
|
"loss": 0.6307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6546180844306946,
|
|
"step": 6155,
|
|
"valid_targets_mean": 15754.9,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.6439818521483853,
|
|
"grad_norm": 0.11328509104388595,
|
|
"learning_rate": 1.4041586668155989e-05,
|
|
"loss": 0.6298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234450340270996,
|
|
"step": 6160,
|
|
"valid_targets_mean": 16188.8,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 1.645316253002402,
|
|
"grad_norm": 0.11256147696959475,
|
|
"learning_rate": 1.3939983593304992e-05,
|
|
"loss": 0.6049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5933871865272522,
|
|
"step": 6165,
|
|
"valid_targets_mean": 16867.5,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.6466506538564185,
|
|
"grad_norm": 0.11122460509970357,
|
|
"learning_rate": 1.3838711772621743e-05,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5979670882225037,
|
|
"step": 6170,
|
|
"valid_targets_mean": 15328.9,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 1.647985054710435,
|
|
"grad_norm": 0.1113993401924013,
|
|
"learning_rate": 1.3737771755511811e-05,
|
|
"loss": 0.6301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5980991721153259,
|
|
"step": 6175,
|
|
"valid_targets_mean": 15607.7,
|
|
"valid_targets_min": 19
|
|
},
|
|
{
|
|
"epoch": 1.6493194555644517,
|
|
"grad_norm": 0.09801233009419923,
|
|
"learning_rate": 1.3637164089580673e-05,
|
|
"loss": 0.6179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.582968533039093,
|
|
"step": 6180,
|
|
"valid_targets_mean": 17630.9,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 1.650653856418468,
|
|
"grad_norm": 0.1087711592160608,
|
|
"learning_rate": 1.3536889320630841e-05,
|
|
"loss": 0.626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6532264947891235,
|
|
"step": 6185,
|
|
"valid_targets_mean": 16123.4,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.6519882572724847,
|
|
"grad_norm": 0.11184043267681884,
|
|
"learning_rate": 1.3436947992658814e-05,
|
|
"loss": 0.608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6151658892631531,
|
|
"step": 6190,
|
|
"valid_targets_mean": 16591.8,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.6533226581265013,
|
|
"grad_norm": 0.11221188634828733,
|
|
"learning_rate": 1.3337340647852135e-05,
|
|
"loss": 0.6236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6335886716842651,
|
|
"step": 6195,
|
|
"valid_targets_mean": 15858.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.6546570589805176,
|
|
"grad_norm": 0.10263641797224998,
|
|
"learning_rate": 1.3238067826586491e-05,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.626166582107544,
|
|
"step": 6200,
|
|
"valid_targets_mean": 15845.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.6559914598345342,
|
|
"grad_norm": 0.1338534198225952,
|
|
"learning_rate": 1.3139130067422792e-05,
|
|
"loss": 0.6255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6296816468238831,
|
|
"step": 6205,
|
|
"valid_targets_mean": 16170.1,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.6573258606885508,
|
|
"grad_norm": 0.12010717720533998,
|
|
"learning_rate": 1.3040527907104126e-05,
|
|
"loss": 0.6354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6044759750366211,
|
|
"step": 6210,
|
|
"valid_targets_mean": 17815.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.6586602615425674,
|
|
"grad_norm": 0.12918287791024774,
|
|
"learning_rate": 1.2942261880553012e-05,
|
|
"loss": 0.6194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6262900829315186,
|
|
"step": 6215,
|
|
"valid_targets_mean": 16474.6,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.659994662396584,
|
|
"grad_norm": 0.132575982276469,
|
|
"learning_rate": 1.2844332520868433e-05,
|
|
"loss": 0.6166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5894056558609009,
|
|
"step": 6220,
|
|
"valid_targets_mean": 15954.4,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.6613290632506006,
|
|
"grad_norm": 0.1321503283916604,
|
|
"learning_rate": 1.2746740359322857e-05,
|
|
"loss": 0.6316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6472313404083252,
|
|
"step": 6225,
|
|
"valid_targets_mean": 13889.1,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 1.6626634641046172,
|
|
"grad_norm": 0.11477171567083412,
|
|
"learning_rate": 1.2649485925359514e-05,
|
|
"loss": 0.6315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6197164058685303,
|
|
"step": 6230,
|
|
"valid_targets_mean": 15947.2,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 1.6639978649586336,
|
|
"grad_norm": 0.12877855557649734,
|
|
"learning_rate": 1.2552569746589386e-05,
|
|
"loss": 0.6129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6343130469322205,
|
|
"step": 6235,
|
|
"valid_targets_mean": 15370.8,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 1.6653322658126501,
|
|
"grad_norm": 0.09600522815823713,
|
|
"learning_rate": 1.245599234878846e-05,
|
|
"loss": 0.6177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6135959029197693,
|
|
"step": 6240,
|
|
"valid_targets_mean": 15830.2,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.11616223212883085,
|
|
"learning_rate": 1.2359754255894737e-05,
|
|
"loss": 0.6274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.639473557472229,
|
|
"step": 6245,
|
|
"valid_targets_mean": 16264.7,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.6680010675206831,
|
|
"grad_norm": 0.11963674310921693,
|
|
"learning_rate": 1.2263855990005527e-05,
|
|
"loss": 0.6341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6277503967285156,
|
|
"step": 6250,
|
|
"valid_targets_mean": 16285.1,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 1.6693354683746997,
|
|
"grad_norm": 0.11062418716806117,
|
|
"learning_rate": 1.2168298071374543e-05,
|
|
"loss": 0.6287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6286278367042542,
|
|
"step": 6255,
|
|
"valid_targets_mean": 16346.8,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.6706698692287163,
|
|
"grad_norm": 0.10960058347069801,
|
|
"learning_rate": 1.2073081018409112e-05,
|
|
"loss": 0.6193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6018089652061462,
|
|
"step": 6260,
|
|
"valid_targets_mean": 16467.2,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 1.672004270082733,
|
|
"grad_norm": 0.10255118898034118,
|
|
"learning_rate": 1.1978205347667303e-05,
|
|
"loss": 0.6278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6391971707344055,
|
|
"step": 6265,
|
|
"valid_targets_mean": 16542.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.6733386709367495,
|
|
"grad_norm": 0.10248278837455822,
|
|
"learning_rate": 1.1883671573855186e-05,
|
|
"loss": 0.6231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6175813674926758,
|
|
"step": 6270,
|
|
"valid_targets_mean": 16722.9,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 1.674673071790766,
|
|
"grad_norm": 0.10176928928958952,
|
|
"learning_rate": 1.1789480209824064e-05,
|
|
"loss": 0.6142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5902460813522339,
|
|
"step": 6275,
|
|
"valid_targets_mean": 16472.0,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 1.6760074726447824,
|
|
"grad_norm": 0.1087183247378677,
|
|
"learning_rate": 1.1695631766567562e-05,
|
|
"loss": 0.6238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5925770401954651,
|
|
"step": 6280,
|
|
"valid_targets_mean": 17731.5,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.677341873498799,
|
|
"grad_norm": 0.10441887834481199,
|
|
"learning_rate": 1.1602126753219005e-05,
|
|
"loss": 0.6117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6068078875541687,
|
|
"step": 6285,
|
|
"valid_targets_mean": 15705.7,
|
|
"valid_targets_min": 167
|
|
},
|
|
{
|
|
"epoch": 1.6786762743528156,
|
|
"grad_norm": 0.1111444887936262,
|
|
"learning_rate": 1.1508965677048585e-05,
|
|
"loss": 0.628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6204681396484375,
|
|
"step": 6290,
|
|
"valid_targets_mean": 15415.0,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.680010675206832,
|
|
"grad_norm": 0.11846573698079757,
|
|
"learning_rate": 1.1416149043460562e-05,
|
|
"loss": 0.6156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6317400336265564,
|
|
"step": 6295,
|
|
"valid_targets_mean": 15837.8,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.6813450760608486,
|
|
"grad_norm": 0.10428763462278852,
|
|
"learning_rate": 1.132367735599066e-05,
|
|
"loss": 0.644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6342741250991821,
|
|
"step": 6300,
|
|
"valid_targets_mean": 16270.4,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 1.6826794769148652,
|
|
"grad_norm": 0.11537287186352241,
|
|
"learning_rate": 1.1231551116303162e-05,
|
|
"loss": 0.6336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6315730810165405,
|
|
"step": 6305,
|
|
"valid_targets_mean": 15533.6,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.6840138777688818,
|
|
"grad_norm": 0.11791504517126551,
|
|
"learning_rate": 1.1139770824188334e-05,
|
|
"loss": 0.6193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6582194566726685,
|
|
"step": 6310,
|
|
"valid_targets_mean": 16453.3,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.6853482786228984,
|
|
"grad_norm": 0.11369935640753852,
|
|
"learning_rate": 1.1048336977559666e-05,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6206117868423462,
|
|
"step": 6315,
|
|
"valid_targets_mean": 16782.0,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.686682679476915,
|
|
"grad_norm": 0.1105370783701265,
|
|
"learning_rate": 1.0957250072451084e-05,
|
|
"loss": 0.6292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6181900501251221,
|
|
"step": 6320,
|
|
"valid_targets_mean": 15370.0,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.6880170803309316,
|
|
"grad_norm": 0.11493760168455935,
|
|
"learning_rate": 1.0866510603014411e-05,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6352699995040894,
|
|
"step": 6325,
|
|
"valid_targets_mean": 16530.7,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.689351481184948,
|
|
"grad_norm": 0.11362010503965773,
|
|
"learning_rate": 1.0776119061516613e-05,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6385295987129211,
|
|
"step": 6330,
|
|
"valid_targets_mean": 16278.2,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.6906858820389645,
|
|
"grad_norm": 0.09896105840122521,
|
|
"learning_rate": 1.0686075938337055e-05,
|
|
"loss": 0.6021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5819606184959412,
|
|
"step": 6335,
|
|
"valid_targets_mean": 16842.0,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.692020282892981,
|
|
"grad_norm": 0.11475399800879332,
|
|
"learning_rate": 1.0596381721964984e-05,
|
|
"loss": 0.6366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.652917742729187,
|
|
"step": 6340,
|
|
"valid_targets_mean": 14536.0,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.6933546837469975,
|
|
"grad_norm": 0.10134623003905739,
|
|
"learning_rate": 1.0507036898996787e-05,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6215161681175232,
|
|
"step": 6345,
|
|
"valid_targets_mean": 16243.6,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 1.694689084601014,
|
|
"grad_norm": 0.09588119258250771,
|
|
"learning_rate": 1.0418041954133346e-05,
|
|
"loss": 0.6214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5866297483444214,
|
|
"step": 6350,
|
|
"valid_targets_mean": 15628.8,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 1.6960234854550307,
|
|
"grad_norm": 0.09986891287447414,
|
|
"learning_rate": 1.032939737017745e-05,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.610047459602356,
|
|
"step": 6355,
|
|
"valid_targets_mean": 15860.2,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.6973578863090473,
|
|
"grad_norm": 0.11484316617515455,
|
|
"learning_rate": 1.02411036280312e-05,
|
|
"loss": 0.6516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6614749431610107,
|
|
"step": 6360,
|
|
"valid_targets_mean": 15743.5,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.6986922871630639,
|
|
"grad_norm": 0.10913443246250537,
|
|
"learning_rate": 1.0153161206693269e-05,
|
|
"loss": 0.6237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5896070003509521,
|
|
"step": 6365,
|
|
"valid_targets_mean": 15874.9,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 1.7000266880170805,
|
|
"grad_norm": 0.11212393578471182,
|
|
"learning_rate": 1.0065570583256483e-05,
|
|
"loss": 0.6234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6237644553184509,
|
|
"step": 6370,
|
|
"valid_targets_mean": 16417.2,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.7013610888710968,
|
|
"grad_norm": 0.1169098281301673,
|
|
"learning_rate": 9.978332232905114e-06,
|
|
"loss": 0.6162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6001089811325073,
|
|
"step": 6375,
|
|
"valid_targets_mean": 16773.5,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 1.7026954897251134,
|
|
"grad_norm": 0.10243637132312938,
|
|
"learning_rate": 9.891446628912286e-06,
|
|
"loss": 0.6244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6666500568389893,
|
|
"step": 6380,
|
|
"valid_targets_mean": 14801.5,
|
|
"valid_targets_min": 87
|
|
},
|
|
{
|
|
"epoch": 1.70402989057913,
|
|
"grad_norm": 0.10482966119554739,
|
|
"learning_rate": 9.804914242637541e-06,
|
|
"loss": 0.6195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6140177249908447,
|
|
"step": 6385,
|
|
"valid_targets_mean": 16329.9,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.7053642914331464,
|
|
"grad_norm": 0.09782409890646393,
|
|
"learning_rate": 9.718735543524103e-06,
|
|
"loss": 0.6184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6123791933059692,
|
|
"step": 6390,
|
|
"valid_targets_mean": 16561.2,
|
|
"valid_targets_min": 21
|
|
},
|
|
{
|
|
"epoch": 1.706698692287163,
|
|
"grad_norm": 0.10668349901133359,
|
|
"learning_rate": 9.632910999096486e-06,
|
|
"loss": 0.6302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6120879650115967,
|
|
"step": 6395,
|
|
"valid_targets_mean": 16596.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.7080330931411796,
|
|
"grad_norm": 0.10309181428762786,
|
|
"learning_rate": 9.547441074957884e-06,
|
|
"loss": 0.6206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6141809225082397,
|
|
"step": 6400,
|
|
"valid_targets_mean": 16667.8,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.7093674939951962,
|
|
"grad_norm": 0.10161441538417851,
|
|
"learning_rate": 9.462326234787621e-06,
|
|
"loss": 0.6177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6218645572662354,
|
|
"step": 6405,
|
|
"valid_targets_mean": 15889.5,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.7107018948492128,
|
|
"grad_norm": 0.10421604528090228,
|
|
"learning_rate": 9.377566940338712e-06,
|
|
"loss": 0.6341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6338856220245361,
|
|
"step": 6410,
|
|
"valid_targets_mean": 17526.1,
|
|
"valid_targets_min": 116
|
|
},
|
|
{
|
|
"epoch": 1.7120362957032293,
|
|
"grad_norm": 0.09632850265502085,
|
|
"learning_rate": 9.293163651435298e-06,
|
|
"loss": 0.6171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6092801094055176,
|
|
"step": 6415,
|
|
"valid_targets_mean": 16826.8,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.713370696557246,
|
|
"grad_norm": 0.14268700677759225,
|
|
"learning_rate": 9.20911682597015e-06,
|
|
"loss": 0.6092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6385226249694824,
|
|
"step": 6420,
|
|
"valid_targets_mean": 15199.0,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.7147050974112623,
|
|
"grad_norm": 0.1141300203133269,
|
|
"learning_rate": 9.125426919902231e-06,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6092257499694824,
|
|
"step": 6425,
|
|
"valid_targets_mean": 16535.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.716039498265279,
|
|
"grad_norm": 0.10828299644111676,
|
|
"learning_rate": 9.042094387254212e-06,
|
|
"loss": 0.622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6178991794586182,
|
|
"step": 6430,
|
|
"valid_targets_mean": 14752.1,
|
|
"valid_targets_min": 120
|
|
},
|
|
{
|
|
"epoch": 1.7173738991192953,
|
|
"grad_norm": 0.11398118285533808,
|
|
"learning_rate": 8.959119680109918e-06,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6384214162826538,
|
|
"step": 6435,
|
|
"valid_targets_mean": 15340.4,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.7187082999733119,
|
|
"grad_norm": 0.10092605169272632,
|
|
"learning_rate": 8.876503248612036e-06,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6220699548721313,
|
|
"step": 6440,
|
|
"valid_targets_mean": 15507.5,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 1.7200427008273285,
|
|
"grad_norm": 0.11330458786874878,
|
|
"learning_rate": 8.794245540959546e-06,
|
|
"loss": 0.6221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6084585785865784,
|
|
"step": 6445,
|
|
"valid_targets_mean": 17206.7,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 1.721377101681345,
|
|
"grad_norm": 0.10025211270623526,
|
|
"learning_rate": 8.712347003405304e-06,
|
|
"loss": 0.6093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6082017421722412,
|
|
"step": 6450,
|
|
"valid_targets_mean": 15802.1,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 1.7227115025353616,
|
|
"grad_norm": 0.09997128198065185,
|
|
"learning_rate": 8.630808080253701e-06,
|
|
"loss": 0.6162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6296147704124451,
|
|
"step": 6455,
|
|
"valid_targets_mean": 16807.6,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 1.7240459033893782,
|
|
"grad_norm": 0.10414111634434077,
|
|
"learning_rate": 8.549629213858192e-06,
|
|
"loss": 0.6194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.631471574306488,
|
|
"step": 6460,
|
|
"valid_targets_mean": 16801.4,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 1.7253803042433948,
|
|
"grad_norm": 0.10504156485164873,
|
|
"learning_rate": 8.468810844618842e-06,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6191542148590088,
|
|
"step": 6465,
|
|
"valid_targets_mean": 15910.0,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 1.7267147050974114,
|
|
"grad_norm": 0.11531327546603624,
|
|
"learning_rate": 8.388353410980075e-06,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6417456865310669,
|
|
"step": 6470,
|
|
"valid_targets_mean": 16802.6,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 1.7280491059514278,
|
|
"grad_norm": 0.10884658145661602,
|
|
"learning_rate": 8.308257349428154e-06,
|
|
"loss": 0.617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6139084100723267,
|
|
"step": 6475,
|
|
"valid_targets_mean": 16511.0,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.7293835068054444,
|
|
"grad_norm": 0.10834253980457187,
|
|
"learning_rate": 8.228523094488928e-06,
|
|
"loss": 0.6147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.656073808670044,
|
|
"step": 6480,
|
|
"valid_targets_mean": 14856.8,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.7307179076594608,
|
|
"grad_norm": 0.10406939979420007,
|
|
"learning_rate": 8.149151078725416e-06,
|
|
"loss": 0.6319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6458457708358765,
|
|
"step": 6485,
|
|
"valid_targets_mean": 15603.7,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 1.7320523085134774,
|
|
"grad_norm": 0.0983819920288647,
|
|
"learning_rate": 8.070141732735424e-06,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6092060804367065,
|
|
"step": 6490,
|
|
"valid_targets_mean": 16581.9,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 1.733386709367494,
|
|
"grad_norm": 0.10687437145067663,
|
|
"learning_rate": 7.991495485149294e-06,
|
|
"loss": 0.6272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6291359663009644,
|
|
"step": 6495,
|
|
"valid_targets_mean": 15324.7,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 1.7347211102215105,
|
|
"grad_norm": 0.09660211319603874,
|
|
"learning_rate": 7.913212762627539e-06,
|
|
"loss": 0.6255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6016951203346252,
|
|
"step": 6500,
|
|
"valid_targets_mean": 16839.8,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 1.7360555110755271,
|
|
"grad_norm": 0.09778142185019807,
|
|
"learning_rate": 7.835293989858527e-06,
|
|
"loss": 0.6234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6057382225990295,
|
|
"step": 6505,
|
|
"valid_targets_mean": 16022.5,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 1.7373899119295437,
|
|
"grad_norm": 0.10616126123251467,
|
|
"learning_rate": 7.75773958955614e-06,
|
|
"loss": 0.6115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6232156753540039,
|
|
"step": 6510,
|
|
"valid_targets_mean": 15161.9,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 1.7387243127835603,
|
|
"grad_norm": 0.1107579677431388,
|
|
"learning_rate": 7.680549982457553e-06,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6101968288421631,
|
|
"step": 6515,
|
|
"valid_targets_mean": 15299.2,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 1.7400587136375767,
|
|
"grad_norm": 0.10381732514792791,
|
|
"learning_rate": 7.6037255873209165e-06,
|
|
"loss": 0.6347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6579749584197998,
|
|
"step": 6520,
|
|
"valid_targets_mean": 17051.9,
|
|
"valid_targets_min": 38
|
|
},
|
|
{
|
|
"epoch": 1.7413931144915933,
|
|
"grad_norm": 0.09614501769644973,
|
|
"learning_rate": 7.527266820923089e-06,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6192217469215393,
|
|
"step": 6525,
|
|
"valid_targets_mean": 16462.7,
|
|
"valid_targets_min": 50
|
|
},
|
|
{
|
|
"epoch": 1.7427275153456097,
|
|
"grad_norm": 0.0990367797322132,
|
|
"learning_rate": 7.45117409805733e-06,
|
|
"loss": 0.6227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6273068189620972,
|
|
"step": 6530,
|
|
"valid_targets_mean": 15751.7,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.7440619161996262,
|
|
"grad_norm": 0.09931179454623716,
|
|
"learning_rate": 7.375447831531128e-06,
|
|
"loss": 0.6198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5904843211174011,
|
|
"step": 6535,
|
|
"valid_targets_mean": 15938.6,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 1.7453963170536428,
|
|
"grad_norm": 0.09179141156609635,
|
|
"learning_rate": 7.300088432163945e-06,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6543793678283691,
|
|
"step": 6540,
|
|
"valid_targets_mean": 16912.6,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.7467307179076594,
|
|
"grad_norm": 0.10288250755398817,
|
|
"learning_rate": 7.2250963087849e-06,
|
|
"loss": 0.602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6032967567443848,
|
|
"step": 6545,
|
|
"valid_targets_mean": 14788.7,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 1.748065118761676,
|
|
"grad_norm": 0.10263235031062287,
|
|
"learning_rate": 7.1504718682306754e-06,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6326683163642883,
|
|
"step": 6550,
|
|
"valid_targets_mean": 14526.3,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 1.7493995196156926,
|
|
"grad_norm": 0.0976976803425861,
|
|
"learning_rate": 7.076215515343256e-06,
|
|
"loss": 0.6152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6148498058319092,
|
|
"step": 6555,
|
|
"valid_targets_mean": 15711.9,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 1.7507339204697092,
|
|
"grad_norm": 0.1025678208255761,
|
|
"learning_rate": 7.0023276529676655e-06,
|
|
"loss": 0.6431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6772218942642212,
|
|
"step": 6560,
|
|
"valid_targets_mean": 15850.6,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 1.7520683213237258,
|
|
"grad_norm": 0.09645792081965308,
|
|
"learning_rate": 6.928808681949919e-06,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6133409738540649,
|
|
"step": 6565,
|
|
"valid_targets_mean": 15282.2,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.7534027221777422,
|
|
"grad_norm": 0.0914832409321191,
|
|
"learning_rate": 6.855659001134739e-06,
|
|
"loss": 0.6368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.609194815158844,
|
|
"step": 6570,
|
|
"valid_targets_mean": 15832.7,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 1.7547371230317588,
|
|
"grad_norm": 0.09547658611807025,
|
|
"learning_rate": 6.78287900736342e-06,
|
|
"loss": 0.618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6080896854400635,
|
|
"step": 6575,
|
|
"valid_targets_mean": 16496.8,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.7560715238857751,
|
|
"grad_norm": 0.09427204552893005,
|
|
"learning_rate": 6.710469095471701e-06,
|
|
"loss": 0.6122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6215167045593262,
|
|
"step": 6580,
|
|
"valid_targets_mean": 17439.6,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.7574059247397917,
|
|
"grad_norm": 0.09694008938352663,
|
|
"learning_rate": 6.638429658287603e-06,
|
|
"loss": 0.6204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5812457799911499,
|
|
"step": 6585,
|
|
"valid_targets_mean": 15750.2,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.7587403255938083,
|
|
"grad_norm": 0.10046226105291199,
|
|
"learning_rate": 6.566761086629285e-06,
|
|
"loss": 0.6166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6253821849822998,
|
|
"step": 6590,
|
|
"valid_targets_mean": 17012.2,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 1.760074726447825,
|
|
"grad_norm": 0.10557213616677373,
|
|
"learning_rate": 6.495463769302952e-06,
|
|
"loss": 0.6269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6374891996383667,
|
|
"step": 6595,
|
|
"valid_targets_mean": 15844.3,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.7614091273018415,
|
|
"grad_norm": 0.10711113282060361,
|
|
"learning_rate": 6.424538093100745e-06,
|
|
"loss": 0.6302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6101715564727783,
|
|
"step": 6600,
|
|
"valid_targets_mean": 15971.3,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.762743528155858,
|
|
"grad_norm": 0.08854100780900193,
|
|
"learning_rate": 6.353984442798582e-06,
|
|
"loss": 0.6273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5808321237564087,
|
|
"step": 6605,
|
|
"valid_targets_mean": 17444.8,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 1.7640779290098747,
|
|
"grad_norm": 0.10110922215288053,
|
|
"learning_rate": 6.283803201154173e-06,
|
|
"loss": 0.6126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6101663708686829,
|
|
"step": 6610,
|
|
"valid_targets_mean": 17214.1,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 1.765412329863891,
|
|
"grad_norm": 0.20493053145002474,
|
|
"learning_rate": 6.213994748904866e-06,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6190000772476196,
|
|
"step": 6615,
|
|
"valid_targets_mean": 16494.0,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 1.7667467307179077,
|
|
"grad_norm": 0.09578545081324519,
|
|
"learning_rate": 6.144559464765605e-06,
|
|
"loss": 0.6216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.592930257320404,
|
|
"step": 6620,
|
|
"valid_targets_mean": 16264.1,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 1.7680811315719243,
|
|
"grad_norm": 0.10950373475500869,
|
|
"learning_rate": 6.075497725426862e-06,
|
|
"loss": 0.6267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6404623985290527,
|
|
"step": 6625,
|
|
"valid_targets_mean": 15382.0,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 1.7694155324259406,
|
|
"grad_norm": 0.099924757154183,
|
|
"learning_rate": 6.0068099055526505e-06,
|
|
"loss": 0.6325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6134600639343262,
|
|
"step": 6630,
|
|
"valid_targets_mean": 16468.3,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.7707499332799572,
|
|
"grad_norm": 0.10276845222389408,
|
|
"learning_rate": 5.938496377778395e-06,
|
|
"loss": 0.6277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6179153919219971,
|
|
"step": 6635,
|
|
"valid_targets_mean": 15734.6,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 1.7720843341339738,
|
|
"grad_norm": 0.10264895930540381,
|
|
"learning_rate": 5.870557512709001e-06,
|
|
"loss": 0.6237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.611757755279541,
|
|
"step": 6640,
|
|
"valid_targets_mean": 16182.3,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 1.7734187349879904,
|
|
"grad_norm": 0.10089157369476227,
|
|
"learning_rate": 5.802993678916773e-06,
|
|
"loss": 0.6333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6308541297912598,
|
|
"step": 6645,
|
|
"valid_targets_mean": 15795.2,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.774753135842007,
|
|
"grad_norm": 0.09778285328051206,
|
|
"learning_rate": 5.7358052429394785e-06,
|
|
"loss": 0.6253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.614090085029602,
|
|
"step": 6650,
|
|
"valid_targets_mean": 16161.4,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 1.7760875366960236,
|
|
"grad_norm": 0.09629551427385658,
|
|
"learning_rate": 5.668992569278347e-06,
|
|
"loss": 0.6242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6327216625213623,
|
|
"step": 6655,
|
|
"valid_targets_mean": 15158.7,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.7774219375500402,
|
|
"grad_norm": 0.09088615364424045,
|
|
"learning_rate": 5.602556020396004e-06,
|
|
"loss": 0.604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.596990704536438,
|
|
"step": 6660,
|
|
"valid_targets_mean": 16353.3,
|
|
"valid_targets_min": 144
|
|
},
|
|
{
|
|
"epoch": 1.7787563384040566,
|
|
"grad_norm": 0.09034274812917362,
|
|
"learning_rate": 5.5364959567146514e-06,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6033511161804199,
|
|
"step": 6665,
|
|
"valid_targets_mean": 16006.4,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.7800907392580732,
|
|
"grad_norm": 0.10302388333968907,
|
|
"learning_rate": 5.470812736614014e-06,
|
|
"loss": 0.6278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6411268711090088,
|
|
"step": 6670,
|
|
"valid_targets_mean": 14318.7,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 1.7814251401120895,
|
|
"grad_norm": 0.09713243549010361,
|
|
"learning_rate": 5.405506716429378e-06,
|
|
"loss": 0.62,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6400075554847717,
|
|
"step": 6675,
|
|
"valid_targets_mean": 16221.4,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 1.7827595409661061,
|
|
"grad_norm": 0.09115390102874049,
|
|
"learning_rate": 5.340578250449742e-06,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5856536626815796,
|
|
"step": 6680,
|
|
"valid_targets_mean": 16640.9,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 1.7840939418201227,
|
|
"grad_norm": 0.09460582406704682,
|
|
"learning_rate": 5.276027690915868e-06,
|
|
"loss": 0.6335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6678420305252075,
|
|
"step": 6685,
|
|
"valid_targets_mean": 16394.7,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 1.7854283426741393,
|
|
"grad_norm": 0.09718467678046305,
|
|
"learning_rate": 5.211855388018282e-06,
|
|
"loss": 0.6329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6457293629646301,
|
|
"step": 6690,
|
|
"valid_targets_mean": 16005.6,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.786762743528156,
|
|
"grad_norm": 0.09177107012065362,
|
|
"learning_rate": 5.148061689895519e-06,
|
|
"loss": 0.6356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6243642568588257,
|
|
"step": 6695,
|
|
"valid_targets_mean": 15437.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.7880971443821725,
|
|
"grad_norm": 0.10024429506027321,
|
|
"learning_rate": 5.084646942632123e-06,
|
|
"loss": 0.6149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.591733455657959,
|
|
"step": 6700,
|
|
"valid_targets_mean": 16580.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.789431545236189,
|
|
"grad_norm": 0.10069899627284365,
|
|
"learning_rate": 5.0216114902567995e-06,
|
|
"loss": 0.6165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.649412989616394,
|
|
"step": 6705,
|
|
"valid_targets_mean": 14921.5,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.7907659460902055,
|
|
"grad_norm": 0.09197071177860598,
|
|
"learning_rate": 4.9589556747406e-06,
|
|
"loss": 0.6132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6070678234100342,
|
|
"step": 6710,
|
|
"valid_targets_mean": 16360.8,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 1.792100346944222,
|
|
"grad_norm": 0.09280745282636378,
|
|
"learning_rate": 4.896679835994965e-06,
|
|
"loss": 0.6083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5776534676551819,
|
|
"step": 6715,
|
|
"valid_targets_mean": 16509.3,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 1.7934347477982386,
|
|
"grad_norm": 0.08650443355926254,
|
|
"learning_rate": 4.834784311869985e-06,
|
|
"loss": 0.606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5921191573143005,
|
|
"step": 6720,
|
|
"valid_targets_mean": 16442.8,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 1.794769148652255,
|
|
"grad_norm": 0.1016813277797864,
|
|
"learning_rate": 4.773269438152516e-06,
|
|
"loss": 0.6313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.631247878074646,
|
|
"step": 6725,
|
|
"valid_targets_mean": 14925.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.7961035495062716,
|
|
"grad_norm": 0.08980829163466826,
|
|
"learning_rate": 4.712135548564333e-06,
|
|
"loss": 0.6032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.625686764717102,
|
|
"step": 6730,
|
|
"valid_targets_mean": 16406.4,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 1.7974379503602882,
|
|
"grad_norm": 0.09012413974461835,
|
|
"learning_rate": 4.651382974760382e-06,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6504734754562378,
|
|
"step": 6735,
|
|
"valid_targets_mean": 16003.1,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 1.7987723512143048,
|
|
"grad_norm": 0.09455961172931404,
|
|
"learning_rate": 4.591012046326944e-06,
|
|
"loss": 0.6138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6531174778938293,
|
|
"step": 6740,
|
|
"valid_targets_mean": 15780.7,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.8001067520683214,
|
|
"grad_norm": 0.08931663578747313,
|
|
"learning_rate": 4.5310230907798285e-06,
|
|
"loss": 0.6212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6175740957260132,
|
|
"step": 6745,
|
|
"valid_targets_mean": 15773.0,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 1.801441152922338,
|
|
"grad_norm": 0.09386050236286557,
|
|
"learning_rate": 4.471416433562638e-06,
|
|
"loss": 0.6148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6489225625991821,
|
|
"step": 6750,
|
|
"valid_targets_mean": 16038.3,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 1.8027755537763546,
|
|
"grad_norm": 0.2394574484960667,
|
|
"learning_rate": 4.412192398044997e-06,
|
|
"loss": 0.611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5736192464828491,
|
|
"step": 6755,
|
|
"valid_targets_mean": 16190.5,
|
|
"valid_targets_min": 18
|
|
},
|
|
{
|
|
"epoch": 1.804109954630371,
|
|
"grad_norm": 0.10661505123251912,
|
|
"learning_rate": 4.353351305520747e-06,
|
|
"loss": 0.6223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6465322375297546,
|
|
"step": 6760,
|
|
"valid_targets_mean": 17468.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.8054443554843875,
|
|
"grad_norm": 0.09541439718146676,
|
|
"learning_rate": 4.2948934752062655e-06,
|
|
"loss": 0.6358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6139446496963501,
|
|
"step": 6765,
|
|
"valid_targets_mean": 17065.6,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.806778756338404,
|
|
"grad_norm": 0.09386372933318869,
|
|
"learning_rate": 4.2368192242387355e-06,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6409814357757568,
|
|
"step": 6770,
|
|
"valid_targets_mean": 14635.9,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 1.8081131571924205,
|
|
"grad_norm": 0.10396463801811591,
|
|
"learning_rate": 4.179128867674348e-06,
|
|
"loss": 0.6016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6246637105941772,
|
|
"step": 6775,
|
|
"valid_targets_mean": 17589.5,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 1.809447558046437,
|
|
"grad_norm": 0.10734828990482441,
|
|
"learning_rate": 4.121822718486664e-06,
|
|
"loss": 0.6237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6268129944801331,
|
|
"step": 6780,
|
|
"valid_targets_mean": 15491.9,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 1.8107819589004537,
|
|
"grad_norm": 0.1001303927768195,
|
|
"learning_rate": 4.064901087564918e-06,
|
|
"loss": 0.6359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6495320200920105,
|
|
"step": 6785,
|
|
"valid_targets_mean": 14824.9,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 1.8121163597544703,
|
|
"grad_norm": 0.09842864829102572,
|
|
"learning_rate": 4.008364283712298e-06,
|
|
"loss": 0.6049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6317485570907593,
|
|
"step": 6790,
|
|
"valid_targets_mean": 15738.1,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 1.8134507606084869,
|
|
"grad_norm": 0.10150505686914285,
|
|
"learning_rate": 3.9522126136442515e-06,
|
|
"loss": 0.613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6200088858604431,
|
|
"step": 6795,
|
|
"valid_targets_mean": 17679.3,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 1.8147851614625035,
|
|
"grad_norm": 0.10700599241927931,
|
|
"learning_rate": 3.896446381986901e-06,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6170170307159424,
|
|
"step": 6800,
|
|
"valid_targets_mean": 15742.3,
|
|
"valid_targets_min": 20
|
|
},
|
|
{
|
|
"epoch": 1.8161195623165198,
|
|
"grad_norm": 0.09039156496096841,
|
|
"learning_rate": 3.841065891275328e-06,
|
|
"loss": 0.6106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6032929420471191,
|
|
"step": 6805,
|
|
"valid_targets_mean": 16021.7,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.8174539631705364,
|
|
"grad_norm": 0.08908389333812784,
|
|
"learning_rate": 3.786071441951918e-06,
|
|
"loss": 0.6003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6013544201850891,
|
|
"step": 6810,
|
|
"valid_targets_mean": 14962.3,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 1.818788364024553,
|
|
"grad_norm": 0.08782159043693183,
|
|
"learning_rate": 3.7314633323647952e-06,
|
|
"loss": 0.6287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6162735819816589,
|
|
"step": 6815,
|
|
"valid_targets_mean": 15977.1,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 1.8201227648785694,
|
|
"grad_norm": 0.09504087980101503,
|
|
"learning_rate": 3.6772418587661474e-06,
|
|
"loss": 0.6185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6311695575714111,
|
|
"step": 6820,
|
|
"valid_targets_mean": 16229.0,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.821457165732586,
|
|
"grad_norm": 0.09507045562862382,
|
|
"learning_rate": 3.623407315310667e-06,
|
|
"loss": 0.6392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6561924815177917,
|
|
"step": 6825,
|
|
"valid_targets_mean": 15305.6,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.8227915665866026,
|
|
"grad_norm": 0.09690981421722983,
|
|
"learning_rate": 3.5699599940538836e-06,
|
|
"loss": 0.622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6211844086647034,
|
|
"step": 6830,
|
|
"valid_targets_mean": 15366.9,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 1.8241259674406192,
|
|
"grad_norm": 0.09590079146274327,
|
|
"learning_rate": 3.5169001849506496e-06,
|
|
"loss": 0.6443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6609474420547485,
|
|
"step": 6835,
|
|
"valid_targets_mean": 15275.2,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 1.8254603682946358,
|
|
"grad_norm": 0.08760180167427375,
|
|
"learning_rate": 3.4642281758535645e-06,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5892127752304077,
|
|
"step": 6840,
|
|
"valid_targets_mean": 16206.0,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.8267947691486524,
|
|
"grad_norm": 0.09239379002212816,
|
|
"learning_rate": 3.4119442525113283e-06,
|
|
"loss": 0.6236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5922919511795044,
|
|
"step": 6845,
|
|
"valid_targets_mean": 15991.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 1.828129170002669,
|
|
"grad_norm": 0.0946090785298155,
|
|
"learning_rate": 3.3600486985673163e-06,
|
|
"loss": 0.6147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6722390651702881,
|
|
"step": 6850,
|
|
"valid_targets_mean": 14939.3,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.8294635708566853,
|
|
"grad_norm": 0.09402100238995499,
|
|
"learning_rate": 3.308541795557948e-06,
|
|
"loss": 0.6244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6498205661773682,
|
|
"step": 6855,
|
|
"valid_targets_mean": 14678.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 1.830797971710702,
|
|
"grad_norm": 0.09131839871535954,
|
|
"learning_rate": 3.2574238229111704e-06,
|
|
"loss": 0.6201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5872923135757446,
|
|
"step": 6860,
|
|
"valid_targets_mean": 15649.0,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.8321323725647183,
|
|
"grad_norm": 0.09225730410624328,
|
|
"learning_rate": 3.2066950579450024e-06,
|
|
"loss": 0.6118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5875071287155151,
|
|
"step": 6865,
|
|
"valid_targets_mean": 16057.5,
|
|
"valid_targets_min": 57
|
|
},
|
|
{
|
|
"epoch": 1.8334667734187349,
|
|
"grad_norm": 0.09345988443966001,
|
|
"learning_rate": 3.156355775865968e-06,
|
|
"loss": 0.6076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6212291717529297,
|
|
"step": 6870,
|
|
"valid_targets_mean": 15905.6,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 1.8348011742727515,
|
|
"grad_norm": 0.09836957478446351,
|
|
"learning_rate": 3.106406249767607e-06,
|
|
"loss": 0.6107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6226930618286133,
|
|
"step": 6875,
|
|
"valid_targets_mean": 15694.5,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 1.836135575126768,
|
|
"grad_norm": 0.0935003323911921,
|
|
"learning_rate": 3.056846750629041e-06,
|
|
"loss": 0.6327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6048910021781921,
|
|
"step": 6880,
|
|
"valid_targets_mean": 16648.4,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 1.8374699759807847,
|
|
"grad_norm": 0.0996757747150725,
|
|
"learning_rate": 3.007677547313436e-06,
|
|
"loss": 0.6252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6208453178405762,
|
|
"step": 6885,
|
|
"valid_targets_mean": 16256.8,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 1.8388043768348012,
|
|
"grad_norm": 0.08849021655160869,
|
|
"learning_rate": 2.958898906566626e-06,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6154869794845581,
|
|
"step": 6890,
|
|
"valid_targets_mean": 16353.3,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 1.8401387776888178,
|
|
"grad_norm": 0.08928376035381869,
|
|
"learning_rate": 2.910511093015588e-06,
|
|
"loss": 0.6283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6386107206344604,
|
|
"step": 6895,
|
|
"valid_targets_mean": 15752.9,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 1.8414731785428344,
|
|
"grad_norm": 0.09861416715637732,
|
|
"learning_rate": 2.8625143691670404e-06,
|
|
"loss": 0.6164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6506497859954834,
|
|
"step": 6900,
|
|
"valid_targets_mean": 14408.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.8428075793968508,
|
|
"grad_norm": 0.09163939734191671,
|
|
"learning_rate": 2.8149089954060287e-06,
|
|
"loss": 0.6141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.597012996673584,
|
|
"step": 6905,
|
|
"valid_targets_mean": 15989.0,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 1.8441419802508674,
|
|
"grad_norm": 0.08995619879356641,
|
|
"learning_rate": 2.767695229994507e-06,
|
|
"loss": 0.6416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6536890268325806,
|
|
"step": 6910,
|
|
"valid_targets_mean": 17191.4,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 1.8454763811048838,
|
|
"grad_norm": 0.0839875319084046,
|
|
"learning_rate": 2.720873329069895e-06,
|
|
"loss": 0.6183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5766624808311462,
|
|
"step": 6915,
|
|
"valid_targets_mean": 17233.1,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.8468107819589004,
|
|
"grad_norm": 0.09083917258196791,
|
|
"learning_rate": 2.6744435466437535e-06,
|
|
"loss": 0.6195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6382717490196228,
|
|
"step": 6920,
|
|
"valid_targets_mean": 14588.1,
|
|
"valid_targets_min": 100
|
|
},
|
|
{
|
|
"epoch": 1.848145182812917,
|
|
"grad_norm": 0.08650332344005253,
|
|
"learning_rate": 2.6284061346004055e-06,
|
|
"loss": 0.6177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6030929684638977,
|
|
"step": 6925,
|
|
"valid_targets_mean": 16558.5,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.8494795836669335,
|
|
"grad_norm": 0.09444305089496056,
|
|
"learning_rate": 2.5827613426954664e-06,
|
|
"loss": 0.6256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380075216293335,
|
|
"step": 6930,
|
|
"valid_targets_mean": 14690.7,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.8508139845209501,
|
|
"grad_norm": 0.09483051297100908,
|
|
"learning_rate": 2.537509418554631e-06,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6225526332855225,
|
|
"step": 6935,
|
|
"valid_targets_mean": 15293.6,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 1.8521483853749667,
|
|
"grad_norm": 0.10046672290201178,
|
|
"learning_rate": 2.4926506076722417e-06,
|
|
"loss": 0.6288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6159603595733643,
|
|
"step": 6940,
|
|
"valid_targets_mean": 15709.3,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.8534827862289833,
|
|
"grad_norm": 0.08827032404875429,
|
|
"learning_rate": 2.4481851534099707e-06,
|
|
"loss": 0.6151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5953460931777954,
|
|
"step": 6945,
|
|
"valid_targets_mean": 15381.7,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.8548171870829997,
|
|
"grad_norm": 0.08758508845183906,
|
|
"learning_rate": 2.404113296995505e-06,
|
|
"loss": 0.6321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6001626253128052,
|
|
"step": 6950,
|
|
"valid_targets_mean": 15501.0,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 1.8561515879370163,
|
|
"grad_norm": 0.08709771266028127,
|
|
"learning_rate": 2.360435277521275e-06,
|
|
"loss": 0.6201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6188645958900452,
|
|
"step": 6955,
|
|
"valid_targets_mean": 16469.7,
|
|
"valid_targets_min": 118
|
|
},
|
|
{
|
|
"epoch": 1.8574859887910327,
|
|
"grad_norm": 0.09427280621483805,
|
|
"learning_rate": 2.3171513319430596e-06,
|
|
"loss": 0.6346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.649395227432251,
|
|
"step": 6960,
|
|
"valid_targets_mean": 14874.3,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 1.8588203896450493,
|
|
"grad_norm": 0.0808931066414944,
|
|
"learning_rate": 2.274261695078841e-06,
|
|
"loss": 0.6268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6024237871170044,
|
|
"step": 6965,
|
|
"valid_targets_mean": 17161.4,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 1.8601547904990658,
|
|
"grad_norm": 0.08414593552341547,
|
|
"learning_rate": 2.231766599607371e-06,
|
|
"loss": 0.5995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5632792711257935,
|
|
"step": 6970,
|
|
"valid_targets_mean": 15532.3,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.8614891913530824,
|
|
"grad_norm": 0.09441058517305718,
|
|
"learning_rate": 2.1896662760670618e-06,
|
|
"loss": 0.621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6468135714530945,
|
|
"step": 6975,
|
|
"valid_targets_mean": 16032.7,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 1.862823592207099,
|
|
"grad_norm": 0.0851258200952786,
|
|
"learning_rate": 2.1479609528546328e-06,
|
|
"loss": 0.6248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6156222820281982,
|
|
"step": 6980,
|
|
"valid_targets_mean": 16126.7,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 1.8641579930611156,
|
|
"grad_norm": 0.08353380996015211,
|
|
"learning_rate": 2.106650856223899e-06,
|
|
"loss": 0.6274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6163721084594727,
|
|
"step": 6985,
|
|
"valid_targets_mean": 16980.7,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 1.8654923939151322,
|
|
"grad_norm": 0.0901724183069621,
|
|
"learning_rate": 2.0657362102845576e-06,
|
|
"loss": 0.6138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6244465112686157,
|
|
"step": 6990,
|
|
"valid_targets_mean": 14835.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.8668267947691488,
|
|
"grad_norm": 0.3287515102847484,
|
|
"learning_rate": 2.0252172370009646e-06,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.603171169757843,
|
|
"step": 6995,
|
|
"valid_targets_mean": 16227.0,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 1.8681611956231652,
|
|
"grad_norm": 0.08880212175295829,
|
|
"learning_rate": 1.985094156190925e-06,
|
|
"loss": 0.6345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6128568649291992,
|
|
"step": 7000,
|
|
"valid_targets_mean": 16593.5,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 1.8694955964771818,
|
|
"grad_norm": 0.09309393580041371,
|
|
"learning_rate": 1.9453671855244963e-06,
|
|
"loss": 0.6211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6289182901382446,
|
|
"step": 7005,
|
|
"valid_targets_mean": 16403.7,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 1.8708299973311981,
|
|
"grad_norm": 0.09480583622572787,
|
|
"learning_rate": 1.906036540522829e-06,
|
|
"loss": 0.6272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6634846329689026,
|
|
"step": 7010,
|
|
"valid_targets_mean": 15357.0,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 1.8721643981852147,
|
|
"grad_norm": 0.08715063530216999,
|
|
"learning_rate": 1.8671024345569773e-06,
|
|
"loss": 0.6295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6392030119895935,
|
|
"step": 7015,
|
|
"valid_targets_mean": 16358.6,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.8734987990392313,
|
|
"grad_norm": 0.08347898430310177,
|
|
"learning_rate": 1.8285650788467415e-06,
|
|
"loss": 0.6117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5922134518623352,
|
|
"step": 7020,
|
|
"valid_targets_mean": 16060.0,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.874833199893248,
|
|
"grad_norm": 0.08718723584074349,
|
|
"learning_rate": 1.7904246824595514e-06,
|
|
"loss": 0.621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6190272569656372,
|
|
"step": 7025,
|
|
"valid_targets_mean": 16108.1,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.8761676007472645,
|
|
"grad_norm": 0.08913725712331906,
|
|
"learning_rate": 1.7526814523092763e-06,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6637852787971497,
|
|
"step": 7030,
|
|
"valid_targets_mean": 14897.7,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.8775020016012811,
|
|
"grad_norm": 0.09023604956675488,
|
|
"learning_rate": 1.7153355931551592e-06,
|
|
"loss": 0.6462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6529320478439331,
|
|
"step": 7035,
|
|
"valid_targets_mean": 15052.8,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.8788364024552977,
|
|
"grad_norm": 0.09370662512546263,
|
|
"learning_rate": 1.678387307600676e-06,
|
|
"loss": 0.6337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6144789457321167,
|
|
"step": 7040,
|
|
"valid_targets_mean": 15715.4,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.880170803309314,
|
|
"grad_norm": 0.08469020269460416,
|
|
"learning_rate": 1.6418367960924271e-06,
|
|
"loss": 0.6135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6082459688186646,
|
|
"step": 7045,
|
|
"valid_targets_mean": 15793.0,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 1.8815052041633307,
|
|
"grad_norm": 0.08138453596006119,
|
|
"learning_rate": 1.6056842569190987e-06,
|
|
"loss": 0.6217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5803401470184326,
|
|
"step": 7050,
|
|
"valid_targets_mean": 16992.2,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.8828396050173473,
|
|
"grad_norm": 0.08643291719031197,
|
|
"learning_rate": 1.5699298862103276e-06,
|
|
"loss": 0.6179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6261525750160217,
|
|
"step": 7055,
|
|
"valid_targets_mean": 16001.4,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 1.8841740058713636,
|
|
"grad_norm": 0.0938394622153444,
|
|
"learning_rate": 1.5345738779356714e-06,
|
|
"loss": 0.6301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6548340320587158,
|
|
"step": 7060,
|
|
"valid_targets_mean": 15555.3,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.8855084067253802,
|
|
"grad_norm": 0.09162369025565296,
|
|
"learning_rate": 1.4996164239035408e-06,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.593757152557373,
|
|
"step": 7065,
|
|
"valid_targets_mean": 15477.0,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.8868428075793968,
|
|
"grad_norm": 0.08752477920167435,
|
|
"learning_rate": 1.4650577137601843e-06,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6162948608398438,
|
|
"step": 7070,
|
|
"valid_targets_mean": 15712.4,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 1.8881772084334134,
|
|
"grad_norm": 0.08378801161427556,
|
|
"learning_rate": 1.4308979349886146e-06,
|
|
"loss": 0.6202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6429492235183716,
|
|
"step": 7075,
|
|
"valid_targets_mean": 17087.5,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.88951160928743,
|
|
"grad_norm": 0.08869747558124076,
|
|
"learning_rate": 1.3971372729076503e-06,
|
|
"loss": 0.6312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6538381576538086,
|
|
"step": 7080,
|
|
"valid_targets_mean": 15415.0,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.8908460101414466,
|
|
"grad_norm": 0.08483051600051576,
|
|
"learning_rate": 1.3637759106708501e-06,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6220189332962036,
|
|
"step": 7085,
|
|
"valid_targets_mean": 16511.9,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.8921804109954632,
|
|
"grad_norm": 0.08482293342113899,
|
|
"learning_rate": 1.3308140292655645e-06,
|
|
"loss": 0.608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6194273233413696,
|
|
"step": 7090,
|
|
"valid_targets_mean": 16692.6,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 1.8935148118494796,
|
|
"grad_norm": 0.08867654734388321,
|
|
"learning_rate": 1.2982518075119352e-06,
|
|
"loss": 0.6046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6477352380752563,
|
|
"step": 7095,
|
|
"valid_targets_mean": 15490.1,
|
|
"valid_targets_min": 142
|
|
},
|
|
{
|
|
"epoch": 1.8948492127034962,
|
|
"grad_norm": 0.08680246823710645,
|
|
"learning_rate": 1.2660894220619139e-06,
|
|
"loss": 0.625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6353548765182495,
|
|
"step": 7100,
|
|
"valid_targets_mean": 16462.5,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.8961836135575125,
|
|
"grad_norm": 0.08252503818191527,
|
|
"learning_rate": 1.2343270473983286e-06,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6071639060974121,
|
|
"step": 7105,
|
|
"valid_targets_mean": 16614.5,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.8975180144115291,
|
|
"grad_norm": 0.08293124137152696,
|
|
"learning_rate": 1.202964855833935e-06,
|
|
"loss": 0.6133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5994781255722046,
|
|
"step": 7110,
|
|
"valid_targets_mean": 15927.0,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.8988524152655457,
|
|
"grad_norm": 0.0824839342237248,
|
|
"learning_rate": 1.1720030175104506e-06,
|
|
"loss": 0.6281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6254457235336304,
|
|
"step": 7115,
|
|
"valid_targets_mean": 16195.4,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 1.9001868161195623,
|
|
"grad_norm": 0.08473796491677214,
|
|
"learning_rate": 1.1414417003976634e-06,
|
|
"loss": 0.6074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5947679281234741,
|
|
"step": 7120,
|
|
"valid_targets_mean": 16226.3,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.901521216973579,
|
|
"grad_norm": 0.07863211744651008,
|
|
"learning_rate": 1.1112810702925163e-06,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5904717445373535,
|
|
"step": 7125,
|
|
"valid_targets_mean": 17311.0,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 1.9028556178275955,
|
|
"grad_norm": 0.08729504718220402,
|
|
"learning_rate": 1.0815212908181825e-06,
|
|
"loss": 0.6133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6068782210350037,
|
|
"step": 7130,
|
|
"valid_targets_mean": 16227.7,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.904190018681612,
|
|
"grad_norm": 0.08799751275061786,
|
|
"learning_rate": 1.0521625234232333e-06,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.639133870601654,
|
|
"step": 7135,
|
|
"valid_targets_mean": 15186.4,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.9055244195356285,
|
|
"grad_norm": 0.08766233161698946,
|
|
"learning_rate": 1.023204927380672e-06,
|
|
"loss": 0.6309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6215628385543823,
|
|
"step": 7140,
|
|
"valid_targets_mean": 16491.2,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 1.906858820389645,
|
|
"grad_norm": 0.08136063705733775,
|
|
"learning_rate": 9.946486597871672e-07,
|
|
"loss": 0.6228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.603968620300293,
|
|
"step": 7145,
|
|
"valid_targets_mean": 16284.1,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.9081932212436616,
|
|
"grad_norm": 0.08427514491379798,
|
|
"learning_rate": 9.664938755621632e-07,
|
|
"loss": 0.6145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6328669786453247,
|
|
"step": 7150,
|
|
"valid_targets_mean": 15816.9,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 1.909527622097678,
|
|
"grad_norm": 0.08537633135837205,
|
|
"learning_rate": 9.387407274469793e-07,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234576106071472,
|
|
"step": 7155,
|
|
"valid_targets_mean": 16524.2,
|
|
"valid_targets_min": 177
|
|
},
|
|
{
|
|
"epoch": 1.9108620229516946,
|
|
"grad_norm": 0.08688371520142285,
|
|
"learning_rate": 9.113893660041033e-07,
|
|
"loss": 0.6156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6136159300804138,
|
|
"step": 7160,
|
|
"valid_targets_mean": 15847.0,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.9121964238057112,
|
|
"grad_norm": 0.08095805144808471,
|
|
"learning_rate": 8.844399396162577e-07,
|
|
"loss": 0.6096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5837612152099609,
|
|
"step": 7165,
|
|
"valid_targets_mean": 16031.0,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.9135308246597278,
|
|
"grad_norm": 0.08923077898805334,
|
|
"learning_rate": 8.578925944856596e-07,
|
|
"loss": 0.6296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6362388134002686,
|
|
"step": 7170,
|
|
"valid_targets_mean": 16244.0,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 1.9148652255137444,
|
|
"grad_norm": 0.08023949216756944,
|
|
"learning_rate": 8.317474746332126e-07,
|
|
"loss": 0.6045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5978532433509827,
|
|
"step": 7175,
|
|
"valid_targets_mean": 16691.7,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.916199626367761,
|
|
"grad_norm": 0.08614039079696198,
|
|
"learning_rate": 8.060047218977323e-07,
|
|
"loss": 0.6247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6216354370117188,
|
|
"step": 7180,
|
|
"valid_targets_mean": 15503.6,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 1.9175340272217776,
|
|
"grad_norm": 0.08513143725032378,
|
|
"learning_rate": 7.806644759351471e-07,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.633056640625,
|
|
"step": 7185,
|
|
"valid_targets_mean": 16464.3,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.918868428075794,
|
|
"grad_norm": 0.08707050074017611,
|
|
"learning_rate": 7.557268742177908e-07,
|
|
"loss": 0.6245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6251076459884644,
|
|
"step": 7190,
|
|
"valid_targets_mean": 15298.0,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.9202028289298105,
|
|
"grad_norm": 0.08298457877614573,
|
|
"learning_rate": 7.311920520336106e-07,
|
|
"loss": 0.6185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.63387531042099,
|
|
"step": 7195,
|
|
"valid_targets_mean": 15801.4,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.921537229783827,
|
|
"grad_norm": 0.07864046187495834,
|
|
"learning_rate": 7.070601424854522e-07,
|
|
"loss": 0.6197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6031808257102966,
|
|
"step": 7200,
|
|
"valid_targets_mean": 17465.6,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 1.9228716306378435,
|
|
"grad_norm": 0.08324704107943265,
|
|
"learning_rate": 6.833312764903343e-07,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6396461725234985,
|
|
"step": 7205,
|
|
"valid_targets_mean": 16632.7,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.92420603149186,
|
|
"grad_norm": 0.08765309774752181,
|
|
"learning_rate": 6.600055827787581e-07,
|
|
"loss": 0.6175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6199076175689697,
|
|
"step": 7210,
|
|
"valid_targets_mean": 15613.3,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.9255404323458767,
|
|
"grad_norm": 0.08291832400714277,
|
|
"learning_rate": 6.370831878939747e-07,
|
|
"loss": 0.643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6336727142333984,
|
|
"step": 7215,
|
|
"valid_targets_mean": 16355.1,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 1.9268748331998933,
|
|
"grad_norm": 0.08704375505728458,
|
|
"learning_rate": 6.1456421619131e-07,
|
|
"loss": 0.6088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6210100650787354,
|
|
"step": 7220,
|
|
"valid_targets_mean": 14533.1,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 1.9282092340539099,
|
|
"grad_norm": 0.08803696864770845,
|
|
"learning_rate": 5.924487898375158e-07,
|
|
"loss": 0.6182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6384143829345703,
|
|
"step": 7225,
|
|
"valid_targets_mean": 15410.6,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 1.9295436349079265,
|
|
"grad_norm": 0.0834751205891046,
|
|
"learning_rate": 5.707370288100782e-07,
|
|
"loss": 0.6333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6324965357780457,
|
|
"step": 7230,
|
|
"valid_targets_mean": 16607.8,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 1.9308780357619428,
|
|
"grad_norm": 0.08952295118745271,
|
|
"learning_rate": 5.494290508965605e-07,
|
|
"loss": 0.6345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6554481387138367,
|
|
"step": 7235,
|
|
"valid_targets_mean": 14700.1,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 1.9322124366159594,
|
|
"grad_norm": 0.07836025153728045,
|
|
"learning_rate": 5.285249716940026e-07,
|
|
"loss": 0.6132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5875779986381531,
|
|
"step": 7240,
|
|
"valid_targets_mean": 17065.6,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.933546837469976,
|
|
"grad_norm": 0.08531868760517099,
|
|
"learning_rate": 5.080249046082563e-07,
|
|
"loss": 0.6271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6375229954719543,
|
|
"step": 7245,
|
|
"valid_targets_mean": 15968.9,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 1.9348812383239924,
|
|
"grad_norm": 0.08139798817382352,
|
|
"learning_rate": 4.879289608533926e-07,
|
|
"loss": 0.6356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6170284748077393,
|
|
"step": 7250,
|
|
"valid_targets_mean": 16205.3,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.936215639178009,
|
|
"grad_norm": 0.08562110265994628,
|
|
"learning_rate": 4.6823724945107e-07,
|
|
"loss": 0.6165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6298227906227112,
|
|
"step": 7255,
|
|
"valid_targets_mean": 15892.4,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.9375500400320256,
|
|
"grad_norm": 0.08088218838388078,
|
|
"learning_rate": 4.489498772299843e-07,
|
|
"loss": 0.6257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6066421866416931,
|
|
"step": 7260,
|
|
"valid_targets_mean": 16061.2,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 1.9388844408860422,
|
|
"grad_norm": 0.08634923618970136,
|
|
"learning_rate": 4.3006694882526947e-07,
|
|
"loss": 0.6217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6236517429351807,
|
|
"step": 7265,
|
|
"valid_targets_mean": 16005.8,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.9402188417400588,
|
|
"grad_norm": 0.08670401203490817,
|
|
"learning_rate": 4.115885666779062e-07,
|
|
"loss": 0.622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6537075638771057,
|
|
"step": 7270,
|
|
"valid_targets_mean": 15107.6,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 1.9415532425940754,
|
|
"grad_norm": 0.07899525341897014,
|
|
"learning_rate": 3.9351483103420566e-07,
|
|
"loss": 0.6098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.583057165145874,
|
|
"step": 7275,
|
|
"valid_targets_mean": 17427.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.942887643448092,
|
|
"grad_norm": 0.07866070576361257,
|
|
"learning_rate": 3.758458399452519e-07,
|
|
"loss": 0.6148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.587480366230011,
|
|
"step": 7280,
|
|
"valid_targets_mean": 16900.7,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 1.9442220443021083,
|
|
"grad_norm": 0.07868622469701628,
|
|
"learning_rate": 3.585816892663351e-07,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5950984954833984,
|
|
"step": 7285,
|
|
"valid_targets_mean": 16367.6,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 1.945556445156125,
|
|
"grad_norm": 0.08299176442121954,
|
|
"learning_rate": 3.4172247265650267e-07,
|
|
"loss": 0.6255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6286250352859497,
|
|
"step": 7290,
|
|
"valid_targets_mean": 15500.8,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 1.9468908460101413,
|
|
"grad_norm": 0.08515529041637454,
|
|
"learning_rate": 3.252682815779922e-07,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6196545362472534,
|
|
"step": 7295,
|
|
"valid_targets_mean": 15439.1,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 1.9482252468641579,
|
|
"grad_norm": 0.08309506619992096,
|
|
"learning_rate": 3.0921920529574096e-07,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6408487558364868,
|
|
"step": 7300,
|
|
"valid_targets_mean": 16132.7,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 1.9495596477181745,
|
|
"grad_norm": 0.08463021920591228,
|
|
"learning_rate": 2.9357533087694397e-07,
|
|
"loss": 0.6201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6341683864593506,
|
|
"step": 7305,
|
|
"valid_targets_mean": 15742.3,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 1.950894048572191,
|
|
"grad_norm": 0.07687671870926512,
|
|
"learning_rate": 2.7833674319052977e-07,
|
|
"loss": 0.6179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6142845153808594,
|
|
"step": 7310,
|
|
"valid_targets_mean": 16706.5,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.9522284494262077,
|
|
"grad_norm": 0.08331816457428305,
|
|
"learning_rate": 2.6350352490672746e-07,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6477742791175842,
|
|
"step": 7315,
|
|
"valid_targets_mean": 15859.2,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.9535628502802243,
|
|
"grad_norm": 0.08418031536600762,
|
|
"learning_rate": 2.49075756496625e-07,
|
|
"loss": 0.62,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6104916334152222,
|
|
"step": 7320,
|
|
"valid_targets_mean": 16536.3,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.9548972511342408,
|
|
"grad_norm": 0.08018563522764396,
|
|
"learning_rate": 2.3505351623170353e-07,
|
|
"loss": 0.6365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6002669334411621,
|
|
"step": 7325,
|
|
"valid_targets_mean": 15635.6,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.9562316519882574,
|
|
"grad_norm": 0.08294347847486674,
|
|
"learning_rate": 2.2143688018343707e-07,
|
|
"loss": 0.6376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6311354637145996,
|
|
"step": 7330,
|
|
"valid_targets_mean": 15950.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.9575660528422738,
|
|
"grad_norm": 0.08169118747835444,
|
|
"learning_rate": 2.0822592222287659e-07,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6531394720077515,
|
|
"step": 7335,
|
|
"valid_targets_mean": 16453.7,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 1.9589004536962904,
|
|
"grad_norm": 0.08870720045507874,
|
|
"learning_rate": 1.9542071402024185e-07,
|
|
"loss": 0.608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6536878347396851,
|
|
"step": 7340,
|
|
"valid_targets_mean": 14271.3,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.9602348545503068,
|
|
"grad_norm": 0.08391865160704697,
|
|
"learning_rate": 1.830213250445467e-07,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5975584387779236,
|
|
"step": 7345,
|
|
"valid_targets_mean": 15199.5,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.9615692554043234,
|
|
"grad_norm": 0.08387455033289205,
|
|
"learning_rate": 1.7102782256319115e-07,
|
|
"loss": 0.637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6481696963310242,
|
|
"step": 7350,
|
|
"valid_targets_mean": 15820.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.96290365625834,
|
|
"grad_norm": 0.086376860258013,
|
|
"learning_rate": 1.5944027164163652e-07,
|
|
"loss": 0.6364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6533975005149841,
|
|
"step": 7355,
|
|
"valid_targets_mean": 15217.0,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 1.9642380571123566,
|
|
"grad_norm": 0.08124994645822725,
|
|
"learning_rate": 1.4825873514302257e-07,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6236833333969116,
|
|
"step": 7360,
|
|
"valid_targets_mean": 15678.1,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.9655724579663731,
|
|
"grad_norm": 0.08412854928063453,
|
|
"learning_rate": 1.3748327372784252e-07,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.638558566570282,
|
|
"step": 7365,
|
|
"valid_targets_mean": 16400.4,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 1.9669068588203897,
|
|
"grad_norm": 0.08204700545173244,
|
|
"learning_rate": 1.271139458536019e-07,
|
|
"loss": 0.6221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.600684404373169,
|
|
"step": 7370,
|
|
"valid_targets_mean": 16416.3,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.9682412596744063,
|
|
"grad_norm": 0.0791137749291822,
|
|
"learning_rate": 1.1715080777451868e-07,
|
|
"loss": 0.6198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6197346448898315,
|
|
"step": 7375,
|
|
"valid_targets_mean": 16873.1,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.9695756605284227,
|
|
"grad_norm": 0.0798165165215919,
|
|
"learning_rate": 1.0759391354119017e-07,
|
|
"loss": 0.6166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6060048341751099,
|
|
"step": 7380,
|
|
"valid_targets_mean": 15929.8,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 1.9709100613824393,
|
|
"grad_norm": 0.07952241282859478,
|
|
"learning_rate": 9.844331500034331e-08,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6397472023963928,
|
|
"step": 7385,
|
|
"valid_targets_mean": 16361.3,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.9722444622364559,
|
|
"grad_norm": 0.08577264214557807,
|
|
"learning_rate": 8.969906179449316e-08,
|
|
"loss": 0.639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6487519145011902,
|
|
"step": 7390,
|
|
"valid_targets_mean": 15036.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.9735788630904723,
|
|
"grad_norm": 0.08287326698149942,
|
|
"learning_rate": 8.136120136174318e-08,
|
|
"loss": 0.6347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6241464018821716,
|
|
"step": 7395,
|
|
"valid_targets_mean": 16079.6,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 1.9749132639444889,
|
|
"grad_norm": 0.08095087491935442,
|
|
"learning_rate": 7.342977893546875e-08,
|
|
"loss": 0.6383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.635275661945343,
|
|
"step": 7400,
|
|
"valid_targets_mean": 16328.8,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 1.9762476647985054,
|
|
"grad_norm": 0.07702461316924054,
|
|
"learning_rate": 6.590483754409237e-08,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5917639136314392,
|
|
"step": 7405,
|
|
"valid_targets_mean": 16817.6,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 1.977582065652522,
|
|
"grad_norm": 0.08353983089533804,
|
|
"learning_rate": 5.878641801087547e-08,
|
|
"loss": 0.6188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6240130662918091,
|
|
"step": 7410,
|
|
"valid_targets_mean": 15362.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.9789164665065386,
|
|
"grad_norm": 0.08288481791229246,
|
|
"learning_rate": 5.207455895365198e-08,
|
|
"loss": 0.6206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.618938148021698,
|
|
"step": 7415,
|
|
"valid_targets_mean": 15739.4,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 1.9802508673605552,
|
|
"grad_norm": 0.08294923182205442,
|
|
"learning_rate": 4.5769296784653463e-08,
|
|
"loss": 0.626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6074753403663635,
|
|
"step": 7420,
|
|
"valid_targets_mean": 15770.5,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.9815852682145718,
|
|
"grad_norm": 0.08301269507597478,
|
|
"learning_rate": 3.9870665710300954e-08,
|
|
"loss": 0.6286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6352344751358032,
|
|
"step": 7425,
|
|
"valid_targets_mean": 15678.1,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 1.9829196690685882,
|
|
"grad_norm": 0.0873006698492684,
|
|
"learning_rate": 3.437869773101343e-08,
|
|
"loss": 0.6227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6336438059806824,
|
|
"step": 7430,
|
|
"valid_targets_mean": 14569.5,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 1.9842540699226048,
|
|
"grad_norm": 0.08296937636453226,
|
|
"learning_rate": 2.929342264103296e-08,
|
|
"loss": 0.6079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380923986434937,
|
|
"step": 7435,
|
|
"valid_targets_mean": 17373.0,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 1.9855884707766212,
|
|
"grad_norm": 0.08313305694130789,
|
|
"learning_rate": 2.4614868028274837e-08,
|
|
"loss": 0.6155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5898568630218506,
|
|
"step": 7440,
|
|
"valid_targets_mean": 15781.7,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.9869228716306377,
|
|
"grad_norm": 0.07841450895238265,
|
|
"learning_rate": 2.034305927416935e-08,
|
|
"loss": 0.6142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.607092022895813,
|
|
"step": 7445,
|
|
"valid_targets_mean": 17081.3,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.9882572724846543,
|
|
"grad_norm": 0.08059226081528832,
|
|
"learning_rate": 1.647801955354522e-08,
|
|
"loss": 0.6058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.593668520450592,
|
|
"step": 7450,
|
|
"valid_targets_mean": 15864.9,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 1.989591673338671,
|
|
"grad_norm": 0.07953280935790458,
|
|
"learning_rate": 1.301976983445474e-08,
|
|
"loss": 0.6146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5993211269378662,
|
|
"step": 7455,
|
|
"valid_targets_mean": 16358.1,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 1.9909260741926875,
|
|
"grad_norm": 0.08334060728670939,
|
|
"learning_rate": 9.968328878115495e-09,
|
|
"loss": 0.6428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6364392042160034,
|
|
"step": 7460,
|
|
"valid_targets_mean": 15288.5,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 1.9922604750467041,
|
|
"grad_norm": 0.08366994870475707,
|
|
"learning_rate": 7.3237132387604646e-09,
|
|
"loss": 0.6364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.643349289894104,
|
|
"step": 7465,
|
|
"valid_targets_mean": 15160.0,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 1.9935948759007207,
|
|
"grad_norm": 0.08287524752697885,
|
|
"learning_rate": 5.0859372635964065e-09,
|
|
"loss": 0.6105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.612499475479126,
|
|
"step": 7470,
|
|
"valid_targets_mean": 15704.7,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 1.994929276754737,
|
|
"grad_norm": 0.08134192923159073,
|
|
"learning_rate": 3.2550130926789487e-09,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6421562433242798,
|
|
"step": 7475,
|
|
"valid_targets_mean": 16023.1,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 1.9962636776087537,
|
|
"grad_norm": 0.08336881992144246,
|
|
"learning_rate": 1.8309506588959356e-09,
|
|
"loss": 0.6224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.638372004032135,
|
|
"step": 7480,
|
|
"valid_targets_mean": 16144.6,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.9975980784627703,
|
|
"grad_norm": 0.08151270047704698,
|
|
"learning_rate": 8.137576878508578e-10,
|
|
"loss": 0.6412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6109259128570557,
|
|
"step": 7485,
|
|
"valid_targets_mean": 15843.8,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 1.9989324793167866,
|
|
"grad_norm": 0.08234255749905797,
|
|
"learning_rate": 2.0343969787950387e-10,
|
|
"loss": 0.6171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6063399314880371,
|
|
"step": 7490,
|
|
"valid_targets_mean": 16544.9,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6287916302680969,
|
|
"step": 7494,
|
|
"total_flos": 2.788441056922829e+16,
|
|
"train_loss": 0.21610206831669151,
|
|
"train_runtime": 19384.2842,
|
|
"train_samples_per_second": 98.968,
|
|
"train_steps_per_second": 0.387,
|
|
"valid_targets_mean": 15631.9,
|
|
"valid_targets_min": 505
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 7494,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 2,
|
|
"save_steps": 100,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.788441056922829e+16,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|