4096 lines
114 KiB
JSON
4096 lines
114 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1841,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.019011406844106463,
|
|
"grad_norm": 18.768985433269826,
|
|
"learning_rate": 8.64864864864865e-07,
|
|
"loss": 0.8865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45824751257896423,
|
|
"step": 5,
|
|
"valid_targets_mean": 4315.6,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 0.03802281368821293,
|
|
"grad_norm": 4.161641756419172,
|
|
"learning_rate": 1.945945945945946e-06,
|
|
"loss": 0.7829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32671618461608887,
|
|
"step": 10,
|
|
"valid_targets_mean": 4631.1,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 0.057034220532319393,
|
|
"grad_norm": 1.6968311772903435,
|
|
"learning_rate": 3.0270270270270274e-06,
|
|
"loss": 0.6528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004853129386902,
|
|
"step": 15,
|
|
"valid_targets_mean": 4061.9,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 0.07604562737642585,
|
|
"grad_norm": 0.9491466398184562,
|
|
"learning_rate": 4.108108108108108e-06,
|
|
"loss": 0.5875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27897870540618896,
|
|
"step": 20,
|
|
"valid_targets_mean": 4492.1,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 0.09505703422053231,
|
|
"grad_norm": 0.5548287452373166,
|
|
"learning_rate": 5.18918918918919e-06,
|
|
"loss": 0.5445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667747437953949,
|
|
"step": 25,
|
|
"valid_targets_mean": 4387.7,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 0.11406844106463879,
|
|
"grad_norm": 0.49652319636920533,
|
|
"learning_rate": 6.270270270270271e-06,
|
|
"loss": 0.4983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2525099217891693,
|
|
"step": 30,
|
|
"valid_targets_mean": 4403.1,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 0.13307984790874525,
|
|
"grad_norm": 0.40788904142748184,
|
|
"learning_rate": 7.3513513513513525e-06,
|
|
"loss": 0.4857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23482461273670197,
|
|
"step": 35,
|
|
"valid_targets_mean": 4255.0,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 0.1520912547528517,
|
|
"grad_norm": 0.3460099864906639,
|
|
"learning_rate": 8.432432432432434e-06,
|
|
"loss": 0.464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21213467419147491,
|
|
"step": 40,
|
|
"valid_targets_mean": 4031.0,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 0.17110266159695817,
|
|
"grad_norm": 0.3294697867452768,
|
|
"learning_rate": 9.513513513513514e-06,
|
|
"loss": 0.4457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20797699689865112,
|
|
"step": 45,
|
|
"valid_targets_mean": 4213.6,
|
|
"valid_targets_min": 1827
|
|
},
|
|
{
|
|
"epoch": 0.19011406844106463,
|
|
"grad_norm": 0.3007798207728767,
|
|
"learning_rate": 1.0594594594594597e-05,
|
|
"loss": 0.3791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1763700693845749,
|
|
"step": 50,
|
|
"valid_targets_mean": 5610.1,
|
|
"valid_targets_min": 2840
|
|
},
|
|
{
|
|
"epoch": 0.20912547528517111,
|
|
"grad_norm": 0.3135648808425615,
|
|
"learning_rate": 1.1675675675675677e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15119099617004395,
|
|
"step": 55,
|
|
"valid_targets_mean": 4945.7,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 0.22813688212927757,
|
|
"grad_norm": 0.3001927364971795,
|
|
"learning_rate": 1.2756756756756758e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13437247276306152,
|
|
"step": 60,
|
|
"valid_targets_mean": 5207.0,
|
|
"valid_targets_min": 2341
|
|
},
|
|
{
|
|
"epoch": 0.24714828897338403,
|
|
"grad_norm": 0.26327896731667505,
|
|
"learning_rate": 1.383783783783784e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14838241040706635,
|
|
"step": 65,
|
|
"valid_targets_mean": 5175.4,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 0.2661596958174905,
|
|
"grad_norm": 0.23565637422517188,
|
|
"learning_rate": 1.491891891891892e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273541897535324,
|
|
"step": 70,
|
|
"valid_targets_mean": 5396.7,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 0.28517110266159695,
|
|
"grad_norm": 0.23292525842968348,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13985608518123627,
|
|
"step": 75,
|
|
"valid_targets_mean": 5246.9,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 0.3041825095057034,
|
|
"grad_norm": 0.21583976978780783,
|
|
"learning_rate": 1.7081081081081083e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13641823828220367,
|
|
"step": 80,
|
|
"valid_targets_mean": 4957.9,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 0.3231939163498099,
|
|
"grad_norm": 0.21703167466244688,
|
|
"learning_rate": 1.8162162162162164e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12753139436244965,
|
|
"step": 85,
|
|
"valid_targets_mean": 4951.1,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 0.34220532319391633,
|
|
"grad_norm": 0.21798199551190492,
|
|
"learning_rate": 1.9243243243243244e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13001735508441925,
|
|
"step": 90,
|
|
"valid_targets_mean": 5112.8,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 0.3612167300380228,
|
|
"grad_norm": 0.23287202361449336,
|
|
"learning_rate": 2.0324324324324328e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13635805249214172,
|
|
"step": 95,
|
|
"valid_targets_mean": 5321.8,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 0.38022813688212925,
|
|
"grad_norm": 0.2771402834224665,
|
|
"learning_rate": 2.1405405405405405e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12471064180135727,
|
|
"step": 100,
|
|
"valid_targets_mean": 4986.9,
|
|
"valid_targets_min": 2576
|
|
},
|
|
{
|
|
"epoch": 0.39923954372623577,
|
|
"grad_norm": 0.22487804577069473,
|
|
"learning_rate": 2.248648648648649e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1233469620347023,
|
|
"step": 105,
|
|
"valid_targets_mean": 5325.8,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 0.41825095057034223,
|
|
"grad_norm": 0.22857152253610444,
|
|
"learning_rate": 2.356756756756757e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1288713663816452,
|
|
"step": 110,
|
|
"valid_targets_mean": 5161.7,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 0.4372623574144487,
|
|
"grad_norm": 0.24338824886805402,
|
|
"learning_rate": 2.4648648648648654e-05,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11510897427797318,
|
|
"step": 115,
|
|
"valid_targets_mean": 4941.3,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 0.45627376425855515,
|
|
"grad_norm": 0.24437893301310154,
|
|
"learning_rate": 2.572972972972973e-05,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12720325589179993,
|
|
"step": 120,
|
|
"valid_targets_mean": 5260.5,
|
|
"valid_targets_min": 2625
|
|
},
|
|
{
|
|
"epoch": 0.4752851711026616,
|
|
"grad_norm": 0.3917084342269015,
|
|
"learning_rate": 2.6810810810810815e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17161251604557037,
|
|
"step": 125,
|
|
"valid_targets_mean": 4271.5,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 0.49429657794676807,
|
|
"grad_norm": 0.4677580966290995,
|
|
"learning_rate": 2.7891891891891892e-05,
|
|
"loss": 0.4642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2109134942293167,
|
|
"step": 130,
|
|
"valid_targets_mean": 3648.6,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 0.5133079847908745,
|
|
"grad_norm": 0.37088030476958894,
|
|
"learning_rate": 2.8972972972972976e-05,
|
|
"loss": 0.4581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2217126041650772,
|
|
"step": 135,
|
|
"valid_targets_mean": 3692.1,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 0.532319391634981,
|
|
"grad_norm": 0.34834316541712207,
|
|
"learning_rate": 3.0054054054054056e-05,
|
|
"loss": 0.4374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21792852878570557,
|
|
"step": 140,
|
|
"valid_targets_mean": 4084.4,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 0.5513307984790875,
|
|
"grad_norm": 0.3764288574103832,
|
|
"learning_rate": 3.113513513513514e-05,
|
|
"loss": 0.4365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22260896861553192,
|
|
"step": 145,
|
|
"valid_targets_mean": 3740.4,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 0.5703422053231939,
|
|
"grad_norm": 0.30664676408221175,
|
|
"learning_rate": 3.221621621621622e-05,
|
|
"loss": 0.4129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20132417976856232,
|
|
"step": 150,
|
|
"valid_targets_mean": 3882.0,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 0.5893536121673004,
|
|
"grad_norm": 0.335985881141292,
|
|
"learning_rate": 3.3297297297297305e-05,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21901392936706543,
|
|
"step": 155,
|
|
"valid_targets_mean": 4136.7,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 0.6083650190114068,
|
|
"grad_norm": 0.3447522223458868,
|
|
"learning_rate": 3.437837837837838e-05,
|
|
"loss": 0.4275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22367675602436066,
|
|
"step": 160,
|
|
"valid_targets_mean": 7437.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.6273764258555133,
|
|
"grad_norm": 0.29372893235049985,
|
|
"learning_rate": 3.5459459459459466e-05,
|
|
"loss": 0.4306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2104015201330185,
|
|
"step": 165,
|
|
"valid_targets_mean": 7844.2,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 0.6463878326996197,
|
|
"grad_norm": 0.29079875739620076,
|
|
"learning_rate": 3.654054054054054e-05,
|
|
"loss": 0.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21834035217761993,
|
|
"step": 170,
|
|
"valid_targets_mean": 7960.9,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 0.6653992395437263,
|
|
"grad_norm": 0.27135578893433554,
|
|
"learning_rate": 3.762162162162163e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22146134078502655,
|
|
"step": 175,
|
|
"valid_targets_mean": 8483.1,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 0.6844106463878327,
|
|
"grad_norm": 0.2744187448490044,
|
|
"learning_rate": 3.8702702702702704e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19432826340198517,
|
|
"step": 180,
|
|
"valid_targets_mean": 7507.5,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.7034220532319392,
|
|
"grad_norm": 0.3048374023380346,
|
|
"learning_rate": 3.978378378378379e-05,
|
|
"loss": 0.3923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18287861347198486,
|
|
"step": 185,
|
|
"valid_targets_mean": 6985.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.7224334600760456,
|
|
"grad_norm": 0.2649862681572022,
|
|
"learning_rate": 3.999942416643093e-05,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20548956096172333,
|
|
"step": 190,
|
|
"valid_targets_mean": 7685.7,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.7414448669201521,
|
|
"grad_norm": 0.2992563120534786,
|
|
"learning_rate": 3.999708489938559e-05,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17796725034713745,
|
|
"step": 195,
|
|
"valid_targets_mean": 7207.1,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.7604562737642585,
|
|
"grad_norm": 0.2616409196519949,
|
|
"learning_rate": 3.999294641957663e-05,
|
|
"loss": 0.3852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19631274044513702,
|
|
"step": 200,
|
|
"valid_targets_mean": 7820.0,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 0.779467680608365,
|
|
"grad_norm": 0.2641716839624123,
|
|
"learning_rate": 3.998700909935863e-05,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21066488325595856,
|
|
"step": 205,
|
|
"valid_targets_mean": 8022.3,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.7984790874524715,
|
|
"grad_norm": 0.24998064714538126,
|
|
"learning_rate": 3.9979273472934556e-05,
|
|
"loss": 0.3774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17630921304225922,
|
|
"step": 210,
|
|
"valid_targets_mean": 7562.2,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 0.8174904942965779,
|
|
"grad_norm": 0.261840054565909,
|
|
"learning_rate": 3.9969740236307746e-05,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20479388535022736,
|
|
"step": 215,
|
|
"valid_targets_mean": 8467.6,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.8365019011406845,
|
|
"grad_norm": 0.2526323676493677,
|
|
"learning_rate": 3.9958410247219265e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20014218986034393,
|
|
"step": 220,
|
|
"valid_targets_mean": 7542.6,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 0.8555133079847909,
|
|
"grad_norm": 0.5173558603802552,
|
|
"learning_rate": 3.994528452507076e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12122941017150879,
|
|
"step": 225,
|
|
"valid_targets_mean": 5823.9,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.8745247148288974,
|
|
"grad_norm": 0.28893785124827975,
|
|
"learning_rate": 3.993036425083269e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12518875300884247,
|
|
"step": 230,
|
|
"valid_targets_mean": 6544.8,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 0.8935361216730038,
|
|
"grad_norm": 0.26737459303895006,
|
|
"learning_rate": 3.9913650766938115e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12098179012537003,
|
|
"step": 235,
|
|
"valid_targets_mean": 6424.3,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 0.9125475285171103,
|
|
"grad_norm": 0.2234023108439984,
|
|
"learning_rate": 3.98951455771619e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10649286955595016,
|
|
"step": 240,
|
|
"valid_targets_mean": 5882.0,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 0.9315589353612167,
|
|
"grad_norm": 0.22867133634974954,
|
|
"learning_rate": 3.987485034648541e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11067407578229904,
|
|
"step": 245,
|
|
"valid_targets_mean": 6166.6,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.9505703422053232,
|
|
"grad_norm": 0.18046687603251277,
|
|
"learning_rate": 3.985276690094669e-05,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11107806116342545,
|
|
"step": 250,
|
|
"valid_targets_mean": 5971.0,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 0.9695817490494296,
|
|
"grad_norm": 0.22379606980948552,
|
|
"learning_rate": 3.982889722747621e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1035776138305664,
|
|
"step": 255,
|
|
"valid_targets_mean": 6159.1,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 0.9885931558935361,
|
|
"grad_norm": 0.17909010829485558,
|
|
"learning_rate": 3.980324347371806e-05,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09941697865724564,
|
|
"step": 260,
|
|
"valid_targets_mean": 6163.3,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 1.0076045627376427,
|
|
"grad_norm": 0.33780995576565503,
|
|
"learning_rate": 3.977580794783672e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18427270650863647,
|
|
"step": 265,
|
|
"valid_targets_mean": 4252.4,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 1.026615969581749,
|
|
"grad_norm": 0.2883511493855013,
|
|
"learning_rate": 3.97465931183094e-05,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19222092628479004,
|
|
"step": 270,
|
|
"valid_targets_mean": 4523.0,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 1.0456273764258555,
|
|
"grad_norm": 0.3124414858734432,
|
|
"learning_rate": 3.971560161370393e-05,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.178070530295372,
|
|
"step": 275,
|
|
"valid_targets_mean": 4048.7,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 1.064638783269962,
|
|
"grad_norm": 0.29980707122304473,
|
|
"learning_rate": 3.968283622244229e-05,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17472650110721588,
|
|
"step": 280,
|
|
"valid_targets_mean": 4060.6,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 1.0836501901140685,
|
|
"grad_norm": 0.2855083601218697,
|
|
"learning_rate": 3.9648299892549654e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1766706258058548,
|
|
"step": 285,
|
|
"valid_targets_mean": 4122.0,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 1.102661596958175,
|
|
"grad_norm": 0.26544618383492363,
|
|
"learning_rate": 3.961199573138923e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17382532358169556,
|
|
"step": 290,
|
|
"valid_targets_mean": 4094.8,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 1.1216730038022813,
|
|
"grad_norm": 0.27395793149443254,
|
|
"learning_rate": 3.957392700538261e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17056365311145782,
|
|
"step": 295,
|
|
"valid_targets_mean": 3793.4,
|
|
"valid_targets_min": 2184
|
|
},
|
|
{
|
|
"epoch": 1.1406844106463878,
|
|
"grad_norm": 0.2694202154601782,
|
|
"learning_rate": 3.9534097139715926e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17225591838359833,
|
|
"step": 300,
|
|
"valid_targets_mean": 4053.2,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 1.1596958174904943,
|
|
"grad_norm": 0.2640197797512894,
|
|
"learning_rate": 3.9492509718031645e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17667798697948456,
|
|
"step": 305,
|
|
"valid_targets_mean": 4274.1,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 1.1787072243346008,
|
|
"grad_norm": 0.25361187076817665,
|
|
"learning_rate": 3.944916848210614e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17618222534656525,
|
|
"step": 310,
|
|
"valid_targets_mean": 4422.8,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 1.1977186311787071,
|
|
"grad_norm": 0.2338076023050613,
|
|
"learning_rate": 3.9404077331513044e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924882978200912,
|
|
"step": 315,
|
|
"valid_targets_mean": 5303.7,
|
|
"valid_targets_min": 2553
|
|
},
|
|
{
|
|
"epoch": 1.2167300380228137,
|
|
"grad_norm": 0.20686129841395856,
|
|
"learning_rate": 3.9357240323272367e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1147601380944252,
|
|
"step": 320,
|
|
"valid_targets_mean": 5254.6,
|
|
"valid_targets_min": 2168
|
|
},
|
|
{
|
|
"epoch": 1.2357414448669202,
|
|
"grad_norm": 0.21736762319432792,
|
|
"learning_rate": 3.930866167148549e-05,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10893138498067856,
|
|
"step": 325,
|
|
"valid_targets_mean": 4881.5,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 1.2547528517110267,
|
|
"grad_norm": 0.20801984857584946,
|
|
"learning_rate": 3.925834574695599e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11135486513376236,
|
|
"step": 330,
|
|
"valid_targets_mean": 5599.6,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 1.2737642585551332,
|
|
"grad_norm": 0.19325104513890953,
|
|
"learning_rate": 3.920629707679641e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10907386988401413,
|
|
"step": 335,
|
|
"valid_targets_mean": 4711.3,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 1.2927756653992395,
|
|
"grad_norm": 0.18491316478382414,
|
|
"learning_rate": 3.915252034402089e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11171620339155197,
|
|
"step": 340,
|
|
"valid_targets_mean": 5566.0,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 1.311787072243346,
|
|
"grad_norm": 0.19249485970179556,
|
|
"learning_rate": 3.9097020387123876e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08895137161016464,
|
|
"step": 345,
|
|
"valid_targets_mean": 4913.7,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 1.3307984790874525,
|
|
"grad_norm": 0.1976193837635321,
|
|
"learning_rate": 3.903980219964474e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0967416986823082,
|
|
"step": 350,
|
|
"valid_targets_mean": 4946.9,
|
|
"valid_targets_min": 2188
|
|
},
|
|
{
|
|
"epoch": 1.3498098859315588,
|
|
"grad_norm": 0.18884045418843914,
|
|
"learning_rate": 3.898087092971851e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10309580713510513,
|
|
"step": 355,
|
|
"valid_targets_mean": 5133.6,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 1.3688212927756653,
|
|
"grad_norm": 0.2150636741987042,
|
|
"learning_rate": 3.892023187961268e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0983353927731514,
|
|
"step": 360,
|
|
"valid_targets_mean": 5151.9,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 1.3878326996197718,
|
|
"grad_norm": 0.18862534780965,
|
|
"learning_rate": 3.8857890505250103e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09774443507194519,
|
|
"step": 365,
|
|
"valid_targets_mean": 4959.3,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 1.4068441064638784,
|
|
"grad_norm": 0.18583559102022223,
|
|
"learning_rate": 3.879385241571817e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09217417240142822,
|
|
"step": 370,
|
|
"valid_targets_mean": 5263.2,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 1.4258555133079849,
|
|
"grad_norm": 0.23173843670661723,
|
|
"learning_rate": 3.8728123372764085e-05,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09147930145263672,
|
|
"step": 375,
|
|
"valid_targets_mean": 5157.2,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 1.4448669201520912,
|
|
"grad_norm": 0.2160702092104236,
|
|
"learning_rate": 3.866070929027647e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10929874330759048,
|
|
"step": 380,
|
|
"valid_targets_mean": 5271.4,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 1.4638783269961977,
|
|
"grad_norm": 0.19288064073755715,
|
|
"learning_rate": 3.85916162337533e-05,
|
|
"loss": 0.2053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10217779874801636,
|
|
"step": 385,
|
|
"valid_targets_mean": 5138.1,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 1.4828897338403042,
|
|
"grad_norm": 0.37858755377369957,
|
|
"learning_rate": 3.8520850419756104e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1849764585494995,
|
|
"step": 390,
|
|
"valid_targets_mean": 3941.6,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 1.5019011406844105,
|
|
"grad_norm": 0.35470045423494073,
|
|
"learning_rate": 3.8448418215350726e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646745651960373,
|
|
"step": 395,
|
|
"valid_targets_mean": 4081.7,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 1.5209125475285172,
|
|
"grad_norm": 0.2876074912459836,
|
|
"learning_rate": 3.837432613753438e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480315774679184,
|
|
"step": 400,
|
|
"valid_targets_mean": 3830.1,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 1.5399239543726235,
|
|
"grad_norm": 0.3611749486973579,
|
|
"learning_rate": 3.8298580852649316e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16189919412136078,
|
|
"step": 405,
|
|
"valid_targets_mean": 3843.3,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 1.55893536121673,
|
|
"grad_norm": 0.3206203251498053,
|
|
"learning_rate": 3.822118917578304e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17144060134887695,
|
|
"step": 410,
|
|
"valid_targets_mean": 3924.0,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 1.5779467680608366,
|
|
"grad_norm": 0.3741047714354632,
|
|
"learning_rate": 3.814215807015511e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1597537249326706,
|
|
"step": 415,
|
|
"valid_targets_mean": 3620.2,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 1.5969581749049429,
|
|
"grad_norm": 0.37521631376088815,
|
|
"learning_rate": 3.806149464649066e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14377222955226898,
|
|
"step": 420,
|
|
"valid_targets_mean": 3715.8,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 1.6159695817490496,
|
|
"grad_norm": 0.26671798721350176,
|
|
"learning_rate": 3.797920616238058e-05,
|
|
"loss": 0.3558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1722474843263626,
|
|
"step": 425,
|
|
"valid_targets_mean": 7975.3,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.6349809885931559,
|
|
"grad_norm": 0.27985449043512695,
|
|
"learning_rate": 3.789530002162856e-05,
|
|
"loss": 0.3507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16311007738113403,
|
|
"step": 430,
|
|
"valid_targets_mean": 7661.3,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 1.6539923954372624,
|
|
"grad_norm": 0.24727092770252987,
|
|
"learning_rate": 3.780978377358493e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17998184263706207,
|
|
"step": 435,
|
|
"valid_targets_mean": 7740.2,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.673003802281369,
|
|
"grad_norm": 0.22934434129529802,
|
|
"learning_rate": 3.77226651124674e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16208817064762115,
|
|
"step": 440,
|
|
"valid_targets_mean": 7938.4,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.6920152091254752,
|
|
"grad_norm": 0.2257662037497451,
|
|
"learning_rate": 3.7633951876668826e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1535138487815857,
|
|
"step": 445,
|
|
"valid_targets_mean": 6955.9,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 1.7110266159695817,
|
|
"grad_norm": 0.2300354581824267,
|
|
"learning_rate": 3.754365204805189e-05,
|
|
"loss": 0.339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17140650749206543,
|
|
"step": 450,
|
|
"valid_targets_mean": 7178.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.7300380228136882,
|
|
"grad_norm": 0.25849307514203357,
|
|
"learning_rate": 3.745177375123101e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17827041447162628,
|
|
"step": 455,
|
|
"valid_targets_mean": 8065.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.7490494296577945,
|
|
"grad_norm": 0.23634004384778767,
|
|
"learning_rate": 3.7358325252841326e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15933071076869965,
|
|
"step": 460,
|
|
"valid_targets_mean": 7800.7,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 1.7680608365019013,
|
|
"grad_norm": 0.2360782519286374,
|
|
"learning_rate": 3.726331496079486e-05,
|
|
"loss": 0.3411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17583046853542328,
|
|
"step": 465,
|
|
"valid_targets_mean": 7847.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.7870722433460076,
|
|
"grad_norm": 0.23430829246800822,
|
|
"learning_rate": 3.716675142352411e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15302903950214386,
|
|
"step": 470,
|
|
"valid_targets_mean": 7130.6,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.806083650190114,
|
|
"grad_norm": 0.22227998277895408,
|
|
"learning_rate": 3.706864332921285e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16763727366924286,
|
|
"step": 475,
|
|
"valid_targets_mean": 7978.8,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.8250950570342206,
|
|
"grad_norm": 0.27022623582106853,
|
|
"learning_rate": 3.696899950501447e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16360555589199066,
|
|
"step": 480,
|
|
"valid_targets_mean": 7157.9,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.8441064638783269,
|
|
"grad_norm": 0.24101847996901093,
|
|
"learning_rate": 3.686782891625772e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16875922679901123,
|
|
"step": 485,
|
|
"valid_targets_mean": 7234.4,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 1.8631178707224336,
|
|
"grad_norm": 0.26624531145633745,
|
|
"learning_rate": 3.676514066564009e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08150222152471542,
|
|
"step": 490,
|
|
"valid_targets_mean": 6154.4,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 1.88212927756654,
|
|
"grad_norm": 0.25942794427129934,
|
|
"learning_rate": 3.6660943992408817e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08557536453008652,
|
|
"step": 495,
|
|
"valid_targets_mean": 5865.2,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 1.9011406844106464,
|
|
"grad_norm": 0.24051565314409892,
|
|
"learning_rate": 3.6555248271529554e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09305544942617416,
|
|
"step": 500,
|
|
"valid_targets_mean": 6350.6,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 1.920152091254753,
|
|
"grad_norm": 0.2415618705323024,
|
|
"learning_rate": 3.644806301284293e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08921092748641968,
|
|
"step": 505,
|
|
"valid_targets_mean": 5735.5,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 1.9391634980988592,
|
|
"grad_norm": 0.19616487082311124,
|
|
"learning_rate": 3.633939786020884e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09038201719522476,
|
|
"step": 510,
|
|
"valid_targets_mean": 6206.8,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 1.9581749049429658,
|
|
"grad_norm": 0.21845612335525297,
|
|
"learning_rate": 3.622926259063883e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08562606573104858,
|
|
"step": 515,
|
|
"valid_targets_mean": 6104.3,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 1.9771863117870723,
|
|
"grad_norm": 0.19802351112521951,
|
|
"learning_rate": 3.611766711341636e-05,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08315548300743103,
|
|
"step": 520,
|
|
"valid_targets_mean": 6447.5,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 1.9961977186311786,
|
|
"grad_norm": 0.19485390745124273,
|
|
"learning_rate": 3.600462146920525e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08430048078298569,
|
|
"step": 525,
|
|
"valid_targets_mean": 5833.1,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 2.0152091254752853,
|
|
"grad_norm": 0.3136192928564486,
|
|
"learning_rate": 3.5890135829146294e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1510065495967865,
|
|
"step": 530,
|
|
"valid_targets_mean": 4625.7,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 2.0342205323193916,
|
|
"grad_norm": 0.31107755227200234,
|
|
"learning_rate": 3.577422049394212e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280999481678009,
|
|
"step": 535,
|
|
"valid_targets_mean": 3831.7,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 2.053231939163498,
|
|
"grad_norm": 0.32564709358200533,
|
|
"learning_rate": 3.5656885892930376e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15929754078388214,
|
|
"step": 540,
|
|
"valid_targets_mean": 4451.9,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 2.0722433460076046,
|
|
"grad_norm": 0.30492913163946755,
|
|
"learning_rate": 3.5538142583145395e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532464176416397,
|
|
"step": 545,
|
|
"valid_targets_mean": 4672.5,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 2.091254752851711,
|
|
"grad_norm": 0.2959882307284956,
|
|
"learning_rate": 3.5418001248368324e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13413457572460175,
|
|
"step": 550,
|
|
"valid_targets_mean": 4185.8,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 2.1102661596958177,
|
|
"grad_norm": 0.27731170863500637,
|
|
"learning_rate": 3.5296472698165856e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13876162469387054,
|
|
"step": 555,
|
|
"valid_targets_mean": 4360.1,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 2.129277566539924,
|
|
"grad_norm": 0.2607950148321866,
|
|
"learning_rate": 3.5173567866917664e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12989114224910736,
|
|
"step": 560,
|
|
"valid_targets_mean": 4039.0,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 2.1482889733840302,
|
|
"grad_norm": 0.2501785431734494,
|
|
"learning_rate": 3.504929781283259e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13388948142528534,
|
|
"step": 565,
|
|
"valid_targets_mean": 4414.8,
|
|
"valid_targets_min": 1740
|
|
},
|
|
{
|
|
"epoch": 2.167300380228137,
|
|
"grad_norm": 0.2590008497305306,
|
|
"learning_rate": 3.4923673716953717e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1188698410987854,
|
|
"step": 570,
|
|
"valid_targets_mean": 3860.4,
|
|
"valid_targets_min": 1987
|
|
},
|
|
{
|
|
"epoch": 2.1863117870722433,
|
|
"grad_norm": 0.22763236836652495,
|
|
"learning_rate": 3.4796706882152304e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09271648526191711,
|
|
"step": 575,
|
|
"valid_targets_mean": 5169.8,
|
|
"valid_targets_min": 2230
|
|
},
|
|
{
|
|
"epoch": 2.20532319391635,
|
|
"grad_norm": 0.2243687505654757,
|
|
"learning_rate": 3.4668408732110915e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08923256397247314,
|
|
"step": 580,
|
|
"valid_targets_mean": 5347.7,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 2.2243346007604563,
|
|
"grad_norm": 0.20192534845911445,
|
|
"learning_rate": 3.453879081029552e-05,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08735809475183487,
|
|
"step": 585,
|
|
"valid_targets_mean": 5252.0,
|
|
"valid_targets_min": 2739
|
|
},
|
|
{
|
|
"epoch": 2.2433460076045626,
|
|
"grad_norm": 0.21897534747327288,
|
|
"learning_rate": 3.440786477891691e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07201861590147018,
|
|
"step": 590,
|
|
"valid_targets_mean": 4841.5,
|
|
"valid_targets_min": 2438
|
|
},
|
|
{
|
|
"epoch": 2.2623574144486693,
|
|
"grad_norm": 0.202393296923521,
|
|
"learning_rate": 3.42756424178814e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08041936904191971,
|
|
"step": 595,
|
|
"valid_targets_mean": 5269.5,
|
|
"valid_targets_min": 2799
|
|
},
|
|
{
|
|
"epoch": 2.2813688212927756,
|
|
"grad_norm": 0.1982442046754712,
|
|
"learning_rate": 3.4142135623730954e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08316317200660706,
|
|
"step": 600,
|
|
"valid_targets_mean": 4994.9,
|
|
"valid_targets_min": 2925
|
|
},
|
|
{
|
|
"epoch": 2.3003802281368824,
|
|
"grad_norm": 0.1902314107786306,
|
|
"learning_rate": 3.40073564085728e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07204687595367432,
|
|
"step": 605,
|
|
"valid_targets_mean": 5089.9,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 2.3193916349809887,
|
|
"grad_norm": 0.18912526794105103,
|
|
"learning_rate": 3.387131689899866e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0887041762471199,
|
|
"step": 610,
|
|
"valid_targets_mean": 5424.5,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 2.338403041825095,
|
|
"grad_norm": 0.20318427904092756,
|
|
"learning_rate": 3.3734029334993675e-05,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08025332540273666,
|
|
"step": 615,
|
|
"valid_targets_mean": 5056.5,
|
|
"valid_targets_min": 1818
|
|
},
|
|
{
|
|
"epoch": 2.3574144486692017,
|
|
"grad_norm": 0.21260147977373586,
|
|
"learning_rate": 3.359550606883511e-05,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08076399564743042,
|
|
"step": 620,
|
|
"valid_targets_mean": 5039.8,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 2.376425855513308,
|
|
"grad_norm": 0.2005739212333494,
|
|
"learning_rate": 3.3455759563981025e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08449053764343262,
|
|
"step": 625,
|
|
"valid_targets_mean": 4937.0,
|
|
"valid_targets_min": 2290
|
|
},
|
|
{
|
|
"epoch": 2.3954372623574143,
|
|
"grad_norm": 0.1939665480340307,
|
|
"learning_rate": 3.331480239394881e-05,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08675984293222427,
|
|
"step": 630,
|
|
"valid_targets_mean": 5262.6,
|
|
"valid_targets_min": 2744
|
|
},
|
|
{
|
|
"epoch": 2.414448669201521,
|
|
"grad_norm": 0.2067313139807621,
|
|
"learning_rate": 3.317264724118399e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07771243900060654,
|
|
"step": 635,
|
|
"valid_targets_mean": 5317.8,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 2.4334600760456273,
|
|
"grad_norm": 0.24438817055982565,
|
|
"learning_rate": 3.3029306895919056e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07272510975599289,
|
|
"step": 640,
|
|
"valid_targets_mean": 4976.9,
|
|
"valid_targets_min": 1959
|
|
},
|
|
{
|
|
"epoch": 2.4524714828897336,
|
|
"grad_norm": 0.21947954421689864,
|
|
"learning_rate": 3.288479425502273e-05,
|
|
"loss": 0.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08339103311300278,
|
|
"step": 645,
|
|
"valid_targets_mean": 5133.1,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 2.4714828897338403,
|
|
"grad_norm": 0.2361515652158053,
|
|
"learning_rate": 3.2739122320839567e-05,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07445741444826126,
|
|
"step": 650,
|
|
"valid_targets_mean": 4922.0,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 2.4904942965779466,
|
|
"grad_norm": 0.4845308707427055,
|
|
"learning_rate": 3.25923042000201e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11655815690755844,
|
|
"step": 655,
|
|
"valid_targets_mean": 3755.8,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 2.5095057034220534,
|
|
"grad_norm": 0.5041883033987095,
|
|
"learning_rate": 3.244435310234156e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12083180993795395,
|
|
"step": 660,
|
|
"valid_targets_mean": 4149.8,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 2.5285171102661597,
|
|
"grad_norm": 0.4293037233161345,
|
|
"learning_rate": 3.229528233951935e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12094869464635849,
|
|
"step": 665,
|
|
"valid_targets_mean": 4153.3,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 2.5475285171102664,
|
|
"grad_norm": 0.4341690333179762,
|
|
"learning_rate": 3.214510532400939e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278359293937683,
|
|
"step": 670,
|
|
"valid_targets_mean": 4260.3,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 2.5665399239543727,
|
|
"grad_norm": 0.4497609978338215,
|
|
"learning_rate": 3.1993835567801266e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1090053841471672,
|
|
"step": 675,
|
|
"valid_targets_mean": 3701.6,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 2.585551330798479,
|
|
"grad_norm": 0.36831043021057513,
|
|
"learning_rate": 3.184148668120253e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10701311379671097,
|
|
"step": 680,
|
|
"valid_targets_mean": 3952.4,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 2.6045627376425857,
|
|
"grad_norm": 0.40504235757567003,
|
|
"learning_rate": 3.16880723716142e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709039807319641,
|
|
"step": 685,
|
|
"valid_targets_mean": 7155.4,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 2.623574144486692,
|
|
"grad_norm": 0.3013409218707254,
|
|
"learning_rate": 3.153360644229735e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16763341426849365,
|
|
"step": 690,
|
|
"valid_targets_mean": 7369.4,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 2.6425855513307983,
|
|
"grad_norm": 0.26444934651735047,
|
|
"learning_rate": 3.137810279113125e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15889973938465118,
|
|
"step": 695,
|
|
"valid_targets_mean": 7592.1,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 2.661596958174905,
|
|
"grad_norm": 0.22352602705602617,
|
|
"learning_rate": 3.122157540936288e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15156026184558868,
|
|
"step": 700,
|
|
"valid_targets_mean": 7414.5,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 2.6806083650190113,
|
|
"grad_norm": 0.24463272584732818,
|
|
"learning_rate": 3.106403838034815e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15007655322551727,
|
|
"step": 705,
|
|
"valid_targets_mean": 7829.4,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 2.6996197718631176,
|
|
"grad_norm": 0.21946530753675053,
|
|
"learning_rate": 3.090550587828466e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14451086521148682,
|
|
"step": 710,
|
|
"valid_targets_mean": 7851.2,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 2.7186311787072244,
|
|
"grad_norm": 0.22157971443490984,
|
|
"learning_rate": 3.0745992166936484e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14042143523693085,
|
|
"step": 715,
|
|
"valid_targets_mean": 6916.8,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 2.7376425855513307,
|
|
"grad_norm": 0.21721619776557882,
|
|
"learning_rate": 3.058551159835078e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15121476352214813,
|
|
"step": 720,
|
|
"valid_targets_mean": 7713.7,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 2.7566539923954374,
|
|
"grad_norm": 0.2516514737751596,
|
|
"learning_rate": 3.0424078611566484e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15188945829868317,
|
|
"step": 725,
|
|
"valid_targets_mean": 7996.9,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 2.7756653992395437,
|
|
"grad_norm": 0.2378457691057772,
|
|
"learning_rate": 3.026170773131516e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13439372181892395,
|
|
"step": 730,
|
|
"valid_targets_mean": 7238.1,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 2.7946768060836504,
|
|
"grad_norm": 0.25213487345273705,
|
|
"learning_rate": 3.0098413566714165e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14057187736034393,
|
|
"step": 735,
|
|
"valid_targets_mean": 7777.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 2.8136882129277567,
|
|
"grad_norm": 0.24989480394630467,
|
|
"learning_rate": 2.9934210809952216e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12304911762475967,
|
|
"step": 740,
|
|
"valid_targets_mean": 6190.7,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 2.832699619771863,
|
|
"grad_norm": 0.2689521231941662,
|
|
"learning_rate": 2.9769114234967486e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12372348457574844,
|
|
"step": 745,
|
|
"valid_targets_mean": 6905.2,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 2.8517110266159698,
|
|
"grad_norm": 0.20927078109216926,
|
|
"learning_rate": 2.9603138696118315e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07049879431724548,
|
|
"step": 750,
|
|
"valid_targets_mean": 5756.4,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 2.870722433460076,
|
|
"grad_norm": 0.2675987794700805,
|
|
"learning_rate": 2.9436299126846726e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07208818197250366,
|
|
"step": 755,
|
|
"valid_targets_mean": 6551.0,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 2.8897338403041823,
|
|
"grad_norm": 0.2651422160068339,
|
|
"learning_rate": 2.92686105383348e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06887445598840714,
|
|
"step": 760,
|
|
"valid_targets_mean": 5875.1,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 2.908745247148289,
|
|
"grad_norm": 0.28796721511055345,
|
|
"learning_rate": 2.910008801815406e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06859635561704636,
|
|
"step": 765,
|
|
"valid_targets_mean": 6292.9,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 2.9277566539923954,
|
|
"grad_norm": 0.23162244917131206,
|
|
"learning_rate": 2.8930746728908002e-05,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07203914225101471,
|
|
"step": 770,
|
|
"valid_targets_mean": 6265.1,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 2.9467680608365017,
|
|
"grad_norm": 0.22762426423237933,
|
|
"learning_rate": 2.876060190686784e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06779056042432785,
|
|
"step": 775,
|
|
"valid_targets_mean": 6037.4,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 2.9657794676806084,
|
|
"grad_norm": 0.2342043607080278,
|
|
"learning_rate": 2.8589668860601643e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06987350434064865,
|
|
"step": 780,
|
|
"valid_targets_mean": 6279.7,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 2.9847908745247147,
|
|
"grad_norm": 0.23814802337043967,
|
|
"learning_rate": 2.8417962969596976e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07049954682588577,
|
|
"step": 785,
|
|
"valid_targets_mean": 6157.9,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 3.0038022813688214,
|
|
"grad_norm": 0.3462432064505725,
|
|
"learning_rate": 2.8245499682877152e-05,
|
|
"loss": 0.1522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11840838193893433,
|
|
"step": 790,
|
|
"valid_targets_mean": 4474.1,
|
|
"valid_targets_min": 2202
|
|
},
|
|
{
|
|
"epoch": 3.0228136882129277,
|
|
"grad_norm": 0.387684012161189,
|
|
"learning_rate": 2.8072294517611208e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11450818926095963,
|
|
"step": 795,
|
|
"valid_targets_mean": 4430.6,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 3.041825095057034,
|
|
"grad_norm": 0.4289500795878552,
|
|
"learning_rate": 2.7898363057717786e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10947731137275696,
|
|
"step": 800,
|
|
"valid_targets_mean": 4507.6,
|
|
"valid_targets_min": 2374
|
|
},
|
|
{
|
|
"epoch": 3.0608365019011408,
|
|
"grad_norm": 0.38143942347647636,
|
|
"learning_rate": 2.772372095246297e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10378646850585938,
|
|
"step": 805,
|
|
"valid_targets_mean": 4175.6,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 3.079847908745247,
|
|
"grad_norm": 0.35205316942486764,
|
|
"learning_rate": 2.7548383915052287e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09711352735757828,
|
|
"step": 810,
|
|
"valid_targets_mean": 4604.5,
|
|
"valid_targets_min": 2300
|
|
},
|
|
{
|
|
"epoch": 3.098859315589354,
|
|
"grad_norm": 0.34334350946854836,
|
|
"learning_rate": 2.7372367721216915e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10052075982093811,
|
|
"step": 815,
|
|
"valid_targets_mean": 4258.6,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 3.11787072243346,
|
|
"grad_norm": 0.31906834116059535,
|
|
"learning_rate": 2.7195688207794277e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09429528564214706,
|
|
"step": 820,
|
|
"valid_targets_mean": 4176.6,
|
|
"valid_targets_min": 2221
|
|
},
|
|
{
|
|
"epoch": 3.1368821292775664,
|
|
"grad_norm": 0.33079871629370033,
|
|
"learning_rate": 2.701836127130314e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0990242138504982,
|
|
"step": 825,
|
|
"valid_targets_mean": 4614.5,
|
|
"valid_targets_min": 1976
|
|
},
|
|
{
|
|
"epoch": 3.155893536121673,
|
|
"grad_norm": 0.31986706842847734,
|
|
"learning_rate": 2.684040286651338e-05,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09625053405761719,
|
|
"step": 830,
|
|
"valid_targets_mean": 4079.5,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 3.1749049429657794,
|
|
"grad_norm": 0.32660199144818897,
|
|
"learning_rate": 2.666182900501042e-05,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09521114081144333,
|
|
"step": 835,
|
|
"valid_targets_mean": 3921.7,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 3.1939163498098857,
|
|
"grad_norm": 0.2789356519847509,
|
|
"learning_rate": 2.6482655753754657e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07062631845474243,
|
|
"step": 840,
|
|
"valid_targets_mean": 5405.6,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 3.2129277566539924,
|
|
"grad_norm": 0.2638806676304068,
|
|
"learning_rate": 2.6302899233635803e-05,
|
|
"loss": 0.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07542570680379868,
|
|
"step": 845,
|
|
"valid_targets_mean": 5283.7,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 3.2319391634980987,
|
|
"grad_norm": 0.26528690947633593,
|
|
"learning_rate": 2.6122575618022487e-05,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0592070035636425,
|
|
"step": 850,
|
|
"valid_targets_mean": 5144.5,
|
|
"valid_targets_min": 2072
|
|
},
|
|
{
|
|
"epoch": 3.2509505703422055,
|
|
"grad_norm": 0.2360500251447195,
|
|
"learning_rate": 2.594170113130703e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06634698063135147,
|
|
"step": 855,
|
|
"valid_targets_mean": 5086.4,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 3.2699619771863118,
|
|
"grad_norm": 0.2257716074443295,
|
|
"learning_rate": 2.57602920474457e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06248977407813072,
|
|
"step": 860,
|
|
"valid_targets_mean": 4885.1,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 3.288973384030418,
|
|
"grad_norm": 0.23619639963838657,
|
|
"learning_rate": 2.5578364688494475e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05550096556544304,
|
|
"step": 865,
|
|
"valid_targets_mean": 4916.5,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 3.307984790874525,
|
|
"grad_norm": 0.2255346409587296,
|
|
"learning_rate": 2.5395935423140487e-05,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0695202425122261,
|
|
"step": 870,
|
|
"valid_targets_mean": 5242.5,
|
|
"valid_targets_min": 2397
|
|
},
|
|
{
|
|
"epoch": 3.326996197718631,
|
|
"grad_norm": 0.20444394075732844,
|
|
"learning_rate": 2.5213020665229274e-05,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055293649435043335,
|
|
"step": 875,
|
|
"valid_targets_mean": 4999.2,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 3.346007604562738,
|
|
"grad_norm": 0.23554324333284074,
|
|
"learning_rate": 2.5029636872287953e-05,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060592859983444214,
|
|
"step": 880,
|
|
"valid_targets_mean": 5145.7,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 3.365019011406844,
|
|
"grad_norm": 0.24624770501083576,
|
|
"learning_rate": 2.4845800544044483e-05,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05072854831814766,
|
|
"step": 885,
|
|
"valid_targets_mean": 5084.9,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 3.3840304182509504,
|
|
"grad_norm": 0.2386065391922119,
|
|
"learning_rate": 2.4661528220943134e-05,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0552067756652832,
|
|
"step": 890,
|
|
"valid_targets_mean": 5369.3,
|
|
"valid_targets_min": 2995
|
|
},
|
|
{
|
|
"epoch": 3.403041825095057,
|
|
"grad_norm": 0.23227509902254379,
|
|
"learning_rate": 2.4476836482656257e-05,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052853602916002274,
|
|
"step": 895,
|
|
"valid_targets_mean": 5159.8,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 3.4220532319391634,
|
|
"grad_norm": 0.24229052309272628,
|
|
"learning_rate": 2.4291741946592575e-05,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05704169347882271,
|
|
"step": 900,
|
|
"valid_targets_mean": 4962.0,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 3.4410646387832697,
|
|
"grad_norm": 0.2474659051806386,
|
|
"learning_rate": 2.4106261266402023e-05,
|
|
"loss": 0.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05357471480965614,
|
|
"step": 905,
|
|
"valid_targets_mean": 5149.4,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 3.4600760456273765,
|
|
"grad_norm": 0.2350749738396892,
|
|
"learning_rate": 2.392041113047737e-05,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05722544714808464,
|
|
"step": 910,
|
|
"valid_targets_mean": 4918.9,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 3.4790874524714828,
|
|
"grad_norm": 0.4541902536996299,
|
|
"learning_rate": 2.3734208260452727e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08960682153701782,
|
|
"step": 915,
|
|
"valid_targets_mean": 4053.5,
|
|
"valid_targets_min": 2113
|
|
},
|
|
{
|
|
"epoch": 3.4980988593155895,
|
|
"grad_norm": 0.5243330478051801,
|
|
"learning_rate": 2.354766940969899e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09234726428985596,
|
|
"step": 920,
|
|
"valid_targets_mean": 4056.7,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 3.517110266159696,
|
|
"grad_norm": 0.6150184733706467,
|
|
"learning_rate": 2.3360811361816525e-05,
|
|
"loss": 0.1757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07612651586532593,
|
|
"step": 925,
|
|
"valid_targets_mean": 3331.2,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 3.5361216730038025,
|
|
"grad_norm": 0.5165218060285858,
|
|
"learning_rate": 2.317365092912503e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08710366487503052,
|
|
"step": 930,
|
|
"valid_targets_mean": 4188.5,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 3.555133079847909,
|
|
"grad_norm": 0.4638634963858916,
|
|
"learning_rate": 2.2986204951150926e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07611557096242905,
|
|
"step": 935,
|
|
"valid_targets_mean": 3755.5,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 3.574144486692015,
|
|
"grad_norm": 0.4507258054785201,
|
|
"learning_rate": 2.2798490293112216e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08385371416807175,
|
|
"step": 940,
|
|
"valid_targets_mean": 3787.1,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 3.593155893536122,
|
|
"grad_norm": 0.4753708125962138,
|
|
"learning_rate": 2.261052384440104e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08039424568414688,
|
|
"step": 945,
|
|
"valid_targets_mean": 3581.0,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 3.612167300380228,
|
|
"grad_norm": 0.30946760254853556,
|
|
"learning_rate": 2.2422322517064084e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446402221918106,
|
|
"step": 950,
|
|
"valid_targets_mean": 7765.3,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.6311787072243344,
|
|
"grad_norm": 0.26912647329408734,
|
|
"learning_rate": 2.2233903244280977e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1367168426513672,
|
|
"step": 955,
|
|
"valid_targets_mean": 7428.0,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.650190114068441,
|
|
"grad_norm": 0.25673057743212085,
|
|
"learning_rate": 2.2045282978840684e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14741817116737366,
|
|
"step": 960,
|
|
"valid_targets_mean": 7663.1,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 3.6692015209125475,
|
|
"grad_norm": 0.24695457622644892,
|
|
"learning_rate": 2.1856478691616262e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12863211333751678,
|
|
"step": 965,
|
|
"valid_targets_mean": 7248.7,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 3.6882129277566538,
|
|
"grad_norm": 0.24848314595919005,
|
|
"learning_rate": 2.166750737003787e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12329714745283127,
|
|
"step": 970,
|
|
"valid_targets_mean": 7425.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 3.7072243346007605,
|
|
"grad_norm": 0.2382432971462893,
|
|
"learning_rate": 2.1478386016564406e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11289852857589722,
|
|
"step": 975,
|
|
"valid_targets_mean": 6589.8,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.726235741444867,
|
|
"grad_norm": 0.2471516002178032,
|
|
"learning_rate": 2.1289131647153664e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12679801881313324,
|
|
"step": 980,
|
|
"valid_targets_mean": 7816.3,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 3.7452471482889735,
|
|
"grad_norm": 0.24941306933409185,
|
|
"learning_rate": 2.109976128973141e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13337580859661102,
|
|
"step": 985,
|
|
"valid_targets_mean": 8164.5,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 3.76425855513308,
|
|
"grad_norm": 0.29226310695744656,
|
|
"learning_rate": 2.0910291982659277e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12007047981023788,
|
|
"step": 990,
|
|
"valid_targets_mean": 7631.5,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 3.7832699619771866,
|
|
"grad_norm": 0.28943535096600376,
|
|
"learning_rate": 2.072074077320177e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12613262236118317,
|
|
"step": 995,
|
|
"valid_targets_mean": 7756.3,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 3.802281368821293,
|
|
"grad_norm": 0.30211188723707383,
|
|
"learning_rate": 2.053112471599245e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1403399258852005,
|
|
"step": 1000,
|
|
"valid_targets_mean": 8261.5,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 3.821292775665399,
|
|
"grad_norm": 0.30427872241401077,
|
|
"learning_rate": 2.03414608714995e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1107184886932373,
|
|
"step": 1005,
|
|
"valid_targets_mean": 7423.7,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 3.840304182509506,
|
|
"grad_norm": 0.28349897038009053,
|
|
"learning_rate": 2.0151766304490668e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12353726476430893,
|
|
"step": 1010,
|
|
"valid_targets_mean": 7188.3,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.859315589353612,
|
|
"grad_norm": 0.29812579187843263,
|
|
"learning_rate": 1.9962058082497944e-05,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055223409086465836,
|
|
"step": 1015,
|
|
"valid_targets_mean": 6371.1,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 3.8783269961977185,
|
|
"grad_norm": 0.3305078447005314,
|
|
"learning_rate": 1.9772353274281918e-05,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05937516316771507,
|
|
"step": 1020,
|
|
"valid_targets_mean": 6149.8,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 3.897338403041825,
|
|
"grad_norm": 0.26960355210295867,
|
|
"learning_rate": 1.9582668948295998e-05,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054687947034835815,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6012.3,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 3.9163498098859315,
|
|
"grad_norm": 0.23253306165339072,
|
|
"learning_rate": 1.9393022171150755e-05,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05543830618262291,
|
|
"step": 1030,
|
|
"valid_targets_mean": 6088.7,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 3.935361216730038,
|
|
"grad_norm": 0.2363002943662766,
|
|
"learning_rate": 1.9203430006078348e-05,
|
|
"loss": 0.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054433271288871765,
|
|
"step": 1035,
|
|
"valid_targets_mean": 6360.9,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 3.9543726235741445,
|
|
"grad_norm": 0.23732378066315013,
|
|
"learning_rate": 1.9013909511397262e-05,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05254880711436272,
|
|
"step": 1040,
|
|
"valid_targets_mean": 6166.2,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 3.973384030418251,
|
|
"grad_norm": 0.23101729553624345,
|
|
"learning_rate": 1.882447773897755e-05,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054109666496515274,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5871.1,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 3.9923954372623576,
|
|
"grad_norm": 0.24036940107376528,
|
|
"learning_rate": 1.8635151732706586e-05,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057960644364356995,
|
|
"step": 1050,
|
|
"valid_targets_mean": 6057.0,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.011406844106464,
|
|
"grad_norm": 0.3548766744558099,
|
|
"learning_rate": 1.8445948526955555e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07553637772798538,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4033.6,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 4.030418250950571,
|
|
"grad_norm": 0.43397890059403704,
|
|
"learning_rate": 1.8256885145046837e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08146659284830093,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4246.9,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 4.0494296577946765,
|
|
"grad_norm": 0.43481649208518697,
|
|
"learning_rate": 1.8067978597722325e-05,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0713481679558754,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4028.1,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 4.068441064638783,
|
|
"grad_norm": 0.42609901166474995,
|
|
"learning_rate": 1.787924588161291e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0739174485206604,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4385.1,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 4.08745247148289,
|
|
"grad_norm": 0.3613193797257685,
|
|
"learning_rate": 1.7690703977709248e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06789538264274597,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4030.6,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 4.106463878326996,
|
|
"grad_norm": 0.36319797246159663,
|
|
"learning_rate": 1.7502369849833908e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07442685216665268,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4733.2,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 4.1254752851711025,
|
|
"grad_norm": 0.33464784259493086,
|
|
"learning_rate": 1.7314260443115046e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06767291575670242,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4051.0,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 4.144486692015209,
|
|
"grad_norm": 0.3452301476773902,
|
|
"learning_rate": 1.712639268246184e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06405071169137955,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4000.5,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 4.163498098859316,
|
|
"grad_norm": 0.3443207863514706,
|
|
"learning_rate": 1.6938783471041647e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06821317225694656,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4199.9,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 4.182509505703422,
|
|
"grad_norm": 0.3598105706642877,
|
|
"learning_rate": 1.6751449688759194e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05240233615040779,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5206.1,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 4.201520912547529,
|
|
"grad_norm": 0.3092390079700352,
|
|
"learning_rate": 1.65644081907378e-05,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054483313113451004,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5036.4,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 4.220532319391635,
|
|
"grad_norm": 0.2763575400218225,
|
|
"learning_rate": 1.6377675805802882e-05,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048552319407463074,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5392.4,
|
|
"valid_targets_min": 1778
|
|
},
|
|
{
|
|
"epoch": 4.239543726235741,
|
|
"grad_norm": 0.28272183808385165,
|
|
"learning_rate": 1.6191269334967796e-05,
|
|
"loss": 0.1004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05730822682380676,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5387.4,
|
|
"valid_targets_min": 2268
|
|
},
|
|
{
|
|
"epoch": 4.258555133079848,
|
|
"grad_norm": 0.25657261472183496,
|
|
"learning_rate": 1.6005205549922173e-05,
|
|
"loss": 0.0997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047337133437395096,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5138.0,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 4.277566539923955,
|
|
"grad_norm": 0.25086844602226965,
|
|
"learning_rate": 1.5819501191522917e-05,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04738449677824974,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4930.4,
|
|
"valid_targets_min": 2597
|
|
},
|
|
{
|
|
"epoch": 4.2965779467680605,
|
|
"grad_norm": 0.23414973432884928,
|
|
"learning_rate": 1.5634172968287974e-05,
|
|
"loss": 0.0916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039102550595998764,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4864.0,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 4.315589353612167,
|
|
"grad_norm": 0.22679376081285432,
|
|
"learning_rate": 1.5449237554892997e-05,
|
|
"loss": 0.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046861957758665085,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5411.9,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 4.334600760456274,
|
|
"grad_norm": 0.27215497448339554,
|
|
"learning_rate": 1.5264711590671067e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04249775409698486,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5154.8,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 4.35361216730038,
|
|
"grad_norm": 0.23781571763344966,
|
|
"learning_rate": 1.5080611678115585e-05,
|
|
"loss": 0.0855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04478433355689049,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5145.1,
|
|
"valid_targets_min": 3055
|
|
},
|
|
{
|
|
"epoch": 4.3726235741444865,
|
|
"grad_norm": 0.25822483894259435,
|
|
"learning_rate": 1.4896954381386477e-05,
|
|
"loss": 0.0844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043877530843019485,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5636.9,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 4.391634980988593,
|
|
"grad_norm": 0.26714599071066003,
|
|
"learning_rate": 1.4713756224819872e-05,
|
|
"loss": 0.0829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04205765202641487,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5523.9,
|
|
"valid_targets_min": 3190
|
|
},
|
|
{
|
|
"epoch": 4.4106463878327,
|
|
"grad_norm": 0.23416180161057334,
|
|
"learning_rate": 1.453103369144134e-05,
|
|
"loss": 0.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03860076516866684,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4908.3,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 4.429657794676806,
|
|
"grad_norm": 0.24092755205006228,
|
|
"learning_rate": 1.4348803221482828e-05,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03292986378073692,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5170.9,
|
|
"valid_targets_min": 2206
|
|
},
|
|
{
|
|
"epoch": 4.448669201520913,
|
|
"grad_norm": 0.2769123972364188,
|
|
"learning_rate": 1.4167081210903501e-05,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04898636043071747,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5348.1,
|
|
"valid_targets_min": 2959
|
|
},
|
|
{
|
|
"epoch": 4.467680608365019,
|
|
"grad_norm": 0.2687335450112676,
|
|
"learning_rate": 1.3985884009914542e-05,
|
|
"loss": 0.0843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04135872796177864,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5146.4,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 4.486692015209125,
|
|
"grad_norm": 0.44423160941024686,
|
|
"learning_rate": 1.3805227921508018e-05,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05701296404004097,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3920.1,
|
|
"valid_targets_min": 2054
|
|
},
|
|
{
|
|
"epoch": 4.505703422053232,
|
|
"grad_norm": 0.4307059905800158,
|
|
"learning_rate": 1.3625129199990083e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06144963577389717,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4123.0,
|
|
"valid_targets_min": 1702
|
|
},
|
|
{
|
|
"epoch": 4.524714828897339,
|
|
"grad_norm": 0.4735169631189255,
|
|
"learning_rate": 1.3445604049518503e-05,
|
|
"loss": 0.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058991312980651855,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4134.7,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 4.5437262357414445,
|
|
"grad_norm": 0.47226895596823215,
|
|
"learning_rate": 1.3266668622644696e-05,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05097019299864769,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3485.5,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 4.562737642585551,
|
|
"grad_norm": 0.4233485609105754,
|
|
"learning_rate": 1.3088339018860439e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05011675879359245,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3915.2,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 4.581749049429658,
|
|
"grad_norm": 0.39098121916184353,
|
|
"learning_rate": 1.291063128314934e-05,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04419676959514618,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3678.2,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 4.600760456273765,
|
|
"grad_norm": 0.555201081378785,
|
|
"learning_rate": 1.2733561404543177e-05,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12111985683441162,
|
|
"step": 1210,
|
|
"valid_targets_mean": 6885.0,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.619771863117871,
|
|
"grad_norm": 0.36519466564325903,
|
|
"learning_rate": 1.2557145314683364e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12528151273727417,
|
|
"step": 1215,
|
|
"valid_targets_mean": 7251.2,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 4.638783269961977,
|
|
"grad_norm": 0.3187163291454204,
|
|
"learning_rate": 1.2381398886387466e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13393153250217438,
|
|
"step": 1220,
|
|
"valid_targets_mean": 7708.2,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.657794676806084,
|
|
"grad_norm": 0.2853507467833514,
|
|
"learning_rate": 1.2206337932221094e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11738086491823196,
|
|
"step": 1225,
|
|
"valid_targets_mean": 6611.8,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 4.67680608365019,
|
|
"grad_norm": 0.27103089684314385,
|
|
"learning_rate": 1.2031978203075172e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1208362951874733,
|
|
"step": 1230,
|
|
"valid_targets_mean": 7745.3,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.695817490494297,
|
|
"grad_norm": 0.26218276824066433,
|
|
"learning_rate": 1.185833538674879e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11319077014923096,
|
|
"step": 1235,
|
|
"valid_targets_mean": 7991.2,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 4.714828897338403,
|
|
"grad_norm": 0.2604914676003978,
|
|
"learning_rate": 1.1685425106537688e-05,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12409955263137817,
|
|
"step": 1240,
|
|
"valid_targets_mean": 8260.1,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 4.733840304182509,
|
|
"grad_norm": 0.3533617817645549,
|
|
"learning_rate": 1.1513262919828603e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12882865965366364,
|
|
"step": 1245,
|
|
"valid_targets_mean": 9195.9,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 4.752851711026616,
|
|
"grad_norm": 0.273675157838543,
|
|
"learning_rate": 1.1341864316699463e-05,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10350463539361954,
|
|
"step": 1250,
|
|
"valid_targets_mean": 6803.8,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.771863117870723,
|
|
"grad_norm": 0.3301627103857975,
|
|
"learning_rate": 1.1171244718525726e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09687048941850662,
|
|
"step": 1255,
|
|
"valid_targets_mean": 7715.6,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.7908745247148286,
|
|
"grad_norm": 0.3183754012435459,
|
|
"learning_rate": 1.100141947659288e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11653482168912888,
|
|
"step": 1260,
|
|
"valid_targets_mean": 7937.8,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 4.809885931558935,
|
|
"grad_norm": 0.305218952013167,
|
|
"learning_rate": 1.0832403870715153e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10665573924779892,
|
|
"step": 1265,
|
|
"valid_targets_mean": 7325.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 4.828897338403042,
|
|
"grad_norm": 0.37392762584119593,
|
|
"learning_rate": 1.0664213107860827e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10478193312883377,
|
|
"step": 1270,
|
|
"valid_targets_mean": 7820.8,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 4.847908745247148,
|
|
"grad_norm": 0.28454754566571105,
|
|
"learning_rate": 1.0496862320783926e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048708055168390274,
|
|
"step": 1275,
|
|
"valid_targets_mean": 6358.3,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 4.866920152091255,
|
|
"grad_norm": 0.2808196037434785,
|
|
"learning_rate": 1.033036656666272e-05,
|
|
"loss": 0.0922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04468848183751106,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6175.8,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 4.885931558935361,
|
|
"grad_norm": 0.26500582432214914,
|
|
"learning_rate": 1.016474082574495e-05,
|
|
"loss": 0.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04400516673922539,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5880.3,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 4.904942965779467,
|
|
"grad_norm": 0.24664528337491692,
|
|
"learning_rate": 1.0000000000000006e-05,
|
|
"loss": 0.0871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03696668520569801,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5792.0,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 4.923954372623574,
|
|
"grad_norm": 0.2286048807471563,
|
|
"learning_rate": 9.836158911778132e-06,
|
|
"loss": 0.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04410147666931152,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6356.2,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.942965779467681,
|
|
"grad_norm": 0.20614916293927305,
|
|
"learning_rate": 9.673232302476819e-06,
|
|
"loss": 0.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039480239152908325,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5821.0,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 4.961977186311787,
|
|
"grad_norm": 0.1928349932521555,
|
|
"learning_rate": 9.511234831214464e-06,
|
|
"loss": 0.0836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03988807275891304,
|
|
"step": 1305,
|
|
"valid_targets_mean": 5897.6,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 4.980988593155893,
|
|
"grad_norm": 0.18807335044788295,
|
|
"learning_rate": 9.350181073511412e-06,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039488207548856735,
|
|
"step": 1310,
|
|
"valid_targets_mean": 6550.6,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.21533958753633334,
|
|
"learning_rate": 9.190085519978575e-06,
|
|
"loss": 0.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03866899386048317,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5367.0,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 5.019011406844107,
|
|
"grad_norm": 0.35656357662267013,
|
|
"learning_rate": 9.030962575013622e-06,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058930665254592896,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4315.6,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 5.038022813688213,
|
|
"grad_norm": 0.3993925897181664,
|
|
"learning_rate": 8.872826555505012e-06,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05182621255517006,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4631.1,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 5.057034220532319,
|
|
"grad_norm": 0.39173542137242245,
|
|
"learning_rate": 8.715691689543761e-06,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049891237169504166,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4061.9,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 5.076045627376426,
|
|
"grad_norm": 0.3346676502787186,
|
|
"learning_rate": 8.559572115143406e-06,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053807903081178665,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4492.1,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 5.095057034220532,
|
|
"grad_norm": 0.33046866192129126,
|
|
"learning_rate": 8.404481878967848e-06,
|
|
"loss": 0.0986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04590759798884392,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4387.7,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 5.114068441064639,
|
|
"grad_norm": 0.2862525406517934,
|
|
"learning_rate": 8.250434935067593e-06,
|
|
"loss": 0.0988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050703514367341995,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4403.1,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 5.133079847908745,
|
|
"grad_norm": 0.2830684273551724,
|
|
"learning_rate": 8.09744514362421e-06,
|
|
"loss": 0.0936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0482289083302021,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4255.0,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 5.152091254752852,
|
|
"grad_norm": 0.2936949182763945,
|
|
"learning_rate": 7.945526269703295e-06,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042581137269735336,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4031.0,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 5.171102661596958,
|
|
"grad_norm": 0.32369753015268776,
|
|
"learning_rate": 7.794691982015991e-06,
|
|
"loss": 0.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04366425797343254,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4213.6,
|
|
"valid_targets_min": 1827
|
|
},
|
|
{
|
|
"epoch": 5.190114068441065,
|
|
"grad_norm": 0.3876166105871689,
|
|
"learning_rate": 7.644955851689129e-06,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04470920190215111,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5610.1,
|
|
"valid_targets_min": 2840
|
|
},
|
|
{
|
|
"epoch": 5.2091254752851714,
|
|
"grad_norm": 0.26999235527739424,
|
|
"learning_rate": 7.496331351044226e-06,
|
|
"loss": 0.0848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04106168821454048,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4945.7,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 5.228136882129277,
|
|
"grad_norm": 0.25109703607220174,
|
|
"learning_rate": 7.348831852385265e-06,
|
|
"loss": 0.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03378221392631531,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5207.0,
|
|
"valid_targets_min": 2341
|
|
},
|
|
{
|
|
"epoch": 5.247148288973384,
|
|
"grad_norm": 0.2350369927621042,
|
|
"learning_rate": 7.202470626795626e-06,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03785305097699165,
|
|
"step": 1380,
|
|
"valid_targets_mean": 5175.4,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 5.266159695817491,
|
|
"grad_norm": 0.21697067917279458,
|
|
"learning_rate": 7.057260842943949e-06,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031559597700834274,
|
|
"step": 1385,
|
|
"valid_targets_mean": 5396.7,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 5.285171102661597,
|
|
"grad_norm": 0.22438779122417016,
|
|
"learning_rate": 6.9132155658993785e-06,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03445998206734657,
|
|
"step": 1390,
|
|
"valid_targets_mean": 5246.9,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 5.304182509505703,
|
|
"grad_norm": 0.2117622397099648,
|
|
"learning_rate": 6.770347755955982e-06,
|
|
"loss": 0.0668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03348778560757637,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4957.9,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 5.32319391634981,
|
|
"grad_norm": 0.200452647832453,
|
|
"learning_rate": 6.628670267466697e-06,
|
|
"loss": 0.0626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028821026906371117,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4951.1,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 5.342205323193916,
|
|
"grad_norm": 0.21955745199536397,
|
|
"learning_rate": 6.488195847686795e-06,
|
|
"loss": 0.061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029862603172659874,
|
|
"step": 1405,
|
|
"valid_targets_mean": 5112.8,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 5.361216730038023,
|
|
"grad_norm": 0.22826378564937264,
|
|
"learning_rate": 6.348937135626922e-06,
|
|
"loss": 0.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0312732495367527,
|
|
"step": 1410,
|
|
"valid_targets_mean": 5321.8,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 5.380228136882129,
|
|
"grad_norm": 0.20427947265228474,
|
|
"learning_rate": 6.210906660915938e-06,
|
|
"loss": 0.0574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02499421499669552,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4986.9,
|
|
"valid_targets_min": 2576
|
|
},
|
|
{
|
|
"epoch": 5.399239543726236,
|
|
"grad_norm": 0.22020819639088524,
|
|
"learning_rate": 6.074116842673585e-06,
|
|
"loss": 0.0583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0311068594455719,
|
|
"step": 1420,
|
|
"valid_targets_mean": 5325.8,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 5.418250950570342,
|
|
"grad_norm": 0.22574926260061123,
|
|
"learning_rate": 5.938579988393099e-06,
|
|
"loss": 0.0562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029961155727505684,
|
|
"step": 1425,
|
|
"valid_targets_mean": 5161.7,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 5.437262357414449,
|
|
"grad_norm": 0.20400844513752284,
|
|
"learning_rate": 5.80430829283382e-06,
|
|
"loss": 0.0535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02954328991472721,
|
|
"step": 1430,
|
|
"valid_targets_mean": 4941.3,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 5.4562737642585555,
|
|
"grad_norm": 0.22004866648638607,
|
|
"learning_rate": 5.671313836924039e-06,
|
|
"loss": 0.0581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029622698202729225,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5260.5,
|
|
"valid_targets_min": 2625
|
|
},
|
|
{
|
|
"epoch": 5.475285171102661,
|
|
"grad_norm": 0.23172139547283307,
|
|
"learning_rate": 5.539608586673988e-06,
|
|
"loss": 0.0581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032255351543426514,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4271.5,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 5.494296577946768,
|
|
"grad_norm": 0.30348623919330886,
|
|
"learning_rate": 5.409204392099224e-06,
|
|
"loss": 0.071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029614215716719627,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3648.6,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 5.513307984790875,
|
|
"grad_norm": 0.3446203794591457,
|
|
"learning_rate": 5.280112986154462e-06,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03383517637848854,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3692.1,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 5.532319391634981,
|
|
"grad_norm": 0.304903377781796,
|
|
"learning_rate": 5.152345983677866e-06,
|
|
"loss": 0.067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03009621985256672,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4084.4,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 5.551330798479087,
|
|
"grad_norm": 0.2991592969123906,
|
|
"learning_rate": 5.02591488034609e-06,
|
|
"loss": 0.0671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03557578846812248,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3740.4,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 5.570342205323194,
|
|
"grad_norm": 0.29912614724436565,
|
|
"learning_rate": 4.900831051639892e-06,
|
|
"loss": 0.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03164946287870407,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3882.0,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 5.589353612167301,
|
|
"grad_norm": 0.27811420728368913,
|
|
"learning_rate": 4.777105751820708e-06,
|
|
"loss": 0.0576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03166574239730835,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4136.7,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 5.608365019011407,
|
|
"grad_norm": 0.9103501807156557,
|
|
"learning_rate": 4.654750112918007e-06,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657539755105972,
|
|
"step": 1475,
|
|
"valid_targets_mean": 7437.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.6273764258555135,
|
|
"grad_norm": 0.5130315310560948,
|
|
"learning_rate": 4.533775143727748e-06,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216835007071495,
|
|
"step": 1480,
|
|
"valid_targets_mean": 7844.2,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.64638783269962,
|
|
"grad_norm": 0.38916103997249035,
|
|
"learning_rate": 4.414191728821838e-06,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12430062144994736,
|
|
"step": 1485,
|
|
"valid_targets_mean": 7960.9,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 5.665399239543726,
|
|
"grad_norm": 0.34346190346026684,
|
|
"learning_rate": 4.296010627568823e-06,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269465535879135,
|
|
"step": 1490,
|
|
"valid_targets_mean": 8483.1,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 5.684410646387833,
|
|
"grad_norm": 0.26852843419670336,
|
|
"learning_rate": 4.17924247316585e-06,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1000281274318695,
|
|
"step": 1495,
|
|
"valid_targets_mean": 7507.5,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.7034220532319395,
|
|
"grad_norm": 0.23814374694727244,
|
|
"learning_rate": 4.0638977716819105e-06,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09797212481498718,
|
|
"step": 1500,
|
|
"valid_targets_mean": 6985.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 5.722433460076045,
|
|
"grad_norm": 0.26229166135458226,
|
|
"learning_rate": 3.949986901112608e-06,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10039948672056198,
|
|
"step": 1505,
|
|
"valid_targets_mean": 7685.7,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 5.741444866920152,
|
|
"grad_norm": 0.24951933668512646,
|
|
"learning_rate": 3.837520110446391e-06,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09159765392541885,
|
|
"step": 1510,
|
|
"valid_targets_mean": 7207.1,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 5.760456273764259,
|
|
"grad_norm": 0.28127344204747545,
|
|
"learning_rate": 3.7265075187424373e-06,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10092798620462418,
|
|
"step": 1515,
|
|
"valid_targets_mean": 7820.0,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 5.779467680608365,
|
|
"grad_norm": 0.2563140724682205,
|
|
"learning_rate": 3.616959114220162e-06,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10091284662485123,
|
|
"step": 1520,
|
|
"valid_targets_mean": 8022.3,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.798479087452471,
|
|
"grad_norm": 0.27238599596323204,
|
|
"learning_rate": 3.508884753360593e-06,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08664939552545547,
|
|
"step": 1525,
|
|
"valid_targets_mean": 7562.2,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 5.817490494296578,
|
|
"grad_norm": 0.2922682553948556,
|
|
"learning_rate": 3.402294160019499e-06,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09690836071968079,
|
|
"step": 1530,
|
|
"valid_targets_mean": 8467.6,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 5.836501901140684,
|
|
"grad_norm": 0.2878635630184777,
|
|
"learning_rate": 3.2971969245525215e-06,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09080066531896591,
|
|
"step": 1535,
|
|
"valid_targets_mean": 7542.6,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.855513307984791,
|
|
"grad_norm": 0.23862395323068222,
|
|
"learning_rate": 3.193602502952291e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038949862122535706,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5823.9,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 5.8745247148288975,
|
|
"grad_norm": 0.2387967291215592,
|
|
"learning_rate": 3.0915202159976453e-06,
|
|
"loss": 0.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042837340384721756,
|
|
"step": 1545,
|
|
"valid_targets_mean": 6544.8,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 5.893536121673003,
|
|
"grad_norm": 0.1881656636618961,
|
|
"learning_rate": 2.9909592484149795e-06,
|
|
"loss": 0.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0399971567094326,
|
|
"step": 1550,
|
|
"valid_targets_mean": 6424.3,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 5.91254752851711,
|
|
"grad_norm": 0.17058202140819237,
|
|
"learning_rate": 2.8919286480518803e-06,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03311106935143471,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5882.0,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 5.931558935361217,
|
|
"grad_norm": 0.1711908706695631,
|
|
"learning_rate": 2.794437325063064e-06,
|
|
"loss": 0.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03513427823781967,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6166.6,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 5.9505703422053235,
|
|
"grad_norm": 0.17206848728883706,
|
|
"learning_rate": 2.6984940511086665e-06,
|
|
"loss": 0.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03399783745408058,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5971.0,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.969581749049429,
|
|
"grad_norm": 0.16216773283513025,
|
|
"learning_rate": 2.604107458565066e-06,
|
|
"loss": 0.0667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03391076996922493,
|
|
"step": 1570,
|
|
"valid_targets_mean": 6159.1,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 5.988593155893536,
|
|
"grad_norm": 0.16031421414918762,
|
|
"learning_rate": 2.5112860397481553e-06,
|
|
"loss": 0.0655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032533951103687286,
|
|
"step": 1575,
|
|
"valid_targets_mean": 6163.3,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 6.007604562737643,
|
|
"grad_norm": 0.3155276653530433,
|
|
"learning_rate": 2.4200381461492817e-06,
|
|
"loss": 0.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03919023647904396,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4252.4,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 6.026615969581749,
|
|
"grad_norm": 0.28335210258711535,
|
|
"learning_rate": 2.330371987683815e-06,
|
|
"loss": 0.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04079696908593178,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4523.0,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 6.0456273764258555,
|
|
"grad_norm": 0.3167321882363807,
|
|
"learning_rate": 2.242295631952496e-06,
|
|
"loss": 0.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03657423332333565,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4048.7,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 6.064638783269962,
|
|
"grad_norm": 0.278519071520387,
|
|
"learning_rate": 2.155817003515539e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03454483672976494,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4060.6,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 6.083650190114068,
|
|
"grad_norm": 0.2629161013037092,
|
|
"learning_rate": 2.0709438831796303e-06,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03606827184557915,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4122.0,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 6.102661596958175,
|
|
"grad_norm": 0.23665623570717814,
|
|
"learning_rate": 1.987683907297888e-06,
|
|
"loss": 0.0665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03397540748119354,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4094.8,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 6.1216730038022815,
|
|
"grad_norm": 0.23188699309209682,
|
|
"learning_rate": 1.9060445670827477e-06,
|
|
"loss": 0.0678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03223345801234245,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3793.4,
|
|
"valid_targets_min": 2184
|
|
},
|
|
{
|
|
"epoch": 6.140684410646388,
|
|
"grad_norm": 0.22419403701656915,
|
|
"learning_rate": 1.826033207932001e-06,
|
|
"loss": 0.0619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027402834966778755,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4053.2,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 6.159695817490494,
|
|
"grad_norm": 0.22022828114085813,
|
|
"learning_rate": 1.7476570287678396e-06,
|
|
"loss": 0.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02872721664607525,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4274.1,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 6.178707224334601,
|
|
"grad_norm": 0.21969595617322446,
|
|
"learning_rate": 1.6709230813892042e-06,
|
|
"loss": 0.0583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03280468285083771,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4422.8,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 6.197718631178708,
|
|
"grad_norm": 0.35529496426467005,
|
|
"learning_rate": 1.5958382698372644e-06,
|
|
"loss": 0.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033318690955638885,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5303.7,
|
|
"valid_targets_min": 2553
|
|
},
|
|
{
|
|
"epoch": 6.216730038022813,
|
|
"grad_norm": 0.27558919056523234,
|
|
"learning_rate": 1.5224093497742654e-06,
|
|
"loss": 0.0705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035232577472925186,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5254.6,
|
|
"valid_targets_min": 2168
|
|
},
|
|
{
|
|
"epoch": 6.23574144486692,
|
|
"grad_norm": 0.22612201515640176,
|
|
"learning_rate": 1.4506429278756672e-06,
|
|
"loss": 0.0617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029211828485131264,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4881.5,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 6.254752851711027,
|
|
"grad_norm": 0.20633288036705308,
|
|
"learning_rate": 1.380545461235736e-06,
|
|
"loss": 0.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029766544699668884,
|
|
"step": 1645,
|
|
"valid_targets_mean": 5599.6,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 6.273764258555133,
|
|
"grad_norm": 0.2016643153825409,
|
|
"learning_rate": 1.3121232567865793e-06,
|
|
"loss": 0.0542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025733182206749916,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4711.3,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 6.2927756653992395,
|
|
"grad_norm": 0.18952082398350228,
|
|
"learning_rate": 1.2453824707306628e-06,
|
|
"loss": 0.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030064953491091728,
|
|
"step": 1655,
|
|
"valid_targets_mean": 5566.0,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 6.311787072243346,
|
|
"grad_norm": 0.16436076167259167,
|
|
"learning_rate": 1.180329107986955e-06,
|
|
"loss": 0.0504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019878484308719635,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4913.7,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 6.330798479087452,
|
|
"grad_norm": 0.15922497867390237,
|
|
"learning_rate": 1.1169690216505846e-06,
|
|
"loss": 0.0477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021651925519108772,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4946.9,
|
|
"valid_targets_min": 2188
|
|
},
|
|
{
|
|
"epoch": 6.349809885931559,
|
|
"grad_norm": 0.16748076773602358,
|
|
"learning_rate": 1.0553079124662768e-06,
|
|
"loss": 0.0476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02340800315141678,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5133.6,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 6.3688212927756656,
|
|
"grad_norm": 0.16380388178076224,
|
|
"learning_rate": 9.953513283153905e-07,
|
|
"loss": 0.0454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02057066187262535,
|
|
"step": 1675,
|
|
"valid_targets_mean": 5151.9,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 6.387832699619771,
|
|
"grad_norm": 0.16325774776610502,
|
|
"learning_rate": 9.371046637167835e-07,
|
|
"loss": 0.0438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023373225703835487,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4959.3,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 6.406844106463878,
|
|
"grad_norm": 0.16441190471893835,
|
|
"learning_rate": 8.805731593414268e-07,
|
|
"loss": 0.0442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018379444256424904,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5263.2,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 6.425855513307985,
|
|
"grad_norm": 0.1638841692314567,
|
|
"learning_rate": 8.25761901540889e-07,
|
|
"loss": 0.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01931699924170971,
|
|
"step": 1690,
|
|
"valid_targets_mean": 5157.2,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 6.444866920152092,
|
|
"grad_norm": 0.1735206177979583,
|
|
"learning_rate": 7.726758218897079e-07,
|
|
"loss": 0.0401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02396308444440365,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5271.4,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 6.4638783269961975,
|
|
"grad_norm": 0.17296056006964222,
|
|
"learning_rate": 7.213196967416624e-07,
|
|
"loss": 0.0448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02491677738726139,
|
|
"step": 1700,
|
|
"valid_targets_mean": 5138.1,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 6.482889733840304,
|
|
"grad_norm": 0.25309281952445806,
|
|
"learning_rate": 6.716981468000372e-07,
|
|
"loss": 0.0453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025967717170715332,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3941.6,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 6.501901140684411,
|
|
"grad_norm": 0.2599714526800321,
|
|
"learning_rate": 6.238156367018744e-07,
|
|
"loss": 0.0495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025150617584586143,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4081.7,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 6.520912547528517,
|
|
"grad_norm": 0.2665439212567454,
|
|
"learning_rate": 5.776764746162778e-07,
|
|
"loss": 0.0516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02574659138917923,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3830.1,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 6.5399239543726235,
|
|
"grad_norm": 0.23963977576827142,
|
|
"learning_rate": 5.332848118567891e-07,
|
|
"loss": 0.0464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023752614855766296,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3843.3,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 6.55893536121673,
|
|
"grad_norm": 0.2133991211840642,
|
|
"learning_rate": 4.906446425078782e-07,
|
|
"loss": 0.046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02487846277654171,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3924.0,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 6.577946768060836,
|
|
"grad_norm": 0.19507916434897585,
|
|
"learning_rate": 4.497598030655814e-07,
|
|
"loss": 0.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018777957186102867,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3620.2,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 6.596958174904943,
|
|
"grad_norm": 0.18710157722199877,
|
|
"learning_rate": 4.106339720923136e-07,
|
|
"loss": 0.0375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01577587239444256,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3715.8,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 6.61596958174905,
|
|
"grad_norm": 1.1221845368347991,
|
|
"learning_rate": 3.732706698859012e-07,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11630845069885254,
|
|
"step": 1740,
|
|
"valid_targets_mean": 7975.3,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 6.634980988593156,
|
|
"grad_norm": 1.173169716414322,
|
|
"learning_rate": 3.376732581628406e-07,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11798709630966187,
|
|
"step": 1745,
|
|
"valid_targets_mean": 7661.3,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 6.653992395437262,
|
|
"grad_norm": 0.7806879785562468,
|
|
"learning_rate": 3.038449397558396e-07,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11574181169271469,
|
|
"step": 1750,
|
|
"valid_targets_mean": 7740.2,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 6.673003802281369,
|
|
"grad_norm": 0.6320222275437308,
|
|
"learning_rate": 2.7178875832563734e-07,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10704654455184937,
|
|
"step": 1755,
|
|
"valid_targets_mean": 7938.4,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 6.692015209125476,
|
|
"grad_norm": 0.45835070478875656,
|
|
"learning_rate": 2.4150759808716283e-07,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09011327475309372,
|
|
"step": 1760,
|
|
"valid_targets_mean": 6955.9,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 6.7110266159695815,
|
|
"grad_norm": 0.3833941721398251,
|
|
"learning_rate": 2.1300418355002296e-07,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09300309419631958,
|
|
"step": 1765,
|
|
"valid_targets_mean": 7178.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.730038022813688,
|
|
"grad_norm": 0.3737621062405827,
|
|
"learning_rate": 1.862810792733849e-07,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10089872032403946,
|
|
"step": 1770,
|
|
"valid_targets_mean": 8065.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.749049429657795,
|
|
"grad_norm": 0.337929681633567,
|
|
"learning_rate": 1.6134068963520988e-07,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08463618904352188,
|
|
"step": 1775,
|
|
"valid_targets_mean": 7800.7,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 6.768060836501901,
|
|
"grad_norm": 0.3300710696811267,
|
|
"learning_rate": 1.381852586159349e-07,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09186343103647232,
|
|
"step": 1780,
|
|
"valid_targets_mean": 7847.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 6.787072243346008,
|
|
"grad_norm": 0.30122393387768587,
|
|
"learning_rate": 1.1681686959657879e-07,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07956251502037048,
|
|
"step": 1785,
|
|
"valid_targets_mean": 7130.6,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 6.806083650190114,
|
|
"grad_norm": 0.2834417787280566,
|
|
"learning_rate": 9.723744517128098e-08,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08768697828054428,
|
|
"step": 1790,
|
|
"valid_targets_mean": 7978.8,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.82509505703422,
|
|
"grad_norm": 0.30669459692474976,
|
|
"learning_rate": 7.944874697432436e-08,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07766545563936234,
|
|
"step": 1795,
|
|
"valid_targets_mean": 7157.9,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 6.844106463878327,
|
|
"grad_norm": 0.30499859672371843,
|
|
"learning_rate": 6.345237552163541e-08,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08147227019071579,
|
|
"step": 1800,
|
|
"valid_targets_mean": 7234.4,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 6.863117870722434,
|
|
"grad_norm": 0.2567755838541607,
|
|
"learning_rate": 4.9249770066777113e-08,
|
|
"loss": 0.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03280698135495186,
|
|
"step": 1805,
|
|
"valid_targets_mean": 6154.4,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 6.8821292775665395,
|
|
"grad_norm": 0.22981178838040223,
|
|
"learning_rate": 3.684220847145481e-08,
|
|
"loss": 0.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03351025655865669,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5865.2,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 6.901140684410646,
|
|
"grad_norm": 0.23048498681871035,
|
|
"learning_rate": 2.623080709054149e-08,
|
|
"loss": 0.0685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03717043623328209,
|
|
"step": 1815,
|
|
"valid_targets_mean": 6350.6,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 6.920152091254753,
|
|
"grad_norm": 0.198959962331718,
|
|
"learning_rate": 1.7416520671635905e-08,
|
|
"loss": 0.0647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03324683755636215,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5735.5,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 6.93916349809886,
|
|
"grad_norm": 0.186783859845399,
|
|
"learning_rate": 1.0400142269164637e-08,
|
|
"loss": 0.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03211604431271553,
|
|
"step": 1825,
|
|
"valid_targets_mean": 6206.8,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 6.9581749049429655,
|
|
"grad_norm": 0.19629442490410282,
|
|
"learning_rate": 5.182303173016934e-09,
|
|
"loss": 0.0644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03166944161057472,
|
|
"step": 1830,
|
|
"valid_targets_mean": 6104.3,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 6.977186311787072,
|
|
"grad_norm": 0.1829179481431056,
|
|
"learning_rate": 1.7634728517545996e-09,
|
|
"loss": 0.0629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029660126194357872,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6447.5,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 6.996197718631179,
|
|
"grad_norm": 0.18366914112103155,
|
|
"learning_rate": 1.439589103724437e-10,
|
|
"loss": 0.0618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031334131956100464,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5833.1,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022832343354821205,
|
|
"step": 1841,
|
|
"total_flos": 4.836413774367818e+18,
|
|
"train_loss": 0.19618723029549,
|
|
"train_runtime": 26839.4318,
|
|
"train_samples_per_second": 6.577,
|
|
"train_steps_per_second": 0.069,
|
|
"valid_targets_mean": 5367.0,
|
|
"valid_targets_min": 1264
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1841,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 750,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.836413774367818e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|