Files
g1_clean_hybrid_25k_8b/trainer_state.json
ModelHub XC 86555376d8 初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/g1_clean_hybrid_25k_8b
Source: Original Platform
2026-05-01 10:50:12 +08:00

4096 lines
114 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1841,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019011406844106463,
"grad_norm": 18.768985433269826,
"learning_rate": 8.64864864864865e-07,
"loss": 0.8865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.45824751257896423,
"step": 5,
"valid_targets_mean": 4315.6,
"valid_targets_min": 1507
},
{
"epoch": 0.03802281368821293,
"grad_norm": 4.161641756419172,
"learning_rate": 1.945945945945946e-06,
"loss": 0.7829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32671618461608887,
"step": 10,
"valid_targets_mean": 4631.1,
"valid_targets_min": 1345
},
{
"epoch": 0.057034220532319393,
"grad_norm": 1.6968311772903435,
"learning_rate": 3.0270270270270274e-06,
"loss": 0.6528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3004853129386902,
"step": 15,
"valid_targets_mean": 4061.9,
"valid_targets_min": 1929
},
{
"epoch": 0.07604562737642585,
"grad_norm": 0.9491466398184562,
"learning_rate": 4.108108108108108e-06,
"loss": 0.5875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27897870540618896,
"step": 20,
"valid_targets_mean": 4492.1,
"valid_targets_min": 1512
},
{
"epoch": 0.09505703422053231,
"grad_norm": 0.5548287452373166,
"learning_rate": 5.18918918918919e-06,
"loss": 0.5445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2667747437953949,
"step": 25,
"valid_targets_mean": 4387.7,
"valid_targets_min": 1324
},
{
"epoch": 0.11406844106463879,
"grad_norm": 0.49652319636920533,
"learning_rate": 6.270270270270271e-06,
"loss": 0.4983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2525099217891693,
"step": 30,
"valid_targets_mean": 4403.1,
"valid_targets_min": 2036
},
{
"epoch": 0.13307984790874525,
"grad_norm": 0.40788904142748184,
"learning_rate": 7.3513513513513525e-06,
"loss": 0.4857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23482461273670197,
"step": 35,
"valid_targets_mean": 4255.0,
"valid_targets_min": 2083
},
{
"epoch": 0.1520912547528517,
"grad_norm": 0.3460099864906639,
"learning_rate": 8.432432432432434e-06,
"loss": 0.464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21213467419147491,
"step": 40,
"valid_targets_mean": 4031.0,
"valid_targets_min": 1761
},
{
"epoch": 0.17110266159695817,
"grad_norm": 0.3294697867452768,
"learning_rate": 9.513513513513514e-06,
"loss": 0.4457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20797699689865112,
"step": 45,
"valid_targets_mean": 4213.6,
"valid_targets_min": 1827
},
{
"epoch": 0.19011406844106463,
"grad_norm": 0.3007798207728767,
"learning_rate": 1.0594594594594597e-05,
"loss": 0.3791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1763700693845749,
"step": 50,
"valid_targets_mean": 5610.1,
"valid_targets_min": 2840
},
{
"epoch": 0.20912547528517111,
"grad_norm": 0.3135648808425615,
"learning_rate": 1.1675675675675677e-05,
"loss": 0.3193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15119099617004395,
"step": 55,
"valid_targets_mean": 4945.7,
"valid_targets_min": 1853
},
{
"epoch": 0.22813688212927757,
"grad_norm": 0.3001927364971795,
"learning_rate": 1.2756756756756758e-05,
"loss": 0.3044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13437247276306152,
"step": 60,
"valid_targets_mean": 5207.0,
"valid_targets_min": 2341
},
{
"epoch": 0.24714828897338403,
"grad_norm": 0.26327896731667505,
"learning_rate": 1.383783783783784e-05,
"loss": 0.2891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14838241040706635,
"step": 65,
"valid_targets_mean": 5175.4,
"valid_targets_min": 2093
},
{
"epoch": 0.2661596958174905,
"grad_norm": 0.23565637422517188,
"learning_rate": 1.491891891891892e-05,
"loss": 0.2835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1273541897535324,
"step": 70,
"valid_targets_mean": 5396.7,
"valid_targets_min": 1885
},
{
"epoch": 0.28517110266159695,
"grad_norm": 0.23292525842968348,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.2734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13985608518123627,
"step": 75,
"valid_targets_mean": 5246.9,
"valid_targets_min": 2203
},
{
"epoch": 0.3041825095057034,
"grad_norm": 0.21583976978780783,
"learning_rate": 1.7081081081081083e-05,
"loss": 0.2672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13641823828220367,
"step": 80,
"valid_targets_mean": 4957.9,
"valid_targets_min": 1708
},
{
"epoch": 0.3231939163498099,
"grad_norm": 0.21703167466244688,
"learning_rate": 1.8162162162162164e-05,
"loss": 0.2635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12753139436244965,
"step": 85,
"valid_targets_mean": 4951.1,
"valid_targets_min": 1826
},
{
"epoch": 0.34220532319391633,
"grad_norm": 0.21798199551190492,
"learning_rate": 1.9243243243243244e-05,
"loss": 0.2607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13001735508441925,
"step": 90,
"valid_targets_mean": 5112.8,
"valid_targets_min": 2968
},
{
"epoch": 0.3612167300380228,
"grad_norm": 0.23287202361449336,
"learning_rate": 2.0324324324324328e-05,
"loss": 0.2588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13635805249214172,
"step": 95,
"valid_targets_mean": 5321.8,
"valid_targets_min": 2382
},
{
"epoch": 0.38022813688212925,
"grad_norm": 0.2771402834224665,
"learning_rate": 2.1405405405405405e-05,
"loss": 0.2552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12471064180135727,
"step": 100,
"valid_targets_mean": 4986.9,
"valid_targets_min": 2576
},
{
"epoch": 0.39923954372623577,
"grad_norm": 0.22487804577069473,
"learning_rate": 2.248648648648649e-05,
"loss": 0.2503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1233469620347023,
"step": 105,
"valid_targets_mean": 5325.8,
"valid_targets_min": 2928
},
{
"epoch": 0.41825095057034223,
"grad_norm": 0.22857152253610444,
"learning_rate": 2.356756756756757e-05,
"loss": 0.2445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1288713663816452,
"step": 110,
"valid_targets_mean": 5161.7,
"valid_targets_min": 1862
},
{
"epoch": 0.4372623574144487,
"grad_norm": 0.24338824886805402,
"learning_rate": 2.4648648648648654e-05,
"loss": 0.239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11510897427797318,
"step": 115,
"valid_targets_mean": 4941.3,
"valid_targets_min": 1507
},
{
"epoch": 0.45627376425855515,
"grad_norm": 0.24437893301310154,
"learning_rate": 2.572972972972973e-05,
"loss": 0.2539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12720325589179993,
"step": 120,
"valid_targets_mean": 5260.5,
"valid_targets_min": 2625
},
{
"epoch": 0.4752851711026616,
"grad_norm": 0.3917084342269015,
"learning_rate": 2.6810810810810815e-05,
"loss": 0.2547,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17161251604557037,
"step": 125,
"valid_targets_mean": 4271.5,
"valid_targets_min": 1373
},
{
"epoch": 0.49429657794676807,
"grad_norm": 0.4677580966290995,
"learning_rate": 2.7891891891891892e-05,
"loss": 0.4642,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2109134942293167,
"step": 130,
"valid_targets_mean": 3648.6,
"valid_targets_min": 1151
},
{
"epoch": 0.5133079847908745,
"grad_norm": 0.37088030476958894,
"learning_rate": 2.8972972972972976e-05,
"loss": 0.4581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2217126041650772,
"step": 135,
"valid_targets_mean": 3692.1,
"valid_targets_min": 1583
},
{
"epoch": 0.532319391634981,
"grad_norm": 0.34834316541712207,
"learning_rate": 3.0054054054054056e-05,
"loss": 0.4374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21792852878570557,
"step": 140,
"valid_targets_mean": 4084.4,
"valid_targets_min": 1601
},
{
"epoch": 0.5513307984790875,
"grad_norm": 0.3764288574103832,
"learning_rate": 3.113513513513514e-05,
"loss": 0.4365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22260896861553192,
"step": 145,
"valid_targets_mean": 3740.4,
"valid_targets_min": 1825
},
{
"epoch": 0.5703422053231939,
"grad_norm": 0.30664676408221175,
"learning_rate": 3.221621621621622e-05,
"loss": 0.4129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20132417976856232,
"step": 150,
"valid_targets_mean": 3882.0,
"valid_targets_min": 1821
},
{
"epoch": 0.5893536121673004,
"grad_norm": 0.335985881141292,
"learning_rate": 3.3297297297297305e-05,
"loss": 0.4231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21901392936706543,
"step": 155,
"valid_targets_mean": 4136.7,
"valid_targets_min": 1743
},
{
"epoch": 0.6083650190114068,
"grad_norm": 0.3447522223458868,
"learning_rate": 3.437837837837838e-05,
"loss": 0.4275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22367675602436066,
"step": 160,
"valid_targets_mean": 7437.8,
"valid_targets_min": 591
},
{
"epoch": 0.6273764258555133,
"grad_norm": 0.29372893235049985,
"learning_rate": 3.5459459459459466e-05,
"loss": 0.4306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2104015201330185,
"step": 165,
"valid_targets_mean": 7844.2,
"valid_targets_min": 715
},
{
"epoch": 0.6463878326996197,
"grad_norm": 0.29079875739620076,
"learning_rate": 3.654054054054054e-05,
"loss": 0.4102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21834035217761993,
"step": 170,
"valid_targets_mean": 7960.9,
"valid_targets_min": 733
},
{
"epoch": 0.6653992395437263,
"grad_norm": 0.27135578893433554,
"learning_rate": 3.762162162162163e-05,
"loss": 0.4087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22146134078502655,
"step": 175,
"valid_targets_mean": 8483.1,
"valid_targets_min": 759
},
{
"epoch": 0.6844106463878327,
"grad_norm": 0.2744187448490044,
"learning_rate": 3.8702702702702704e-05,
"loss": 0.3907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19432826340198517,
"step": 180,
"valid_targets_mean": 7507.5,
"valid_targets_min": 702
},
{
"epoch": 0.7034220532319392,
"grad_norm": 0.3048374023380346,
"learning_rate": 3.978378378378379e-05,
"loss": 0.3923,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18287861347198486,
"step": 185,
"valid_targets_mean": 6985.9,
"valid_targets_min": 647
},
{
"epoch": 0.7224334600760456,
"grad_norm": 0.2649862681572022,
"learning_rate": 3.999942416643093e-05,
"loss": 0.3985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20548956096172333,
"step": 190,
"valid_targets_mean": 7685.7,
"valid_targets_min": 592
},
{
"epoch": 0.7414448669201521,
"grad_norm": 0.2992563120534786,
"learning_rate": 3.999708489938559e-05,
"loss": 0.3871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17796725034713745,
"step": 195,
"valid_targets_mean": 7207.1,
"valid_targets_min": 682
},
{
"epoch": 0.7604562737642585,
"grad_norm": 0.2616409196519949,
"learning_rate": 3.999294641957663e-05,
"loss": 0.3852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19631274044513702,
"step": 200,
"valid_targets_mean": 7820.0,
"valid_targets_min": 747
},
{
"epoch": 0.779467680608365,
"grad_norm": 0.2641716839624123,
"learning_rate": 3.998700909935863e-05,
"loss": 0.3833,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21066488325595856,
"step": 205,
"valid_targets_mean": 8022.3,
"valid_targets_min": 727
},
{
"epoch": 0.7984790874524715,
"grad_norm": 0.24998064714538126,
"learning_rate": 3.9979273472934556e-05,
"loss": 0.3774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17630921304225922,
"step": 210,
"valid_targets_mean": 7562.2,
"valid_targets_min": 661
},
{
"epoch": 0.8174904942965779,
"grad_norm": 0.261840054565909,
"learning_rate": 3.9969740236307746e-05,
"loss": 0.3786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20479388535022736,
"step": 215,
"valid_targets_mean": 8467.6,
"valid_targets_min": 409
},
{
"epoch": 0.8365019011406845,
"grad_norm": 0.2526323676493677,
"learning_rate": 3.9958410247219265e-05,
"loss": 0.3701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20014218986034393,
"step": 220,
"valid_targets_mean": 7542.6,
"valid_targets_min": 781
},
{
"epoch": 0.8555133079847909,
"grad_norm": 0.5173558603802552,
"learning_rate": 3.994528452507076e-05,
"loss": 0.3121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12122941017150879,
"step": 225,
"valid_targets_mean": 5823.9,
"valid_targets_min": 1136
},
{
"epoch": 0.8745247148288974,
"grad_norm": 0.28893785124827975,
"learning_rate": 3.993036425083269e-05,
"loss": 0.2407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12518875300884247,
"step": 230,
"valid_targets_mean": 6544.8,
"valid_targets_min": 1446
},
{
"epoch": 0.8935361216730038,
"grad_norm": 0.26737459303895006,
"learning_rate": 3.9913650766938115e-05,
"loss": 0.2334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12098179012537003,
"step": 235,
"valid_targets_mean": 6424.3,
"valid_targets_min": 1291
},
{
"epoch": 0.9125475285171103,
"grad_norm": 0.2234023108439984,
"learning_rate": 3.98951455771619e-05,
"loss": 0.2243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10649286955595016,
"step": 240,
"valid_targets_mean": 5882.0,
"valid_targets_min": 979
},
{
"epoch": 0.9315589353612167,
"grad_norm": 0.22867133634974954,
"learning_rate": 3.987485034648541e-05,
"loss": 0.2219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11067407578229904,
"step": 245,
"valid_targets_mean": 6166.6,
"valid_targets_min": 933
},
{
"epoch": 0.9505703422053232,
"grad_norm": 0.18046687603251277,
"learning_rate": 3.985276690094669e-05,
"loss": 0.2183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11107806116342545,
"step": 250,
"valid_targets_mean": 5971.0,
"valid_targets_min": 1196
},
{
"epoch": 0.9695817490494296,
"grad_norm": 0.22379606980948552,
"learning_rate": 3.982889722747621e-05,
"loss": 0.2129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1035776138305664,
"step": 255,
"valid_targets_mean": 6159.1,
"valid_targets_min": 1238
},
{
"epoch": 0.9885931558935361,
"grad_norm": 0.17909010829485558,
"learning_rate": 3.980324347371806e-05,
"loss": 0.211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09941697865724564,
"step": 260,
"valid_targets_mean": 6163.3,
"valid_targets_min": 1300
},
{
"epoch": 1.0076045627376427,
"grad_norm": 0.33780995576565503,
"learning_rate": 3.977580794783672e-05,
"loss": 0.2922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18427270650863647,
"step": 265,
"valid_targets_mean": 4252.4,
"valid_targets_min": 2118
},
{
"epoch": 1.026615969581749,
"grad_norm": 0.2883511493855013,
"learning_rate": 3.97465931183094e-05,
"loss": 0.3966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19222092628479004,
"step": 270,
"valid_targets_mean": 4523.0,
"valid_targets_min": 1550
},
{
"epoch": 1.0456273764258555,
"grad_norm": 0.3124414858734432,
"learning_rate": 3.971560161370393e-05,
"loss": 0.3725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.178070530295372,
"step": 275,
"valid_targets_mean": 4048.7,
"valid_targets_min": 2023
},
{
"epoch": 1.064638783269962,
"grad_norm": 0.29980707122304473,
"learning_rate": 3.968283622244229e-05,
"loss": 0.3666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17472650110721588,
"step": 280,
"valid_targets_mean": 4060.6,
"valid_targets_min": 1807
},
{
"epoch": 1.0836501901140685,
"grad_norm": 0.2855083601218697,
"learning_rate": 3.9648299892549654e-05,
"loss": 0.3522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1766706258058548,
"step": 285,
"valid_targets_mean": 4122.0,
"valid_targets_min": 2089
},
{
"epoch": 1.102661596958175,
"grad_norm": 0.26544618383492363,
"learning_rate": 3.961199573138923e-05,
"loss": 0.3541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17382532358169556,
"step": 290,
"valid_targets_mean": 4094.8,
"valid_targets_min": 1660
},
{
"epoch": 1.1216730038022813,
"grad_norm": 0.27395793149443254,
"learning_rate": 3.957392700538261e-05,
"loss": 0.3462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17056365311145782,
"step": 295,
"valid_targets_mean": 3793.4,
"valid_targets_min": 2184
},
{
"epoch": 1.1406844106463878,
"grad_norm": 0.2694202154601782,
"learning_rate": 3.9534097139715926e-05,
"loss": 0.3407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17225591838359833,
"step": 300,
"valid_targets_mean": 4053.2,
"valid_targets_min": 1891
},
{
"epoch": 1.1596958174904943,
"grad_norm": 0.2640197797512894,
"learning_rate": 3.9492509718031645e-05,
"loss": 0.3386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17667798697948456,
"step": 305,
"valid_targets_mean": 4274.1,
"valid_targets_min": 1885
},
{
"epoch": 1.1787072243346008,
"grad_norm": 0.25361187076817665,
"learning_rate": 3.944916848210614e-05,
"loss": 0.3374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17618222534656525,
"step": 310,
"valid_targets_mean": 4422.8,
"valid_targets_min": 1908
},
{
"epoch": 1.1977186311787071,
"grad_norm": 0.2338076023050613,
"learning_rate": 3.9404077331513044e-05,
"loss": 0.2354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11924882978200912,
"step": 315,
"valid_targets_mean": 5303.7,
"valid_targets_min": 2553
},
{
"epoch": 1.2167300380228137,
"grad_norm": 0.20686129841395856,
"learning_rate": 3.9357240323272367e-05,
"loss": 0.2326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1147601380944252,
"step": 320,
"valid_targets_mean": 5254.6,
"valid_targets_min": 2168
},
{
"epoch": 1.2357414448669202,
"grad_norm": 0.21736762319432792,
"learning_rate": 3.930866167148549e-05,
"loss": 0.2193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10893138498067856,
"step": 325,
"valid_targets_mean": 4881.5,
"valid_targets_min": 1899
},
{
"epoch": 1.2547528517110267,
"grad_norm": 0.20801984857584946,
"learning_rate": 3.925834574695599e-05,
"loss": 0.2205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11135486513376236,
"step": 330,
"valid_targets_mean": 5599.6,
"valid_targets_min": 2212
},
{
"epoch": 1.2737642585551332,
"grad_norm": 0.19325104513890953,
"learning_rate": 3.920629707679641e-05,
"loss": 0.2151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10907386988401413,
"step": 335,
"valid_targets_mean": 4711.3,
"valid_targets_min": 1625
},
{
"epoch": 1.2927756653992395,
"grad_norm": 0.18491316478382414,
"learning_rate": 3.915252034402089e-05,
"loss": 0.2132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11171620339155197,
"step": 340,
"valid_targets_mean": 5566.0,
"valid_targets_min": 1935
},
{
"epoch": 1.311787072243346,
"grad_norm": 0.19249485970179556,
"learning_rate": 3.9097020387123876e-05,
"loss": 0.2043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08895137161016464,
"step": 345,
"valid_targets_mean": 4913.7,
"valid_targets_min": 2127
},
{
"epoch": 1.3307984790874525,
"grad_norm": 0.1976193837635321,
"learning_rate": 3.903980219964474e-05,
"loss": 0.2084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0967416986823082,
"step": 350,
"valid_targets_mean": 4946.9,
"valid_targets_min": 2188
},
{
"epoch": 1.3498098859315588,
"grad_norm": 0.18884045418843914,
"learning_rate": 3.898087092971851e-05,
"loss": 0.2063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10309580713510513,
"step": 355,
"valid_targets_mean": 5133.6,
"valid_targets_min": 1787
},
{
"epoch": 1.3688212927756653,
"grad_norm": 0.2150636741987042,
"learning_rate": 3.892023187961268e-05,
"loss": 0.2071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0983353927731514,
"step": 360,
"valid_targets_mean": 5151.9,
"valid_targets_min": 2159
},
{
"epoch": 1.3878326996197718,
"grad_norm": 0.18862534780965,
"learning_rate": 3.8857890505250103e-05,
"loss": 0.2035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09774443507194519,
"step": 365,
"valid_targets_mean": 4959.3,
"valid_targets_min": 1217
},
{
"epoch": 1.4068441064638784,
"grad_norm": 0.18583559102022223,
"learning_rate": 3.879385241571817e-05,
"loss": 0.2001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09217417240142822,
"step": 370,
"valid_targets_mean": 5263.2,
"valid_targets_min": 2115
},
{
"epoch": 1.4258555133079849,
"grad_norm": 0.23173843670661723,
"learning_rate": 3.8728123372764085e-05,
"loss": 0.1968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09147930145263672,
"step": 375,
"valid_targets_mean": 5157.2,
"valid_targets_min": 1833
},
{
"epoch": 1.4448669201520912,
"grad_norm": 0.2160702092104236,
"learning_rate": 3.866070929027647e-05,
"loss": 0.1932,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10929874330759048,
"step": 380,
"valid_targets_mean": 5271.4,
"valid_targets_min": 2529
},
{
"epoch": 1.4638783269961977,
"grad_norm": 0.19288064073755715,
"learning_rate": 3.85916162337533e-05,
"loss": 0.2053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10217779874801636,
"step": 385,
"valid_targets_mean": 5138.1,
"valid_targets_min": 1934
},
{
"epoch": 1.4828897338403042,
"grad_norm": 0.37858755377369957,
"learning_rate": 3.8520850419756104e-05,
"loss": 0.2644,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1849764585494995,
"step": 390,
"valid_targets_mean": 3941.6,
"valid_targets_min": 1710
},
{
"epoch": 1.5019011406844105,
"grad_norm": 0.35470045423494073,
"learning_rate": 3.8448418215350726e-05,
"loss": 0.3449,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1646745651960373,
"step": 395,
"valid_targets_mean": 4081.7,
"valid_targets_min": 1714
},
{
"epoch": 1.5209125475285172,
"grad_norm": 0.2876074912459836,
"learning_rate": 3.837432613753438e-05,
"loss": 0.3386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1480315774679184,
"step": 400,
"valid_targets_mean": 3830.1,
"valid_targets_min": 1672
},
{
"epoch": 1.5399239543726235,
"grad_norm": 0.3611749486973579,
"learning_rate": 3.8298580852649316e-05,
"loss": 0.3332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16189919412136078,
"step": 405,
"valid_targets_mean": 3843.3,
"valid_targets_min": 1308
},
{
"epoch": 1.55893536121673,
"grad_norm": 0.3206203251498053,
"learning_rate": 3.822118917578304e-05,
"loss": 0.3258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17144060134887695,
"step": 410,
"valid_targets_mean": 3924.0,
"valid_targets_min": 1308
},
{
"epoch": 1.5779467680608366,
"grad_norm": 0.3741047714354632,
"learning_rate": 3.814215807015511e-05,
"loss": 0.3159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1597537249326706,
"step": 415,
"valid_targets_mean": 3620.2,
"valid_targets_min": 1428
},
{
"epoch": 1.5969581749049429,
"grad_norm": 0.37521631376088815,
"learning_rate": 3.806149464649066e-05,
"loss": 0.3184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14377222955226898,
"step": 420,
"valid_targets_mean": 3715.8,
"valid_targets_min": 1498
},
{
"epoch": 1.6159695817490496,
"grad_norm": 0.26671798721350176,
"learning_rate": 3.797920616238058e-05,
"loss": 0.3558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1722474843263626,
"step": 425,
"valid_targets_mean": 7975.3,
"valid_targets_min": 657
},
{
"epoch": 1.6349809885931559,
"grad_norm": 0.27985449043512695,
"learning_rate": 3.789530002162856e-05,
"loss": 0.3507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16311007738113403,
"step": 430,
"valid_targets_mean": 7661.3,
"valid_targets_min": 711
},
{
"epoch": 1.6539923954372624,
"grad_norm": 0.24727092770252987,
"learning_rate": 3.780978377358493e-05,
"loss": 0.3425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17998184263706207,
"step": 435,
"valid_targets_mean": 7740.2,
"valid_targets_min": 588
},
{
"epoch": 1.673003802281369,
"grad_norm": 0.22934434129529802,
"learning_rate": 3.77226651124674e-05,
"loss": 0.3389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16208817064762115,
"step": 440,
"valid_targets_mean": 7938.4,
"valid_targets_min": 531
},
{
"epoch": 1.6920152091254752,
"grad_norm": 0.2257662037497451,
"learning_rate": 3.7633951876668826e-05,
"loss": 0.3333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1535138487815857,
"step": 445,
"valid_targets_mean": 6955.9,
"valid_targets_min": 674
},
{
"epoch": 1.7110266159695817,
"grad_norm": 0.2300354581824267,
"learning_rate": 3.754365204805189e-05,
"loss": 0.339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17140650749206543,
"step": 450,
"valid_targets_mean": 7178.0,
"valid_targets_min": 605
},
{
"epoch": 1.7300380228136882,
"grad_norm": 0.25849307514203357,
"learning_rate": 3.745177375123101e-05,
"loss": 0.3361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17827041447162628,
"step": 455,
"valid_targets_mean": 8065.9,
"valid_targets_min": 693
},
{
"epoch": 1.7490494296577945,
"grad_norm": 0.23634004384778767,
"learning_rate": 3.7358325252841326e-05,
"loss": 0.3325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15933071076869965,
"step": 460,
"valid_targets_mean": 7800.7,
"valid_targets_min": 413
},
{
"epoch": 1.7680608365019013,
"grad_norm": 0.2360782519286374,
"learning_rate": 3.726331496079486e-05,
"loss": 0.3411,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17583046853542328,
"step": 465,
"valid_targets_mean": 7847.2,
"valid_targets_min": 695
},
{
"epoch": 1.7870722433460076,
"grad_norm": 0.23430829246800822,
"learning_rate": 3.716675142352411e-05,
"loss": 0.3246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15302903950214386,
"step": 470,
"valid_targets_mean": 7130.6,
"valid_targets_min": 614
},
{
"epoch": 1.806083650190114,
"grad_norm": 0.22227998277895408,
"learning_rate": 3.706864332921285e-05,
"loss": 0.333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16763727366924286,
"step": 475,
"valid_targets_mean": 7978.8,
"valid_targets_min": 598
},
{
"epoch": 1.8250950570342206,
"grad_norm": 0.27022623582106853,
"learning_rate": 3.696899950501447e-05,
"loss": 0.325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16360555589199066,
"step": 480,
"valid_targets_mean": 7157.9,
"valid_targets_min": 589
},
{
"epoch": 1.8441064638783269,
"grad_norm": 0.24101847996901093,
"learning_rate": 3.686782891625772e-05,
"loss": 0.3253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16875922679901123,
"step": 485,
"valid_targets_mean": 7234.4,
"valid_targets_min": 593
},
{
"epoch": 1.8631178707224336,
"grad_norm": 0.26624531145633745,
"learning_rate": 3.676514066564009e-05,
"loss": 0.1908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08150222152471542,
"step": 490,
"valid_targets_mean": 6154.4,
"valid_targets_min": 1363
},
{
"epoch": 1.88212927756654,
"grad_norm": 0.25942794427129934,
"learning_rate": 3.6660943992408817e-05,
"loss": 0.1785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08557536453008652,
"step": 495,
"valid_targets_mean": 5865.2,
"valid_targets_min": 970
},
{
"epoch": 1.9011406844106464,
"grad_norm": 0.24051565314409892,
"learning_rate": 3.6555248271529554e-05,
"loss": 0.177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09305544942617416,
"step": 500,
"valid_targets_mean": 6350.6,
"valid_targets_min": 972
},
{
"epoch": 1.920152091254753,
"grad_norm": 0.2415618705323024,
"learning_rate": 3.644806301284293e-05,
"loss": 0.1741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08921092748641968,
"step": 505,
"valid_targets_mean": 5735.5,
"valid_targets_min": 959
},
{
"epoch": 1.9391634980988592,
"grad_norm": 0.19616487082311124,
"learning_rate": 3.633939786020884e-05,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09038201719522476,
"step": 510,
"valid_targets_mean": 6206.8,
"valid_targets_min": 1281
},
{
"epoch": 1.9581749049429658,
"grad_norm": 0.21845612335525297,
"learning_rate": 3.622926259063883e-05,
"loss": 0.1702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08562606573104858,
"step": 515,
"valid_targets_mean": 6104.3,
"valid_targets_min": 1326
},
{
"epoch": 1.9771863117870723,
"grad_norm": 0.19802351112521951,
"learning_rate": 3.611766711341636e-05,
"loss": 0.1709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08315548300743103,
"step": 520,
"valid_targets_mean": 6447.5,
"valid_targets_min": 1353
},
{
"epoch": 1.9961977186311786,
"grad_norm": 0.19485390745124273,
"learning_rate": 3.600462146920525e-05,
"loss": 0.17,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08430048078298569,
"step": 525,
"valid_targets_mean": 5833.1,
"valid_targets_min": 1011
},
{
"epoch": 2.0152091254752853,
"grad_norm": 0.3136192928564486,
"learning_rate": 3.5890135829146294e-05,
"loss": 0.2736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1510065495967865,
"step": 530,
"valid_targets_mean": 4625.7,
"valid_targets_min": 1421
},
{
"epoch": 2.0342205323193916,
"grad_norm": 0.31107755227200234,
"learning_rate": 3.577422049394212e-05,
"loss": 0.2852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1280999481678009,
"step": 535,
"valid_targets_mean": 3831.7,
"valid_targets_min": 1240
},
{
"epoch": 2.053231939163498,
"grad_norm": 0.32564709358200533,
"learning_rate": 3.5656885892930376e-05,
"loss": 0.2803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15929754078388214,
"step": 540,
"valid_targets_mean": 4451.9,
"valid_targets_min": 2180
},
{
"epoch": 2.0722433460076046,
"grad_norm": 0.30492913163946755,
"learning_rate": 3.5538142583145395e-05,
"loss": 0.2741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1532464176416397,
"step": 545,
"valid_targets_mean": 4672.5,
"valid_targets_min": 1869
},
{
"epoch": 2.091254752851711,
"grad_norm": 0.2959882307284956,
"learning_rate": 3.5418001248368324e-05,
"loss": 0.2689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13413457572460175,
"step": 550,
"valid_targets_mean": 4185.8,
"valid_targets_min": 1891
},
{
"epoch": 2.1102661596958177,
"grad_norm": 0.27731170863500637,
"learning_rate": 3.5296472698165856e-05,
"loss": 0.2684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13876162469387054,
"step": 555,
"valid_targets_mean": 4360.1,
"valid_targets_min": 1878
},
{
"epoch": 2.129277566539924,
"grad_norm": 0.2607950148321866,
"learning_rate": 3.5173567866917664e-05,
"loss": 0.2643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12989114224910736,
"step": 560,
"valid_targets_mean": 4039.0,
"valid_targets_min": 2212
},
{
"epoch": 2.1482889733840302,
"grad_norm": 0.2501785431734494,
"learning_rate": 3.504929781283259e-05,
"loss": 0.2645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13388948142528534,
"step": 565,
"valid_targets_mean": 4414.8,
"valid_targets_min": 1740
},
{
"epoch": 2.167300380228137,
"grad_norm": 0.2590008497305306,
"learning_rate": 3.4923673716953717e-05,
"loss": 0.2606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1188698410987854,
"step": 570,
"valid_targets_mean": 3860.4,
"valid_targets_min": 1987
},
{
"epoch": 2.1863117870722433,
"grad_norm": 0.22763236836652495,
"learning_rate": 3.4796706882152304e-05,
"loss": 0.2317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09271648526191711,
"step": 575,
"valid_targets_mean": 5169.8,
"valid_targets_min": 2230
},
{
"epoch": 2.20532319391635,
"grad_norm": 0.2243687505654757,
"learning_rate": 3.4668408732110915e-05,
"loss": 0.1786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08923256397247314,
"step": 580,
"valid_targets_mean": 5347.7,
"valid_targets_min": 2590
},
{
"epoch": 2.2243346007604563,
"grad_norm": 0.20192534845911445,
"learning_rate": 3.453879081029552e-05,
"loss": 0.1705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08735809475183487,
"step": 585,
"valid_targets_mean": 5252.0,
"valid_targets_min": 2739
},
{
"epoch": 2.2433460076045626,
"grad_norm": 0.21897534747327288,
"learning_rate": 3.440786477891691e-05,
"loss": 0.1663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07201861590147018,
"step": 590,
"valid_targets_mean": 4841.5,
"valid_targets_min": 2438
},
{
"epoch": 2.2623574144486693,
"grad_norm": 0.202393296923521,
"learning_rate": 3.42756424178814e-05,
"loss": 0.17,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08041936904191971,
"step": 595,
"valid_targets_mean": 5269.5,
"valid_targets_min": 2799
},
{
"epoch": 2.2813688212927756,
"grad_norm": 0.1982442046754712,
"learning_rate": 3.4142135623730954e-05,
"loss": 0.1598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08316317200660706,
"step": 600,
"valid_targets_mean": 4994.9,
"valid_targets_min": 2925
},
{
"epoch": 2.3003802281368824,
"grad_norm": 0.1902314107786306,
"learning_rate": 3.40073564085728e-05,
"loss": 0.1613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07204687595367432,
"step": 605,
"valid_targets_mean": 5089.9,
"valid_targets_min": 1798
},
{
"epoch": 2.3193916349809887,
"grad_norm": 0.18912526794105103,
"learning_rate": 3.387131689899866e-05,
"loss": 0.1583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0887041762471199,
"step": 610,
"valid_targets_mean": 5424.5,
"valid_targets_min": 1688
},
{
"epoch": 2.338403041825095,
"grad_norm": 0.20318427904092756,
"learning_rate": 3.3734029334993675e-05,
"loss": 0.1571,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08025332540273666,
"step": 615,
"valid_targets_mean": 5056.5,
"valid_targets_min": 1818
},
{
"epoch": 2.3574144486692017,
"grad_norm": 0.21260147977373586,
"learning_rate": 3.359550606883511e-05,
"loss": 0.1567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08076399564743042,
"step": 620,
"valid_targets_mean": 5039.8,
"valid_targets_min": 2218
},
{
"epoch": 2.376425855513308,
"grad_norm": 0.2005739212333494,
"learning_rate": 3.3455759563981025e-05,
"loss": 0.1576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08449053764343262,
"step": 625,
"valid_targets_mean": 4937.0,
"valid_targets_min": 2290
},
{
"epoch": 2.3954372623574143,
"grad_norm": 0.1939665480340307,
"learning_rate": 3.331480239394881e-05,
"loss": 0.1532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08675984293222427,
"step": 630,
"valid_targets_mean": 5262.6,
"valid_targets_min": 2744
},
{
"epoch": 2.414448669201521,
"grad_norm": 0.2067313139807621,
"learning_rate": 3.317264724118399e-05,
"loss": 0.151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07771243900060654,
"step": 635,
"valid_targets_mean": 5317.8,
"valid_targets_min": 2213
},
{
"epoch": 2.4334600760456273,
"grad_norm": 0.24438817055982565,
"learning_rate": 3.3029306895919056e-05,
"loss": 0.1468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07272510975599289,
"step": 640,
"valid_targets_mean": 4976.9,
"valid_targets_min": 1959
},
{
"epoch": 2.4524714828897336,
"grad_norm": 0.21947954421689864,
"learning_rate": 3.288479425502273e-05,
"loss": 0.1549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08339103311300278,
"step": 645,
"valid_targets_mean": 5133.1,
"valid_targets_min": 2289
},
{
"epoch": 2.4714828897338403,
"grad_norm": 0.2361515652158053,
"learning_rate": 3.2739122320839567e-05,
"loss": 0.1514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07445741444826126,
"step": 650,
"valid_targets_mean": 4922.0,
"valid_targets_min": 1899
},
{
"epoch": 2.4904942965779466,
"grad_norm": 0.4845308707427055,
"learning_rate": 3.25923042000201e-05,
"loss": 0.2337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11655815690755844,
"step": 655,
"valid_targets_mean": 3755.8,
"valid_targets_min": 1436
},
{
"epoch": 2.5095057034220534,
"grad_norm": 0.5041883033987095,
"learning_rate": 3.244435310234156e-05,
"loss": 0.2444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12083180993795395,
"step": 660,
"valid_targets_mean": 4149.8,
"valid_targets_min": 2001
},
{
"epoch": 2.5285171102661597,
"grad_norm": 0.4293037233161345,
"learning_rate": 3.229528233951935e-05,
"loss": 0.2334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12094869464635849,
"step": 665,
"valid_targets_mean": 4153.3,
"valid_targets_min": 1894
},
{
"epoch": 2.5475285171102664,
"grad_norm": 0.4341690333179762,
"learning_rate": 3.214510532400939e-05,
"loss": 0.2389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1278359293937683,
"step": 670,
"valid_targets_mean": 4260.3,
"valid_targets_min": 1553
},
{
"epoch": 2.5665399239543727,
"grad_norm": 0.4497609978338215,
"learning_rate": 3.1993835567801266e-05,
"loss": 0.2236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1090053841471672,
"step": 675,
"valid_targets_mean": 3701.6,
"valid_targets_min": 1071
},
{
"epoch": 2.585551330798479,
"grad_norm": 0.36831043021057513,
"learning_rate": 3.184148668120253e-05,
"loss": 0.2238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10701311379671097,
"step": 680,
"valid_targets_mean": 3952.4,
"valid_targets_min": 1854
},
{
"epoch": 2.6045627376425857,
"grad_norm": 0.40504235757567003,
"learning_rate": 3.16880723716142e-05,
"loss": 0.256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1709039807319641,
"step": 685,
"valid_targets_mean": 7155.4,
"valid_targets_min": 483
},
{
"epoch": 2.623574144486692,
"grad_norm": 0.3013409218707254,
"learning_rate": 3.153360644229735e-05,
"loss": 0.3231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16763341426849365,
"step": 690,
"valid_targets_mean": 7369.4,
"valid_targets_min": 631
},
{
"epoch": 2.6425855513307983,
"grad_norm": 0.26444934651735047,
"learning_rate": 3.137810279113125e-05,
"loss": 0.3117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15889973938465118,
"step": 695,
"valid_targets_mean": 7592.1,
"valid_targets_min": 806
},
{
"epoch": 2.661596958174905,
"grad_norm": 0.22352602705602617,
"learning_rate": 3.122157540936288e-05,
"loss": 0.3003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15156026184558868,
"step": 700,
"valid_targets_mean": 7414.5,
"valid_targets_min": 724
},
{
"epoch": 2.6806083650190113,
"grad_norm": 0.24463272584732818,
"learning_rate": 3.106403838034815e-05,
"loss": 0.296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15007655322551727,
"step": 705,
"valid_targets_mean": 7829.4,
"valid_targets_min": 495
},
{
"epoch": 2.6996197718631176,
"grad_norm": 0.21946530753675053,
"learning_rate": 3.090550587828466e-05,
"loss": 0.2931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14451086521148682,
"step": 710,
"valid_targets_mean": 7851.2,
"valid_targets_min": 660
},
{
"epoch": 2.7186311787072244,
"grad_norm": 0.22157971443490984,
"learning_rate": 3.0745992166936484e-05,
"loss": 0.2986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14042143523693085,
"step": 715,
"valid_targets_mean": 6916.8,
"valid_targets_min": 499
},
{
"epoch": 2.7376425855513307,
"grad_norm": 0.21721619776557882,
"learning_rate": 3.058551159835078e-05,
"loss": 0.2942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15121476352214813,
"step": 720,
"valid_targets_mean": 7713.7,
"valid_targets_min": 679
},
{
"epoch": 2.7566539923954374,
"grad_norm": 0.2516514737751596,
"learning_rate": 3.0424078611566484e-05,
"loss": 0.2884,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15188945829868317,
"step": 725,
"valid_targets_mean": 7996.9,
"valid_targets_min": 739
},
{
"epoch": 2.7756653992395437,
"grad_norm": 0.2378457691057772,
"learning_rate": 3.026170773131516e-05,
"loss": 0.2947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13439372181892395,
"step": 730,
"valid_targets_mean": 7238.1,
"valid_targets_min": 604
},
{
"epoch": 2.7946768060836504,
"grad_norm": 0.25213487345273705,
"learning_rate": 3.0098413566714165e-05,
"loss": 0.2846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14057187736034393,
"step": 735,
"valid_targets_mean": 7777.8,
"valid_targets_min": 678
},
{
"epoch": 2.8136882129277567,
"grad_norm": 0.24989480394630467,
"learning_rate": 2.9934210809952216e-05,
"loss": 0.2831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12304911762475967,
"step": 740,
"valid_targets_mean": 6190.7,
"valid_targets_min": 755
},
{
"epoch": 2.832699619771863,
"grad_norm": 0.2689521231941662,
"learning_rate": 2.9769114234967486e-05,
"loss": 0.2805,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12372348457574844,
"step": 745,
"valid_targets_mean": 6905.2,
"valid_targets_min": 538
},
{
"epoch": 2.8517110266159698,
"grad_norm": 0.20927078109216926,
"learning_rate": 2.9603138696118315e-05,
"loss": 0.2343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07049879431724548,
"step": 750,
"valid_targets_mean": 5756.4,
"valid_targets_min": 1271
},
{
"epoch": 2.870722433460076,
"grad_norm": 0.2675987794700805,
"learning_rate": 2.9436299126846726e-05,
"loss": 0.1416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07208818197250366,
"step": 755,
"valid_targets_mean": 6551.0,
"valid_targets_min": 1261
},
{
"epoch": 2.8897338403041823,
"grad_norm": 0.2651422160068339,
"learning_rate": 2.92686105383348e-05,
"loss": 0.1403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06887445598840714,
"step": 760,
"valid_targets_mean": 5875.1,
"valid_targets_min": 1343
},
{
"epoch": 2.908745247148289,
"grad_norm": 0.28796721511055345,
"learning_rate": 2.910008801815406e-05,
"loss": 0.1355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06859635561704636,
"step": 765,
"valid_targets_mean": 6292.9,
"valid_targets_min": 1329
},
{
"epoch": 2.9277566539923954,
"grad_norm": 0.23162244917131206,
"learning_rate": 2.8930746728908002e-05,
"loss": 0.1361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07203914225101471,
"step": 770,
"valid_targets_mean": 6265.1,
"valid_targets_min": 1092
},
{
"epoch": 2.9467680608365017,
"grad_norm": 0.22762426423237933,
"learning_rate": 2.876060190686784e-05,
"loss": 0.1368,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06779056042432785,
"step": 775,
"valid_targets_mean": 6037.4,
"valid_targets_min": 1063
},
{
"epoch": 2.9657794676806084,
"grad_norm": 0.2342043607080278,
"learning_rate": 2.8589668860601643e-05,
"loss": 0.1331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06987350434064865,
"step": 780,
"valid_targets_mean": 6279.7,
"valid_targets_min": 1052
},
{
"epoch": 2.9847908745247147,
"grad_norm": 0.23814802337043967,
"learning_rate": 2.8417962969596976e-05,
"loss": 0.1349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07049954682588577,
"step": 785,
"valid_targets_mean": 6157.9,
"valid_targets_min": 787
},
{
"epoch": 3.0038022813688214,
"grad_norm": 0.3462432064505725,
"learning_rate": 2.8245499682877152e-05,
"loss": 0.1522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11840838193893433,
"step": 790,
"valid_targets_mean": 4474.1,
"valid_targets_min": 2202
},
{
"epoch": 3.0228136882129277,
"grad_norm": 0.387684012161189,
"learning_rate": 2.8072294517611208e-05,
"loss": 0.2226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11450818926095963,
"step": 795,
"valid_targets_mean": 4430.6,
"valid_targets_min": 1899
},
{
"epoch": 3.041825095057034,
"grad_norm": 0.4289500795878552,
"learning_rate": 2.7898363057717786e-05,
"loss": 0.2082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10947731137275696,
"step": 800,
"valid_targets_mean": 4507.6,
"valid_targets_min": 2374
},
{
"epoch": 3.0608365019011408,
"grad_norm": 0.38143942347647636,
"learning_rate": 2.772372095246297e-05,
"loss": 0.2094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10378646850585938,
"step": 805,
"valid_targets_mean": 4175.6,
"valid_targets_min": 1694
},
{
"epoch": 3.079847908745247,
"grad_norm": 0.35205316942486764,
"learning_rate": 2.7548383915052287e-05,
"loss": 0.2033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09711352735757828,
"step": 810,
"valid_targets_mean": 4604.5,
"valid_targets_min": 2300
},
{
"epoch": 3.098859315589354,
"grad_norm": 0.34334350946854836,
"learning_rate": 2.7372367721216915e-05,
"loss": 0.2016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10052075982093811,
"step": 815,
"valid_targets_mean": 4258.6,
"valid_targets_min": 1637
},
{
"epoch": 3.11787072243346,
"grad_norm": 0.31906834116059535,
"learning_rate": 2.7195688207794277e-05,
"loss": 0.1994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09429528564214706,
"step": 820,
"valid_targets_mean": 4176.6,
"valid_targets_min": 2221
},
{
"epoch": 3.1368821292775664,
"grad_norm": 0.33079871629370033,
"learning_rate": 2.701836127130314e-05,
"loss": 0.1948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0990242138504982,
"step": 825,
"valid_targets_mean": 4614.5,
"valid_targets_min": 1976
},
{
"epoch": 3.155893536121673,
"grad_norm": 0.31986706842847734,
"learning_rate": 2.684040286651338e-05,
"loss": 0.196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09625053405761719,
"step": 830,
"valid_targets_mean": 4079.5,
"valid_targets_min": 1191
},
{
"epoch": 3.1749049429657794,
"grad_norm": 0.32660199144818897,
"learning_rate": 2.666182900501042e-05,
"loss": 0.1909,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09521114081144333,
"step": 835,
"valid_targets_mean": 3921.7,
"valid_targets_min": 1790
},
{
"epoch": 3.1939163498098857,
"grad_norm": 0.2789356519847509,
"learning_rate": 2.6482655753754657e-05,
"loss": 0.1535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07062631845474243,
"step": 840,
"valid_targets_mean": 5405.6,
"valid_targets_min": 1854
},
{
"epoch": 3.2129277566539924,
"grad_norm": 0.2638806676304068,
"learning_rate": 2.6302899233635803e-05,
"loss": 0.141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07542570680379868,
"step": 845,
"valid_targets_mean": 5283.7,
"valid_targets_min": 2215
},
{
"epoch": 3.2319391634980987,
"grad_norm": 0.26528690947633593,
"learning_rate": 2.6122575618022487e-05,
"loss": 0.1271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0592070035636425,
"step": 850,
"valid_targets_mean": 5144.5,
"valid_targets_min": 2072
},
{
"epoch": 3.2509505703422055,
"grad_norm": 0.2360500251447195,
"learning_rate": 2.594170113130703e-05,
"loss": 0.1289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06634698063135147,
"step": 855,
"valid_targets_mean": 5086.4,
"valid_targets_min": 3113
},
{
"epoch": 3.2699619771863118,
"grad_norm": 0.2257716074443295,
"learning_rate": 2.57602920474457e-05,
"loss": 0.1258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06248977407813072,
"step": 860,
"valid_targets_mean": 4885.1,
"valid_targets_min": 1735
},
{
"epoch": 3.288973384030418,
"grad_norm": 0.23619639963838657,
"learning_rate": 2.5578364688494475e-05,
"loss": 0.1223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05550096556544304,
"step": 865,
"valid_targets_mean": 4916.5,
"valid_targets_min": 1659
},
{
"epoch": 3.307984790874525,
"grad_norm": 0.2255346409587296,
"learning_rate": 2.5395935423140487e-05,
"loss": 0.1228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0695202425122261,
"step": 870,
"valid_targets_mean": 5242.5,
"valid_targets_min": 2397
},
{
"epoch": 3.326996197718631,
"grad_norm": 0.20444394075732844,
"learning_rate": 2.5213020665229274e-05,
"loss": 0.1154,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055293649435043335,
"step": 875,
"valid_targets_mean": 4999.2,
"valid_targets_min": 1210
},
{
"epoch": 3.346007604562738,
"grad_norm": 0.23554324333284074,
"learning_rate": 2.5029636872287953e-05,
"loss": 0.1168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.060592859983444214,
"step": 880,
"valid_targets_mean": 5145.7,
"valid_targets_min": 1139
},
{
"epoch": 3.365019011406844,
"grad_norm": 0.24624770501083576,
"learning_rate": 2.4845800544044483e-05,
"loss": 0.116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05072854831814766,
"step": 885,
"valid_targets_mean": 5084.9,
"valid_targets_min": 1197
},
{
"epoch": 3.3840304182509504,
"grad_norm": 0.2386065391922119,
"learning_rate": 2.4661528220943134e-05,
"loss": 0.1153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0552067756652832,
"step": 890,
"valid_targets_mean": 5369.3,
"valid_targets_min": 2995
},
{
"epoch": 3.403041825095057,
"grad_norm": 0.23227509902254379,
"learning_rate": 2.4476836482656257e-05,
"loss": 0.1142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052853602916002274,
"step": 895,
"valid_targets_mean": 5159.8,
"valid_targets_min": 1840
},
{
"epoch": 3.4220532319391634,
"grad_norm": 0.24229052309272628,
"learning_rate": 2.4291741946592575e-05,
"loss": 0.1113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05704169347882271,
"step": 900,
"valid_targets_mean": 4962.0,
"valid_targets_min": 1021
},
{
"epoch": 3.4410646387832697,
"grad_norm": 0.2474659051806386,
"learning_rate": 2.4106261266402023e-05,
"loss": 0.1084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05357471480965614,
"step": 905,
"valid_targets_mean": 5149.4,
"valid_targets_min": 1377
},
{
"epoch": 3.4600760456273765,
"grad_norm": 0.2350749738396892,
"learning_rate": 2.392041113047737e-05,
"loss": 0.1169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05722544714808464,
"step": 910,
"valid_targets_mean": 4918.9,
"valid_targets_min": 1444
},
{
"epoch": 3.4790874524714828,
"grad_norm": 0.4541902536996299,
"learning_rate": 2.3734208260452727e-05,
"loss": 0.1288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08960682153701782,
"step": 915,
"valid_targets_mean": 4053.5,
"valid_targets_min": 2113
},
{
"epoch": 3.4980988593155895,
"grad_norm": 0.5243330478051801,
"learning_rate": 2.354766940969899e-05,
"loss": 0.1682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09234726428985596,
"step": 920,
"valid_targets_mean": 4056.7,
"valid_targets_min": 1784
},
{
"epoch": 3.517110266159696,
"grad_norm": 0.6150184733706467,
"learning_rate": 2.3360811361816525e-05,
"loss": 0.1757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07612651586532593,
"step": 925,
"valid_targets_mean": 3331.2,
"valid_targets_min": 1107
},
{
"epoch": 3.5361216730038025,
"grad_norm": 0.5165218060285858,
"learning_rate": 2.317365092912503e-05,
"loss": 0.1707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08710366487503052,
"step": 930,
"valid_targets_mean": 4188.5,
"valid_targets_min": 1299
},
{
"epoch": 3.555133079847909,
"grad_norm": 0.4638634963858916,
"learning_rate": 2.2986204951150926e-05,
"loss": 0.1692,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07611557096242905,
"step": 935,
"valid_targets_mean": 3755.5,
"valid_targets_min": 1590
},
{
"epoch": 3.574144486692015,
"grad_norm": 0.4507258054785201,
"learning_rate": 2.2798490293112216e-05,
"loss": 0.1581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08385371416807175,
"step": 940,
"valid_targets_mean": 3787.1,
"valid_targets_min": 1520
},
{
"epoch": 3.593155893536122,
"grad_norm": 0.4753708125962138,
"learning_rate": 2.261052384440104e-05,
"loss": 0.1576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08039424568414688,
"step": 945,
"valid_targets_mean": 3581.0,
"valid_targets_min": 2196
},
{
"epoch": 3.612167300380228,
"grad_norm": 0.30946760254853556,
"learning_rate": 2.2422322517064084e-05,
"loss": 0.2454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1446402221918106,
"step": 950,
"valid_targets_mean": 7765.3,
"valid_targets_min": 434
},
{
"epoch": 3.6311787072243344,
"grad_norm": 0.26912647329408734,
"learning_rate": 2.2233903244280977e-05,
"loss": 0.2918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1367168426513672,
"step": 955,
"valid_targets_mean": 7428.0,
"valid_targets_min": 583
},
{
"epoch": 3.650190114068441,
"grad_norm": 0.25673057743212085,
"learning_rate": 2.2045282978840684e-05,
"loss": 0.2806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14741817116737366,
"step": 960,
"valid_targets_mean": 7663.1,
"valid_targets_min": 667
},
{
"epoch": 3.6692015209125475,
"grad_norm": 0.24695457622644892,
"learning_rate": 2.1856478691616262e-05,
"loss": 0.2674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12863211333751678,
"step": 965,
"valid_targets_mean": 7248.7,
"valid_targets_min": 620
},
{
"epoch": 3.6882129277566538,
"grad_norm": 0.24848314595919005,
"learning_rate": 2.166750737003787e-05,
"loss": 0.2568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12329714745283127,
"step": 970,
"valid_targets_mean": 7425.4,
"valid_targets_min": 622
},
{
"epoch": 3.7072243346007605,
"grad_norm": 0.2382432971462893,
"learning_rate": 2.1478386016564406e-05,
"loss": 0.2603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11289852857589722,
"step": 975,
"valid_targets_mean": 6589.8,
"valid_targets_min": 705
},
{
"epoch": 3.726235741444867,
"grad_norm": 0.2471516002178032,
"learning_rate": 2.1289131647153664e-05,
"loss": 0.2567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12679801881313324,
"step": 980,
"valid_targets_mean": 7816.3,
"valid_targets_min": 471
},
{
"epoch": 3.7452471482889735,
"grad_norm": 0.24941306933409185,
"learning_rate": 2.109976128973141e-05,
"loss": 0.2557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13337580859661102,
"step": 985,
"valid_targets_mean": 8164.5,
"valid_targets_min": 752
},
{
"epoch": 3.76425855513308,
"grad_norm": 0.29226310695744656,
"learning_rate": 2.0910291982659277e-05,
"loss": 0.2559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12007047981023788,
"step": 990,
"valid_targets_mean": 7631.5,
"valid_targets_min": 575
},
{
"epoch": 3.7832699619771866,
"grad_norm": 0.28943535096600376,
"learning_rate": 2.072074077320177e-05,
"loss": 0.2475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12613262236118317,
"step": 995,
"valid_targets_mean": 7756.3,
"valid_targets_min": 590
},
{
"epoch": 3.802281368821293,
"grad_norm": 0.30211188723707383,
"learning_rate": 2.053112471599245e-05,
"loss": 0.2459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1403399258852005,
"step": 1000,
"valid_targets_mean": 8261.5,
"valid_targets_min": 575
},
{
"epoch": 3.821292775665399,
"grad_norm": 0.30427872241401077,
"learning_rate": 2.03414608714995e-05,
"loss": 0.2371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1107184886932373,
"step": 1005,
"valid_targets_mean": 7423.7,
"valid_targets_min": 632
},
{
"epoch": 3.840304182509506,
"grad_norm": 0.28349897038009053,
"learning_rate": 2.0151766304490668e-05,
"loss": 0.2374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12353726476430893,
"step": 1010,
"valid_targets_mean": 7188.3,
"valid_targets_min": 537
},
{
"epoch": 3.859315589353612,
"grad_norm": 0.29812579187843263,
"learning_rate": 1.9962058082497944e-05,
"loss": 0.1475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055223409086465836,
"step": 1015,
"valid_targets_mean": 6371.1,
"valid_targets_min": 1436
},
{
"epoch": 3.8783269961977185,
"grad_norm": 0.3305078447005314,
"learning_rate": 1.9772353274281918e-05,
"loss": 0.1138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05937516316771507,
"step": 1020,
"valid_targets_mean": 6149.8,
"valid_targets_min": 990
},
{
"epoch": 3.897338403041825,
"grad_norm": 0.26960355210295867,
"learning_rate": 1.9582668948295998e-05,
"loss": 0.11,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.054687947034835815,
"step": 1025,
"valid_targets_mean": 6012.3,
"valid_targets_min": 1088
},
{
"epoch": 3.9163498098859315,
"grad_norm": 0.23253306165339072,
"learning_rate": 1.9393022171150755e-05,
"loss": 0.1063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05543830618262291,
"step": 1030,
"valid_targets_mean": 6088.7,
"valid_targets_min": 1120
},
{
"epoch": 3.935361216730038,
"grad_norm": 0.2363002943662766,
"learning_rate": 1.9203430006078348e-05,
"loss": 0.1084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.054433271288871765,
"step": 1035,
"valid_targets_mean": 6360.9,
"valid_targets_min": 1028
},
{
"epoch": 3.9543726235741445,
"grad_norm": 0.23732378066315013,
"learning_rate": 1.9013909511397262e-05,
"loss": 0.1071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05254880711436272,
"step": 1040,
"valid_targets_mean": 6166.2,
"valid_targets_min": 1260
},
{
"epoch": 3.973384030418251,
"grad_norm": 0.23101729553624345,
"learning_rate": 1.882447773897755e-05,
"loss": 0.1058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.054109666496515274,
"step": 1045,
"valid_targets_mean": 5871.1,
"valid_targets_min": 1075
},
{
"epoch": 3.9923954372623576,
"grad_norm": 0.24036940107376528,
"learning_rate": 1.8635151732706586e-05,
"loss": 0.1062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.057960644364356995,
"step": 1050,
"valid_targets_mean": 6057.0,
"valid_targets_min": 902
},
{
"epoch": 4.011406844106464,
"grad_norm": 0.3548766744558099,
"learning_rate": 1.8445948526955555e-05,
"loss": 0.1426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07553637772798538,
"step": 1055,
"valid_targets_mean": 4033.6,
"valid_targets_min": 1864
},
{
"epoch": 4.030418250950571,
"grad_norm": 0.43397890059403704,
"learning_rate": 1.8256885145046837e-05,
"loss": 0.1599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08146659284830093,
"step": 1060,
"valid_targets_mean": 4246.9,
"valid_targets_min": 1518
},
{
"epoch": 4.0494296577946765,
"grad_norm": 0.43481649208518697,
"learning_rate": 1.8067978597722325e-05,
"loss": 0.1487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0713481679558754,
"step": 1065,
"valid_targets_mean": 4028.1,
"valid_targets_min": 1955
},
{
"epoch": 4.068441064638783,
"grad_norm": 0.42609901166474995,
"learning_rate": 1.787924588161291e-05,
"loss": 0.1531,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0739174485206604,
"step": 1070,
"valid_targets_mean": 4385.1,
"valid_targets_min": 1335
},
{
"epoch": 4.08745247148289,
"grad_norm": 0.3613193797257685,
"learning_rate": 1.7690703977709248e-05,
"loss": 0.1458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06789538264274597,
"step": 1075,
"valid_targets_mean": 4030.6,
"valid_targets_min": 1185
},
{
"epoch": 4.106463878326996,
"grad_norm": 0.36319797246159663,
"learning_rate": 1.7502369849833908e-05,
"loss": 0.1458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07442685216665268,
"step": 1080,
"valid_targets_mean": 4733.2,
"valid_targets_min": 2193
},
{
"epoch": 4.1254752851711025,
"grad_norm": 0.33464784259493086,
"learning_rate": 1.7314260443115046e-05,
"loss": 0.1421,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06767291575670242,
"step": 1085,
"valid_targets_mean": 4051.0,
"valid_targets_min": 1738
},
{
"epoch": 4.144486692015209,
"grad_norm": 0.3452301476773902,
"learning_rate": 1.712639268246184e-05,
"loss": 0.1363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06405071169137955,
"step": 1090,
"valid_targets_mean": 4000.5,
"valid_targets_min": 1674
},
{
"epoch": 4.163498098859316,
"grad_norm": 0.3443207863514706,
"learning_rate": 1.6938783471041647e-05,
"loss": 0.1378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06821317225694656,
"step": 1095,
"valid_targets_mean": 4199.9,
"valid_targets_min": 1924
},
{
"epoch": 4.182509505703422,
"grad_norm": 0.3598105706642877,
"learning_rate": 1.6751449688759194e-05,
"loss": 0.1315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05240233615040779,
"step": 1100,
"valid_targets_mean": 5206.1,
"valid_targets_min": 2822
},
{
"epoch": 4.201520912547529,
"grad_norm": 0.3092390079700352,
"learning_rate": 1.65644081907378e-05,
"loss": 0.1107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.054483313113451004,
"step": 1105,
"valid_targets_mean": 5036.4,
"valid_targets_min": 1421
},
{
"epoch": 4.220532319391635,
"grad_norm": 0.2763575400218225,
"learning_rate": 1.6377675805802882e-05,
"loss": 0.1094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048552319407463074,
"step": 1110,
"valid_targets_mean": 5392.4,
"valid_targets_min": 1778
},
{
"epoch": 4.239543726235741,
"grad_norm": 0.28272183808385165,
"learning_rate": 1.6191269334967796e-05,
"loss": 0.1004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05730822682380676,
"step": 1115,
"valid_targets_mean": 5387.4,
"valid_targets_min": 2268
},
{
"epoch": 4.258555133079848,
"grad_norm": 0.25657261472183496,
"learning_rate": 1.6005205549922173e-05,
"loss": 0.0997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047337133437395096,
"step": 1120,
"valid_targets_mean": 5138.0,
"valid_targets_min": 1548
},
{
"epoch": 4.277566539923955,
"grad_norm": 0.25086844602226965,
"learning_rate": 1.5819501191522917e-05,
"loss": 0.0918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04738449677824974,
"step": 1125,
"valid_targets_mean": 4930.4,
"valid_targets_min": 2597
},
{
"epoch": 4.2965779467680605,
"grad_norm": 0.23414973432884928,
"learning_rate": 1.5634172968287974e-05,
"loss": 0.0916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.039102550595998764,
"step": 1130,
"valid_targets_mean": 4864.0,
"valid_targets_min": 2904
},
{
"epoch": 4.315589353612167,
"grad_norm": 0.22679376081285432,
"learning_rate": 1.5449237554892997e-05,
"loss": 0.0879,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046861957758665085,
"step": 1135,
"valid_targets_mean": 5411.9,
"valid_targets_min": 2204
},
{
"epoch": 4.334600760456274,
"grad_norm": 0.27215497448339554,
"learning_rate": 1.5264711590671067e-05,
"loss": 0.0864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04249775409698486,
"step": 1140,
"valid_targets_mean": 5154.8,
"valid_targets_min": 1423
},
{
"epoch": 4.35361216730038,
"grad_norm": 0.23781571763344966,
"learning_rate": 1.5080611678115585e-05,
"loss": 0.0855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04478433355689049,
"step": 1145,
"valid_targets_mean": 5145.1,
"valid_targets_min": 3055
},
{
"epoch": 4.3726235741444865,
"grad_norm": 0.25822483894259435,
"learning_rate": 1.4896954381386477e-05,
"loss": 0.0844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.043877530843019485,
"step": 1150,
"valid_targets_mean": 5636.9,
"valid_targets_min": 2620
},
{
"epoch": 4.391634980988593,
"grad_norm": 0.26714599071066003,
"learning_rate": 1.4713756224819872e-05,
"loss": 0.0829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04205765202641487,
"step": 1155,
"valid_targets_mean": 5523.9,
"valid_targets_min": 3190
},
{
"epoch": 4.4106463878327,
"grad_norm": 0.23416180161057334,
"learning_rate": 1.453103369144134e-05,
"loss": 0.0807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03860076516866684,
"step": 1160,
"valid_targets_mean": 4908.3,
"valid_targets_min": 2088
},
{
"epoch": 4.429657794676806,
"grad_norm": 0.24092755205006228,
"learning_rate": 1.4348803221482828e-05,
"loss": 0.0797,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03292986378073692,
"step": 1165,
"valid_targets_mean": 5170.9,
"valid_targets_min": 2206
},
{
"epoch": 4.448669201520913,
"grad_norm": 0.2769123972364188,
"learning_rate": 1.4167081210903501e-05,
"loss": 0.0819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04898636043071747,
"step": 1170,
"valid_targets_mean": 5348.1,
"valid_targets_min": 2959
},
{
"epoch": 4.467680608365019,
"grad_norm": 0.2687335450112676,
"learning_rate": 1.3985884009914542e-05,
"loss": 0.0843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04135872796177864,
"step": 1175,
"valid_targets_mean": 5146.4,
"valid_targets_min": 1864
},
{
"epoch": 4.486692015209125,
"grad_norm": 0.44423160941024686,
"learning_rate": 1.3805227921508018e-05,
"loss": 0.1042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05701296404004097,
"step": 1180,
"valid_targets_mean": 3920.1,
"valid_targets_min": 2054
},
{
"epoch": 4.505703422053232,
"grad_norm": 0.4307059905800158,
"learning_rate": 1.3625129199990083e-05,
"loss": 0.1137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06144963577389717,
"step": 1185,
"valid_targets_mean": 4123.0,
"valid_targets_min": 1702
},
{
"epoch": 4.524714828897339,
"grad_norm": 0.4735169631189255,
"learning_rate": 1.3445604049518503e-05,
"loss": 0.1136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.058991312980651855,
"step": 1190,
"valid_targets_mean": 4134.7,
"valid_targets_min": 1894
},
{
"epoch": 4.5437262357414445,
"grad_norm": 0.47226895596823215,
"learning_rate": 1.3266668622644696e-05,
"loss": 0.1083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05097019299864769,
"step": 1195,
"valid_targets_mean": 3485.5,
"valid_targets_min": 1581
},
{
"epoch": 4.562737642585551,
"grad_norm": 0.4233485609105754,
"learning_rate": 1.3088339018860439e-05,
"loss": 0.1067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05011675879359245,
"step": 1200,
"valid_targets_mean": 3915.2,
"valid_targets_min": 1647
},
{
"epoch": 4.581749049429658,
"grad_norm": 0.39098121916184353,
"learning_rate": 1.291063128314934e-05,
"loss": 0.0977,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04419676959514618,
"step": 1205,
"valid_targets_mean": 3678.2,
"valid_targets_min": 1771
},
{
"epoch": 4.600760456273765,
"grad_norm": 0.555201081378785,
"learning_rate": 1.2733561404543177e-05,
"loss": 0.1111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12111985683441162,
"step": 1210,
"valid_targets_mean": 6885.0,
"valid_targets_min": 675
},
{
"epoch": 4.619771863117871,
"grad_norm": 0.36519466564325903,
"learning_rate": 1.2557145314683364e-05,
"loss": 0.2596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12528151273727417,
"step": 1215,
"valid_targets_mean": 7251.2,
"valid_targets_min": 511
},
{
"epoch": 4.638783269961977,
"grad_norm": 0.3187163291454204,
"learning_rate": 1.2381398886387466e-05,
"loss": 0.2636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13393153250217438,
"step": 1220,
"valid_targets_mean": 7708.2,
"valid_targets_min": 667
},
{
"epoch": 4.657794676806084,
"grad_norm": 0.2853507467833514,
"learning_rate": 1.2206337932221094e-05,
"loss": 0.2495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11738086491823196,
"step": 1225,
"valid_targets_mean": 6611.8,
"valid_targets_min": 377
},
{
"epoch": 4.67680608365019,
"grad_norm": 0.27103089684314385,
"learning_rate": 1.2031978203075172e-05,
"loss": 0.2356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1208362951874733,
"step": 1230,
"valid_targets_mean": 7745.3,
"valid_targets_min": 726
},
{
"epoch": 4.695817490494297,
"grad_norm": 0.26218276824066433,
"learning_rate": 1.185833538674879e-05,
"loss": 0.2291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11319077014923096,
"step": 1235,
"valid_targets_mean": 7991.2,
"valid_targets_min": 647
},
{
"epoch": 4.714828897338403,
"grad_norm": 0.2604914676003978,
"learning_rate": 1.1685425106537688e-05,
"loss": 0.2306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12409955263137817,
"step": 1240,
"valid_targets_mean": 8260.1,
"valid_targets_min": 762
},
{
"epoch": 4.733840304182509,
"grad_norm": 0.3533617817645549,
"learning_rate": 1.1513262919828603e-05,
"loss": 0.2205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12882865965366364,
"step": 1245,
"valid_targets_mean": 9195.9,
"valid_targets_min": 815
},
{
"epoch": 4.752851711026616,
"grad_norm": 0.273675157838543,
"learning_rate": 1.1341864316699463e-05,
"loss": 0.2165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10350463539361954,
"step": 1250,
"valid_targets_mean": 6803.8,
"valid_targets_min": 553
},
{
"epoch": 4.771863117870723,
"grad_norm": 0.3301627103857975,
"learning_rate": 1.1171244718525726e-05,
"loss": 0.2198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09687048941850662,
"step": 1255,
"valid_targets_mean": 7715.6,
"valid_targets_min": 686
},
{
"epoch": 4.7908745247148286,
"grad_norm": 0.3183754012435459,
"learning_rate": 1.100141947659288e-05,
"loss": 0.2085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11653482168912888,
"step": 1260,
"valid_targets_mean": 7937.8,
"valid_targets_min": 657
},
{
"epoch": 4.809885931558935,
"grad_norm": 0.305218952013167,
"learning_rate": 1.0832403870715153e-05,
"loss": 0.2085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10665573924779892,
"step": 1265,
"valid_targets_mean": 7325.4,
"valid_targets_min": 636
},
{
"epoch": 4.828897338403042,
"grad_norm": 0.37392762584119593,
"learning_rate": 1.0664213107860827e-05,
"loss": 0.2021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10478193312883377,
"step": 1270,
"valid_targets_mean": 7820.8,
"valid_targets_min": 527
},
{
"epoch": 4.847908745247148,
"grad_norm": 0.28454754566571105,
"learning_rate": 1.0496862320783926e-05,
"loss": 0.1835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048708055168390274,
"step": 1275,
"valid_targets_mean": 6358.3,
"valid_targets_min": 1151
},
{
"epoch": 4.866920152091255,
"grad_norm": 0.2808196037434785,
"learning_rate": 1.033036656666272e-05,
"loss": 0.0922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04468848183751106,
"step": 1280,
"valid_targets_mean": 6175.8,
"valid_targets_min": 1314
},
{
"epoch": 4.885931558935361,
"grad_norm": 0.26500582432214914,
"learning_rate": 1.016474082574495e-05,
"loss": 0.0927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04400516673922539,
"step": 1285,
"valid_targets_mean": 5880.3,
"valid_targets_min": 1151
},
{
"epoch": 4.904942965779467,
"grad_norm": 0.24664528337491692,
"learning_rate": 1.0000000000000006e-05,
"loss": 0.0871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03696668520569801,
"step": 1290,
"valid_targets_mean": 5792.0,
"valid_targets_min": 1054
},
{
"epoch": 4.923954372623574,
"grad_norm": 0.2286048807471563,
"learning_rate": 9.836158911778132e-06,
"loss": 0.0859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04410147666931152,
"step": 1295,
"valid_targets_mean": 6356.2,
"valid_targets_min": 980
},
{
"epoch": 4.942965779467681,
"grad_norm": 0.20614916293927305,
"learning_rate": 9.673232302476819e-06,
"loss": 0.0837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.039480239152908325,
"step": 1300,
"valid_targets_mean": 5821.0,
"valid_targets_min": 905
},
{
"epoch": 4.961977186311787,
"grad_norm": 0.1928349932521555,
"learning_rate": 9.511234831214464e-06,
"loss": 0.0836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03988807275891304,
"step": 1305,
"valid_targets_mean": 5897.6,
"valid_targets_min": 1223
},
{
"epoch": 4.980988593155893,
"grad_norm": 0.18807335044788295,
"learning_rate": 9.350181073511412e-06,
"loss": 0.0814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.039488207548856735,
"step": 1310,
"valid_targets_mean": 6550.6,
"valid_targets_min": 1179
},
{
"epoch": 5.0,
"grad_norm": 0.21533958753633334,
"learning_rate": 9.190085519978575e-06,
"loss": 0.0827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03866899386048317,
"step": 1315,
"valid_targets_mean": 5367.0,
"valid_targets_min": 1264
},
{
"epoch": 5.019011406844107,
"grad_norm": 0.35656357662267013,
"learning_rate": 9.030962575013622e-06,
"loss": 0.119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.058930665254592896,
"step": 1320,
"valid_targets_mean": 4315.6,
"valid_targets_min": 1507
},
{
"epoch": 5.038022813688213,
"grad_norm": 0.3993925897181664,
"learning_rate": 8.872826555505012e-06,
"loss": 0.1081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05182621255517006,
"step": 1325,
"valid_targets_mean": 4631.1,
"valid_targets_min": 1345
},
{
"epoch": 5.057034220532319,
"grad_norm": 0.39173542137242245,
"learning_rate": 8.715691689543761e-06,
"loss": 0.1037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.049891237169504166,
"step": 1330,
"valid_targets_mean": 4061.9,
"valid_targets_min": 1929
},
{
"epoch": 5.076045627376426,
"grad_norm": 0.3346676502787186,
"learning_rate": 8.559572115143406e-06,
"loss": 0.1027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053807903081178665,
"step": 1335,
"valid_targets_mean": 4492.1,
"valid_targets_min": 1512
},
{
"epoch": 5.095057034220532,
"grad_norm": 0.33046866192129126,
"learning_rate": 8.404481878967848e-06,
"loss": 0.0986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04590759798884392,
"step": 1340,
"valid_targets_mean": 4387.7,
"valid_targets_min": 1324
},
{
"epoch": 5.114068441064639,
"grad_norm": 0.2862525406517934,
"learning_rate": 8.250434935067593e-06,
"loss": 0.0988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050703514367341995,
"step": 1345,
"valid_targets_mean": 4403.1,
"valid_targets_min": 2036
},
{
"epoch": 5.133079847908745,
"grad_norm": 0.2830684273551724,
"learning_rate": 8.09744514362421e-06,
"loss": 0.0936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0482289083302021,
"step": 1350,
"valid_targets_mean": 4255.0,
"valid_targets_min": 2083
},
{
"epoch": 5.152091254752852,
"grad_norm": 0.2936949182763945,
"learning_rate": 7.945526269703295e-06,
"loss": 0.0897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.042581137269735336,
"step": 1355,
"valid_targets_mean": 4031.0,
"valid_targets_min": 1761
},
{
"epoch": 5.171102661596958,
"grad_norm": 0.32369753015268776,
"learning_rate": 7.794691982015991e-06,
"loss": 0.0881,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04366425797343254,
"step": 1360,
"valid_targets_mean": 4213.6,
"valid_targets_min": 1827
},
{
"epoch": 5.190114068441065,
"grad_norm": 0.3876166105871689,
"learning_rate": 7.644955851689129e-06,
"loss": 0.0875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04470920190215111,
"step": 1365,
"valid_targets_mean": 5610.1,
"valid_targets_min": 2840
},
{
"epoch": 5.2091254752851714,
"grad_norm": 0.26999235527739424,
"learning_rate": 7.496331351044226e-06,
"loss": 0.0848,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04106168821454048,
"step": 1370,
"valid_targets_mean": 4945.7,
"valid_targets_min": 1853
},
{
"epoch": 5.228136882129277,
"grad_norm": 0.25109703607220174,
"learning_rate": 7.348831852385265e-06,
"loss": 0.082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03378221392631531,
"step": 1375,
"valid_targets_mean": 5207.0,
"valid_targets_min": 2341
},
{
"epoch": 5.247148288973384,
"grad_norm": 0.2350369927621042,
"learning_rate": 7.202470626795626e-06,
"loss": 0.076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03785305097699165,
"step": 1380,
"valid_targets_mean": 5175.4,
"valid_targets_min": 2093
},
{
"epoch": 5.266159695817491,
"grad_norm": 0.21697067917279458,
"learning_rate": 7.057260842943949e-06,
"loss": 0.0728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.031559597700834274,
"step": 1385,
"valid_targets_mean": 5396.7,
"valid_targets_min": 1885
},
{
"epoch": 5.285171102661597,
"grad_norm": 0.22438779122417016,
"learning_rate": 6.9132155658993785e-06,
"loss": 0.0676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03445998206734657,
"step": 1390,
"valid_targets_mean": 5246.9,
"valid_targets_min": 2203
},
{
"epoch": 5.304182509505703,
"grad_norm": 0.2117622397099648,
"learning_rate": 6.770347755955982e-06,
"loss": 0.0668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03348778560757637,
"step": 1395,
"valid_targets_mean": 4957.9,
"valid_targets_min": 1708
},
{
"epoch": 5.32319391634981,
"grad_norm": 0.200452647832453,
"learning_rate": 6.628670267466697e-06,
"loss": 0.0626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.028821026906371117,
"step": 1400,
"valid_targets_mean": 4951.1,
"valid_targets_min": 1826
},
{
"epoch": 5.342205323193916,
"grad_norm": 0.21955745199536397,
"learning_rate": 6.488195847686795e-06,
"loss": 0.061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029862603172659874,
"step": 1405,
"valid_targets_mean": 5112.8,
"valid_targets_min": 2968
},
{
"epoch": 5.361216730038023,
"grad_norm": 0.22826378564937264,
"learning_rate": 6.348937135626922e-06,
"loss": 0.0605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0312732495367527,
"step": 1410,
"valid_targets_mean": 5321.8,
"valid_targets_min": 2382
},
{
"epoch": 5.380228136882129,
"grad_norm": 0.20427947265228474,
"learning_rate": 6.210906660915938e-06,
"loss": 0.0574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02499421499669552,
"step": 1415,
"valid_targets_mean": 4986.9,
"valid_targets_min": 2576
},
{
"epoch": 5.399239543726236,
"grad_norm": 0.22020819639088524,
"learning_rate": 6.074116842673585e-06,
"loss": 0.0583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0311068594455719,
"step": 1420,
"valid_targets_mean": 5325.8,
"valid_targets_min": 2928
},
{
"epoch": 5.418250950570342,
"grad_norm": 0.22574926260061123,
"learning_rate": 5.938579988393099e-06,
"loss": 0.0562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029961155727505684,
"step": 1425,
"valid_targets_mean": 5161.7,
"valid_targets_min": 1862
},
{
"epoch": 5.437262357414449,
"grad_norm": 0.20400844513752284,
"learning_rate": 5.80430829283382e-06,
"loss": 0.0535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02954328991472721,
"step": 1430,
"valid_targets_mean": 4941.3,
"valid_targets_min": 1507
},
{
"epoch": 5.4562737642585555,
"grad_norm": 0.22004866648638607,
"learning_rate": 5.671313836924039e-06,
"loss": 0.0581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029622698202729225,
"step": 1435,
"valid_targets_mean": 5260.5,
"valid_targets_min": 2625
},
{
"epoch": 5.475285171102661,
"grad_norm": 0.23172139547283307,
"learning_rate": 5.539608586673988e-06,
"loss": 0.0581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.032255351543426514,
"step": 1440,
"valid_targets_mean": 4271.5,
"valid_targets_min": 1373
},
{
"epoch": 5.494296577946768,
"grad_norm": 0.30348623919330886,
"learning_rate": 5.409204392099224e-06,
"loss": 0.071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029614215716719627,
"step": 1445,
"valid_targets_mean": 3648.6,
"valid_targets_min": 1151
},
{
"epoch": 5.513307984790875,
"grad_norm": 0.3446203794591457,
"learning_rate": 5.280112986154462e-06,
"loss": 0.0721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03383517637848854,
"step": 1450,
"valid_targets_mean": 3692.1,
"valid_targets_min": 1583
},
{
"epoch": 5.532319391634981,
"grad_norm": 0.304903377781796,
"learning_rate": 5.152345983677866e-06,
"loss": 0.067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03009621985256672,
"step": 1455,
"valid_targets_mean": 4084.4,
"valid_targets_min": 1601
},
{
"epoch": 5.551330798479087,
"grad_norm": 0.2991592969123906,
"learning_rate": 5.02591488034609e-06,
"loss": 0.0671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03557578846812248,
"step": 1460,
"valid_targets_mean": 3740.4,
"valid_targets_min": 1825
},
{
"epoch": 5.570342205323194,
"grad_norm": 0.29912614724436565,
"learning_rate": 4.900831051639892e-06,
"loss": 0.0599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03164946287870407,
"step": 1465,
"valid_targets_mean": 3882.0,
"valid_targets_min": 1821
},
{
"epoch": 5.589353612167301,
"grad_norm": 0.27811420728368913,
"learning_rate": 4.777105751820708e-06,
"loss": 0.0576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03166574239730835,
"step": 1470,
"valid_targets_mean": 4136.7,
"valid_targets_min": 1743
},
{
"epoch": 5.608365019011407,
"grad_norm": 0.9103501807156557,
"learning_rate": 4.654750112918007e-06,
"loss": 0.1307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11657539755105972,
"step": 1475,
"valid_targets_mean": 7437.8,
"valid_targets_min": 591
},
{
"epoch": 5.6273764258555135,
"grad_norm": 0.5130315310560948,
"learning_rate": 4.533775143727748e-06,
"loss": 0.2505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1216835007071495,
"step": 1480,
"valid_targets_mean": 7844.2,
"valid_targets_min": 715
},
{
"epoch": 5.64638783269962,
"grad_norm": 0.38916103997249035,
"learning_rate": 4.414191728821838e-06,
"loss": 0.2382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12430062144994736,
"step": 1485,
"valid_targets_mean": 7960.9,
"valid_targets_min": 733
},
{
"epoch": 5.665399239543726,
"grad_norm": 0.34346190346026684,
"learning_rate": 4.296010627568823e-06,
"loss": 0.2253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1269465535879135,
"step": 1490,
"valid_targets_mean": 8483.1,
"valid_targets_min": 759
},
{
"epoch": 5.684410646387833,
"grad_norm": 0.26852843419670336,
"learning_rate": 4.17924247316585e-06,
"loss": 0.2085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1000281274318695,
"step": 1495,
"valid_targets_mean": 7507.5,
"valid_targets_min": 702
},
{
"epoch": 5.7034220532319395,
"grad_norm": 0.23814374694727244,
"learning_rate": 4.0638977716819105e-06,
"loss": 0.2061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09797212481498718,
"step": 1500,
"valid_targets_mean": 6985.9,
"valid_targets_min": 647
},
{
"epoch": 5.722433460076045,
"grad_norm": 0.26229166135458226,
"learning_rate": 3.949986901112608e-06,
"loss": 0.197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10039948672056198,
"step": 1505,
"valid_targets_mean": 7685.7,
"valid_targets_min": 592
},
{
"epoch": 5.741444866920152,
"grad_norm": 0.24951933668512646,
"learning_rate": 3.837520110446391e-06,
"loss": 0.1978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09159765392541885,
"step": 1510,
"valid_targets_mean": 7207.1,
"valid_targets_min": 682
},
{
"epoch": 5.760456273764259,
"grad_norm": 0.28127344204747545,
"learning_rate": 3.7265075187424373e-06,
"loss": 0.1895,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10092798620462418,
"step": 1515,
"valid_targets_mean": 7820.0,
"valid_targets_min": 747
},
{
"epoch": 5.779467680608365,
"grad_norm": 0.2563140724682205,
"learning_rate": 3.616959114220162e-06,
"loss": 0.1867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10091284662485123,
"step": 1520,
"valid_targets_mean": 8022.3,
"valid_targets_min": 727
},
{
"epoch": 5.798479087452471,
"grad_norm": 0.27238599596323204,
"learning_rate": 3.508884753360593e-06,
"loss": 0.1801,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08664939552545547,
"step": 1525,
"valid_targets_mean": 7562.2,
"valid_targets_min": 661
},
{
"epoch": 5.817490494296578,
"grad_norm": 0.2922682553948556,
"learning_rate": 3.402294160019499e-06,
"loss": 0.1771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09690836071968079,
"step": 1530,
"valid_targets_mean": 8467.6,
"valid_targets_min": 409
},
{
"epoch": 5.836501901140684,
"grad_norm": 0.2878635630184777,
"learning_rate": 3.2971969245525215e-06,
"loss": 0.1682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09080066531896591,
"step": 1535,
"valid_targets_mean": 7542.6,
"valid_targets_min": 781
},
{
"epoch": 5.855513307984791,
"grad_norm": 0.23862395323068222,
"learning_rate": 3.193602502952291e-06,
"loss": 0.1197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.038949862122535706,
"step": 1540,
"valid_targets_mean": 5823.9,
"valid_targets_min": 1136
},
{
"epoch": 5.8745247148288975,
"grad_norm": 0.2387967291215592,
"learning_rate": 3.0915202159976453e-06,
"loss": 0.0769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.042837340384721756,
"step": 1545,
"valid_targets_mean": 6544.8,
"valid_targets_min": 1446
},
{
"epoch": 5.893536121673003,
"grad_norm": 0.1881656636618961,
"learning_rate": 2.9909592484149795e-06,
"loss": 0.0752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0399971567094326,
"step": 1550,
"valid_targets_mean": 6424.3,
"valid_targets_min": 1291
},
{
"epoch": 5.91254752851711,
"grad_norm": 0.17058202140819237,
"learning_rate": 2.8919286480518803e-06,
"loss": 0.07,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03311106935143471,
"step": 1555,
"valid_targets_mean": 5882.0,
"valid_targets_min": 979
},
{
"epoch": 5.931558935361217,
"grad_norm": 0.1711908706695631,
"learning_rate": 2.794437325063064e-06,
"loss": 0.0703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03513427823781967,
"step": 1560,
"valid_targets_mean": 6166.6,
"valid_targets_min": 933
},
{
"epoch": 5.9505703422053235,
"grad_norm": 0.17206848728883706,
"learning_rate": 2.6984940511086665e-06,
"loss": 0.0689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03399783745408058,
"step": 1565,
"valid_targets_mean": 5971.0,
"valid_targets_min": 1196
},
{
"epoch": 5.969581749049429,
"grad_norm": 0.16216773283513025,
"learning_rate": 2.604107458565066e-06,
"loss": 0.0667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03391076996922493,
"step": 1570,
"valid_targets_mean": 6159.1,
"valid_targets_min": 1238
},
{
"epoch": 5.988593155893536,
"grad_norm": 0.16031421414918762,
"learning_rate": 2.5112860397481553e-06,
"loss": 0.0655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.032533951103687286,
"step": 1575,
"valid_targets_mean": 6163.3,
"valid_targets_min": 1300
},
{
"epoch": 6.007604562737643,
"grad_norm": 0.3155276653530433,
"learning_rate": 2.4200381461492817e-06,
"loss": 0.0711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03919023647904396,
"step": 1580,
"valid_targets_mean": 4252.4,
"valid_targets_min": 2118
},
{
"epoch": 6.026615969581749,
"grad_norm": 0.28335210258711535,
"learning_rate": 2.330371987683815e-06,
"loss": 0.0839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04079696908593178,
"step": 1585,
"valid_targets_mean": 4523.0,
"valid_targets_min": 1550
},
{
"epoch": 6.0456273764258555,
"grad_norm": 0.3167321882363807,
"learning_rate": 2.242295631952496e-06,
"loss": 0.0764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03657423332333565,
"step": 1590,
"valid_targets_mean": 4048.7,
"valid_targets_min": 2023
},
{
"epoch": 6.064638783269962,
"grad_norm": 0.278519071520387,
"learning_rate": 2.155817003515539e-06,
"loss": 0.0716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03454483672976494,
"step": 1595,
"valid_targets_mean": 4060.6,
"valid_targets_min": 1807
},
{
"epoch": 6.083650190114068,
"grad_norm": 0.2629161013037092,
"learning_rate": 2.0709438831796303e-06,
"loss": 0.0686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03606827184557915,
"step": 1600,
"valid_targets_mean": 4122.0,
"valid_targets_min": 2089
},
{
"epoch": 6.102661596958175,
"grad_norm": 0.23665623570717814,
"learning_rate": 1.987683907297888e-06,
"loss": 0.0665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03397540748119354,
"step": 1605,
"valid_targets_mean": 4094.8,
"valid_targets_min": 1660
},
{
"epoch": 6.1216730038022815,
"grad_norm": 0.23188699309209682,
"learning_rate": 1.9060445670827477e-06,
"loss": 0.0678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03223345801234245,
"step": 1610,
"valid_targets_mean": 3793.4,
"valid_targets_min": 2184
},
{
"epoch": 6.140684410646388,
"grad_norm": 0.22419403701656915,
"learning_rate": 1.826033207932001e-06,
"loss": 0.0619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027402834966778755,
"step": 1615,
"valid_targets_mean": 4053.2,
"valid_targets_min": 1891
},
{
"epoch": 6.159695817490494,
"grad_norm": 0.22022828114085813,
"learning_rate": 1.7476570287678396e-06,
"loss": 0.0606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02872721664607525,
"step": 1620,
"valid_targets_mean": 4274.1,
"valid_targets_min": 1885
},
{
"epoch": 6.178707224334601,
"grad_norm": 0.21969595617322446,
"learning_rate": 1.6709230813892042e-06,
"loss": 0.0583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03280468285083771,
"step": 1625,
"valid_targets_mean": 4422.8,
"valid_targets_min": 1908
},
{
"epoch": 6.197718631178708,
"grad_norm": 0.35529496426467005,
"learning_rate": 1.5958382698372644e-06,
"loss": 0.0645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.033318690955638885,
"step": 1630,
"valid_targets_mean": 5303.7,
"valid_targets_min": 2553
},
{
"epoch": 6.216730038022813,
"grad_norm": 0.27558919056523234,
"learning_rate": 1.5224093497742654e-06,
"loss": 0.0705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.035232577472925186,
"step": 1635,
"valid_targets_mean": 5254.6,
"valid_targets_min": 2168
},
{
"epoch": 6.23574144486692,
"grad_norm": 0.22612201515640176,
"learning_rate": 1.4506429278756672e-06,
"loss": 0.0617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029211828485131264,
"step": 1640,
"valid_targets_mean": 4881.5,
"valid_targets_min": 1899
},
{
"epoch": 6.254752851711027,
"grad_norm": 0.20633288036705308,
"learning_rate": 1.380545461235736e-06,
"loss": 0.0607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029766544699668884,
"step": 1645,
"valid_targets_mean": 5599.6,
"valid_targets_min": 2212
},
{
"epoch": 6.273764258555133,
"grad_norm": 0.2016643153825409,
"learning_rate": 1.3121232567865793e-06,
"loss": 0.0542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025733182206749916,
"step": 1650,
"valid_targets_mean": 4711.3,
"valid_targets_min": 1625
},
{
"epoch": 6.2927756653992395,
"grad_norm": 0.18952082398350228,
"learning_rate": 1.2453824707306628e-06,
"loss": 0.0538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.030064953491091728,
"step": 1655,
"valid_targets_mean": 5566.0,
"valid_targets_min": 1935
},
{
"epoch": 6.311787072243346,
"grad_norm": 0.16436076167259167,
"learning_rate": 1.180329107986955e-06,
"loss": 0.0504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.019878484308719635,
"step": 1660,
"valid_targets_mean": 4913.7,
"valid_targets_min": 2127
},
{
"epoch": 6.330798479087452,
"grad_norm": 0.15922497867390237,
"learning_rate": 1.1169690216505846e-06,
"loss": 0.0477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.021651925519108772,
"step": 1665,
"valid_targets_mean": 4946.9,
"valid_targets_min": 2188
},
{
"epoch": 6.349809885931559,
"grad_norm": 0.16748076773602358,
"learning_rate": 1.0553079124662768e-06,
"loss": 0.0476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02340800315141678,
"step": 1670,
"valid_targets_mean": 5133.6,
"valid_targets_min": 1787
},
{
"epoch": 6.3688212927756656,
"grad_norm": 0.16380388178076224,
"learning_rate": 9.953513283153905e-07,
"loss": 0.0454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02057066187262535,
"step": 1675,
"valid_targets_mean": 5151.9,
"valid_targets_min": 2159
},
{
"epoch": 6.387832699619771,
"grad_norm": 0.16325774776610502,
"learning_rate": 9.371046637167835e-07,
"loss": 0.0438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023373225703835487,
"step": 1680,
"valid_targets_mean": 4959.3,
"valid_targets_min": 1217
},
{
"epoch": 6.406844106463878,
"grad_norm": 0.16441190471893835,
"learning_rate": 8.805731593414268e-07,
"loss": 0.0442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.018379444256424904,
"step": 1685,
"valid_targets_mean": 5263.2,
"valid_targets_min": 2115
},
{
"epoch": 6.425855513307985,
"grad_norm": 0.1638841692314567,
"learning_rate": 8.25761901540889e-07,
"loss": 0.043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01931699924170971,
"step": 1690,
"valid_targets_mean": 5157.2,
"valid_targets_min": 1833
},
{
"epoch": 6.444866920152092,
"grad_norm": 0.1735206177979583,
"learning_rate": 7.726758218897079e-07,
"loss": 0.0401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02396308444440365,
"step": 1695,
"valid_targets_mean": 5271.4,
"valid_targets_min": 2529
},
{
"epoch": 6.4638783269961975,
"grad_norm": 0.17296056006964222,
"learning_rate": 7.213196967416624e-07,
"loss": 0.0448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02491677738726139,
"step": 1700,
"valid_targets_mean": 5138.1,
"valid_targets_min": 1934
},
{
"epoch": 6.482889733840304,
"grad_norm": 0.25309281952445806,
"learning_rate": 6.716981468000372e-07,
"loss": 0.0453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025967717170715332,
"step": 1705,
"valid_targets_mean": 3941.6,
"valid_targets_min": 1710
},
{
"epoch": 6.501901140684411,
"grad_norm": 0.2599714526800321,
"learning_rate": 6.238156367018744e-07,
"loss": 0.0495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025150617584586143,
"step": 1710,
"valid_targets_mean": 4081.7,
"valid_targets_min": 1714
},
{
"epoch": 6.520912547528517,
"grad_norm": 0.2665439212567454,
"learning_rate": 5.776764746162778e-07,
"loss": 0.0516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02574659138917923,
"step": 1715,
"valid_targets_mean": 3830.1,
"valid_targets_min": 1672
},
{
"epoch": 6.5399239543726235,
"grad_norm": 0.23963977576827142,
"learning_rate": 5.332848118567891e-07,
"loss": 0.0464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023752614855766296,
"step": 1720,
"valid_targets_mean": 3843.3,
"valid_targets_min": 1308
},
{
"epoch": 6.55893536121673,
"grad_norm": 0.2133991211840642,
"learning_rate": 4.906446425078782e-07,
"loss": 0.046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02487846277654171,
"step": 1725,
"valid_targets_mean": 3924.0,
"valid_targets_min": 1308
},
{
"epoch": 6.577946768060836,
"grad_norm": 0.19507916434897585,
"learning_rate": 4.497598030655814e-07,
"loss": 0.0402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.018777957186102867,
"step": 1730,
"valid_targets_mean": 3620.2,
"valid_targets_min": 1428
},
{
"epoch": 6.596958174904943,
"grad_norm": 0.18710157722199877,
"learning_rate": 4.106339720923136e-07,
"loss": 0.0375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01577587239444256,
"step": 1735,
"valid_targets_mean": 3715.8,
"valid_targets_min": 1498
},
{
"epoch": 6.61596958174905,
"grad_norm": 1.1221845368347991,
"learning_rate": 3.732706698859012e-07,
"loss": 0.1824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11630845069885254,
"step": 1740,
"valid_targets_mean": 7975.3,
"valid_targets_min": 657
},
{
"epoch": 6.634980988593156,
"grad_norm": 1.173169716414322,
"learning_rate": 3.376732581628406e-07,
"loss": 0.2483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11798709630966187,
"step": 1745,
"valid_targets_mean": 7661.3,
"valid_targets_min": 711
},
{
"epoch": 6.653992395437262,
"grad_norm": 0.7806879785562468,
"learning_rate": 3.038449397558396e-07,
"loss": 0.2357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11574181169271469,
"step": 1750,
"valid_targets_mean": 7740.2,
"valid_targets_min": 588
},
{
"epoch": 6.673003802281369,
"grad_norm": 0.6320222275437308,
"learning_rate": 2.7178875832563734e-07,
"loss": 0.216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10704654455184937,
"step": 1755,
"valid_targets_mean": 7938.4,
"valid_targets_min": 531
},
{
"epoch": 6.692015209125476,
"grad_norm": 0.45835070478875656,
"learning_rate": 2.4150759808716283e-07,
"loss": 0.199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09011327475309372,
"step": 1760,
"valid_targets_mean": 6955.9,
"valid_targets_min": 674
},
{
"epoch": 6.7110266159695815,
"grad_norm": 0.3833941721398251,
"learning_rate": 2.1300418355002296e-07,
"loss": 0.1962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09300309419631958,
"step": 1765,
"valid_targets_mean": 7178.0,
"valid_targets_min": 605
},
{
"epoch": 6.730038022813688,
"grad_norm": 0.3737621062405827,
"learning_rate": 1.862810792733849e-07,
"loss": 0.1848,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10089872032403946,
"step": 1770,
"valid_targets_mean": 8065.9,
"valid_targets_min": 693
},
{
"epoch": 6.749049429657795,
"grad_norm": 0.337929681633567,
"learning_rate": 1.6134068963520988e-07,
"loss": 0.1869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08463618904352188,
"step": 1775,
"valid_targets_mean": 7800.7,
"valid_targets_min": 413
},
{
"epoch": 6.768060836501901,
"grad_norm": 0.3300710696811267,
"learning_rate": 1.381852586159349e-07,
"loss": 0.1839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09186343103647232,
"step": 1780,
"valid_targets_mean": 7847.2,
"valid_targets_min": 695
},
{
"epoch": 6.787072243346008,
"grad_norm": 0.30122393387768587,
"learning_rate": 1.1681686959657879e-07,
"loss": 0.1703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07956251502037048,
"step": 1785,
"valid_targets_mean": 7130.6,
"valid_targets_min": 614
},
{
"epoch": 6.806083650190114,
"grad_norm": 0.2834417787280566,
"learning_rate": 9.723744517128098e-08,
"loss": 0.1725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08768697828054428,
"step": 1790,
"valid_targets_mean": 7978.8,
"valid_targets_min": 598
},
{
"epoch": 6.82509505703422,
"grad_norm": 0.30669459692474976,
"learning_rate": 7.944874697432436e-08,
"loss": 0.1565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07766545563936234,
"step": 1795,
"valid_targets_mean": 7157.9,
"valid_targets_min": 589
},
{
"epoch": 6.844106463878327,
"grad_norm": 0.30499859672371843,
"learning_rate": 6.345237552163541e-08,
"loss": 0.1559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08147227019071579,
"step": 1800,
"valid_targets_mean": 7234.4,
"valid_targets_min": 593
},
{
"epoch": 6.863117870722434,
"grad_norm": 0.2567755838541607,
"learning_rate": 4.9249770066777113e-08,
"loss": 0.0762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03280698135495186,
"step": 1805,
"valid_targets_mean": 6154.4,
"valid_targets_min": 1363
},
{
"epoch": 6.8821292775665395,
"grad_norm": 0.22981178838040223,
"learning_rate": 3.684220847145481e-08,
"loss": 0.072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03351025655865669,
"step": 1810,
"valid_targets_mean": 5865.2,
"valid_targets_min": 970
},
{
"epoch": 6.901140684410646,
"grad_norm": 0.23048498681871035,
"learning_rate": 2.623080709054149e-08,
"loss": 0.0685,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03717043623328209,
"step": 1815,
"valid_targets_mean": 6350.6,
"valid_targets_min": 972
},
{
"epoch": 6.920152091254753,
"grad_norm": 0.198959962331718,
"learning_rate": 1.7416520671635905e-08,
"loss": 0.0647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03324683755636215,
"step": 1820,
"valid_targets_mean": 5735.5,
"valid_targets_min": 959
},
{
"epoch": 6.93916349809886,
"grad_norm": 0.186783859845399,
"learning_rate": 1.0400142269164637e-08,
"loss": 0.0654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03211604431271553,
"step": 1825,
"valid_targets_mean": 6206.8,
"valid_targets_min": 1281
},
{
"epoch": 6.9581749049429655,
"grad_norm": 0.19629442490410282,
"learning_rate": 5.182303173016934e-09,
"loss": 0.0644,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03166944161057472,
"step": 1830,
"valid_targets_mean": 6104.3,
"valid_targets_min": 1326
},
{
"epoch": 6.977186311787072,
"grad_norm": 0.1829179481431056,
"learning_rate": 1.7634728517545996e-09,
"loss": 0.0629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029660126194357872,
"step": 1835,
"valid_targets_mean": 6447.5,
"valid_targets_min": 1353
},
{
"epoch": 6.996197718631179,
"grad_norm": 0.18366914112103155,
"learning_rate": 1.439589103724437e-10,
"loss": 0.0618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.031334131956100464,
"step": 1840,
"valid_targets_mean": 5833.1,
"valid_targets_min": 1011
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022832343354821205,
"step": 1841,
"total_flos": 4.836413774367818e+18,
"train_loss": 0.19618723029549,
"train_runtime": 26839.4318,
"train_samples_per_second": 6.577,
"train_steps_per_second": 0.069,
"valid_targets_mean": 5367.0,
"valid_targets_min": 1264
}
],
"logging_steps": 5,
"max_steps": 1841,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 750,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.836413774367818e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}