Files
nemotron-terminal-debugging…/trainer_state.json
ModelHub XC ebb105f116 初始化项目,由ModelHub XC社区提供模型
Model: laion/nemotron-terminal-debugging__Qwen3-8B
Source: Original Platform
2026-04-26 09:29:14 +08:00

2838 lines
78 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1274,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.027573529411764705,
"grad_norm": 12.922369370251287,
"learning_rate": 1.25e-06,
"loss": 1.0219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35352784395217896,
"step": 5,
"valid_targets_mean": 10593.7,
"valid_targets_min": 1314
},
{
"epoch": 0.05514705882352941,
"grad_norm": 9.914692363999333,
"learning_rate": 2.8125e-06,
"loss": 1.0055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33466315269470215,
"step": 10,
"valid_targets_mean": 9719.9,
"valid_targets_min": 4041
},
{
"epoch": 0.08272058823529412,
"grad_norm": 5.140457433964579,
"learning_rate": 4.3750000000000005e-06,
"loss": 0.9532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31088295578956604,
"step": 15,
"valid_targets_mean": 9489.2,
"valid_targets_min": 3677
},
{
"epoch": 0.11029411764705882,
"grad_norm": 2.3405453339381914,
"learning_rate": 5.9375e-06,
"loss": 0.8851,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28772222995758057,
"step": 20,
"valid_targets_mean": 10487.8,
"valid_targets_min": 1794
},
{
"epoch": 0.13786764705882354,
"grad_norm": 1.6997672536663537,
"learning_rate": 7.500000000000001e-06,
"loss": 0.8505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2901165187358856,
"step": 25,
"valid_targets_mean": 10282.9,
"valid_targets_min": 1881
},
{
"epoch": 0.16544117647058823,
"grad_norm": 1.3985790541526377,
"learning_rate": 9.0625e-06,
"loss": 0.8194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2608858346939087,
"step": 30,
"valid_targets_mean": 9144.1,
"valid_targets_min": 2515
},
{
"epoch": 0.19301470588235295,
"grad_norm": 0.8623722371179741,
"learning_rate": 1.0625e-05,
"loss": 0.79,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2887074649333954,
"step": 35,
"valid_targets_mean": 10813.7,
"valid_targets_min": 3092
},
{
"epoch": 0.22058823529411764,
"grad_norm": 0.8093983860510723,
"learning_rate": 1.2187500000000001e-05,
"loss": 0.7763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26193925738334656,
"step": 40,
"valid_targets_mean": 10127.0,
"valid_targets_min": 2496
},
{
"epoch": 0.24816176470588236,
"grad_norm": 0.5974300980069079,
"learning_rate": 1.375e-05,
"loss": 0.7367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2483624666929245,
"step": 45,
"valid_targets_mean": 10489.3,
"valid_targets_min": 3368
},
{
"epoch": 0.2757352941176471,
"grad_norm": 0.46168263473020144,
"learning_rate": 1.5312500000000003e-05,
"loss": 0.7257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2395600974559784,
"step": 50,
"valid_targets_mean": 10156.6,
"valid_targets_min": 1452
},
{
"epoch": 0.30330882352941174,
"grad_norm": 0.4028635985676079,
"learning_rate": 1.6875e-05,
"loss": 0.7035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20824620127677917,
"step": 55,
"valid_targets_mean": 8890.8,
"valid_targets_min": 1913
},
{
"epoch": 0.33088235294117646,
"grad_norm": 0.3895890718140441,
"learning_rate": 1.84375e-05,
"loss": 0.6873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22504115104675293,
"step": 60,
"valid_targets_mean": 9894.5,
"valid_targets_min": 3118
},
{
"epoch": 0.3584558823529412,
"grad_norm": 0.30444276365102046,
"learning_rate": 2e-05,
"loss": 0.6644,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20244480669498444,
"step": 65,
"valid_targets_mean": 9749.3,
"valid_targets_min": 1933
},
{
"epoch": 0.3860294117647059,
"grad_norm": 0.2581486013897639,
"learning_rate": 2.1562500000000002e-05,
"loss": 0.6489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2169645130634308,
"step": 70,
"valid_targets_mean": 10628.2,
"valid_targets_min": 1803
},
{
"epoch": 0.41360294117647056,
"grad_norm": 0.2631860893496907,
"learning_rate": 2.3125000000000003e-05,
"loss": 0.6346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22958728671073914,
"step": 75,
"valid_targets_mean": 10800.9,
"valid_targets_min": 2064
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.274808899082167,
"learning_rate": 2.46875e-05,
"loss": 0.6256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23121704161167145,
"step": 80,
"valid_targets_mean": 11487.6,
"valid_targets_min": 4161
},
{
"epoch": 0.46875,
"grad_norm": 0.2583078645972247,
"learning_rate": 2.625e-05,
"loss": 0.612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1923944652080536,
"step": 85,
"valid_targets_mean": 9252.9,
"valid_targets_min": 226
},
{
"epoch": 0.4963235294117647,
"grad_norm": 0.24880268689380583,
"learning_rate": 2.7812500000000002e-05,
"loss": 0.607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19335561990737915,
"step": 90,
"valid_targets_mean": 9072.7,
"valid_targets_min": 1797
},
{
"epoch": 0.5238970588235294,
"grad_norm": 0.2616086521839269,
"learning_rate": 2.9375000000000003e-05,
"loss": 0.5981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19159752130508423,
"step": 95,
"valid_targets_mean": 9744.3,
"valid_targets_min": 1782
},
{
"epoch": 0.5514705882352942,
"grad_norm": 0.27692625930198184,
"learning_rate": 3.09375e-05,
"loss": 0.5915,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18734252452850342,
"step": 100,
"valid_targets_mean": 8845.1,
"valid_targets_min": 2130
},
{
"epoch": 0.5790441176470589,
"grad_norm": 0.2574148775678182,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.5845,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16766202449798584,
"step": 105,
"valid_targets_mean": 8674.7,
"valid_targets_min": 2213
},
{
"epoch": 0.6066176470588235,
"grad_norm": 0.27869577303780235,
"learning_rate": 3.40625e-05,
"loss": 0.5806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19295048713684082,
"step": 110,
"valid_targets_mean": 10376.7,
"valid_targets_min": 1765
},
{
"epoch": 0.6341911764705882,
"grad_norm": 0.2984993331075873,
"learning_rate": 3.5625000000000005e-05,
"loss": 0.5777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20063826441764832,
"step": 115,
"valid_targets_mean": 9936.5,
"valid_targets_min": 1494
},
{
"epoch": 0.6617647058823529,
"grad_norm": 0.271090279172367,
"learning_rate": 3.71875e-05,
"loss": 0.5672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18824777007102966,
"step": 120,
"valid_targets_mean": 9430.4,
"valid_targets_min": 1915
},
{
"epoch": 0.6893382352941176,
"grad_norm": 0.29538515374006735,
"learning_rate": 3.875e-05,
"loss": 0.5646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1923852413892746,
"step": 125,
"valid_targets_mean": 9131.7,
"valid_targets_min": 1686
},
{
"epoch": 0.7169117647058824,
"grad_norm": 0.3122370627704027,
"learning_rate": 3.999992484978314e-05,
"loss": 0.5644,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20504149794578552,
"step": 130,
"valid_targets_mean": 10173.0,
"valid_targets_min": 1298
},
{
"epoch": 0.7444852941176471,
"grad_norm": 0.34703468911750324,
"learning_rate": 3.999729465149199e-05,
"loss": 0.557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17850443720817566,
"step": 135,
"valid_targets_mean": 9228.8,
"valid_targets_min": 924
},
{
"epoch": 0.7720588235294118,
"grad_norm": 0.332190362036781,
"learning_rate": 3.9990907507094396e-05,
"loss": 0.5497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16969668865203857,
"step": 140,
"valid_targets_mean": 8790.3,
"valid_targets_min": 344
},
{
"epoch": 0.7996323529411765,
"grad_norm": 0.3034703265351846,
"learning_rate": 3.9980764616560544e-05,
"loss": 0.5564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18241870403289795,
"step": 145,
"valid_targets_mean": 9520.5,
"valid_targets_min": 1223
},
{
"epoch": 0.8272058823529411,
"grad_norm": 0.3259015215149292,
"learning_rate": 3.9966867885462854e-05,
"loss": 0.5555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19907459616661072,
"step": 150,
"valid_targets_mean": 11470.9,
"valid_targets_min": 3448
},
{
"epoch": 0.8547794117647058,
"grad_norm": 0.3279486360331197,
"learning_rate": 3.994921992461797e-05,
"loss": 0.5461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18447428941726685,
"step": 155,
"valid_targets_mean": 9752.1,
"valid_targets_min": 3827
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.3415570189377227,
"learning_rate": 3.992782404959627e-05,
"loss": 0.5485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18221747875213623,
"step": 160,
"valid_targets_mean": 9179.8,
"valid_targets_min": 1780
},
{
"epoch": 0.9099264705882353,
"grad_norm": 0.39579278374013543,
"learning_rate": 3.9902684280098965e-05,
"loss": 0.5476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18513306975364685,
"step": 165,
"valid_targets_mean": 10199.6,
"valid_targets_min": 2157
},
{
"epoch": 0.9375,
"grad_norm": 0.2798313329426632,
"learning_rate": 3.987380533920287e-05,
"loss": 0.5392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2026323676109314,
"step": 170,
"valid_targets_mean": 11315.1,
"valid_targets_min": 6090
},
{
"epoch": 0.9650735294117647,
"grad_norm": 0.3493942306576815,
"learning_rate": 3.984119265247314e-05,
"loss": 0.5474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15390989184379578,
"step": 175,
"valid_targets_mean": 8281.4,
"valid_targets_min": 2125
},
{
"epoch": 0.9926470588235294,
"grad_norm": 0.36521635704700256,
"learning_rate": 3.9804852346943866e-05,
"loss": 0.5426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18721234798431396,
"step": 180,
"valid_targets_mean": 10394.0,
"valid_targets_min": 2057
},
{
"epoch": 1.0165441176470589,
"grad_norm": 0.30506233144116673,
"learning_rate": 3.9764791249967044e-05,
"loss": 0.5428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19133886694908142,
"step": 185,
"valid_targets_mean": 10289.3,
"valid_targets_min": 2465
},
{
"epoch": 1.0441176470588236,
"grad_norm": 0.2910843215873972,
"learning_rate": 3.972101688792986e-05,
"loss": 0.5312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1826484203338623,
"step": 190,
"valid_targets_mean": 9581.6,
"valid_targets_min": 1460
},
{
"epoch": 1.0716911764705883,
"grad_norm": 0.2755584276978832,
"learning_rate": 3.967353748484071e-05,
"loss": 0.5344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18369868397712708,
"step": 195,
"valid_targets_mean": 10337.5,
"valid_targets_min": 2688
},
{
"epoch": 1.099264705882353,
"grad_norm": 0.359251780614338,
"learning_rate": 3.962236196078411e-05,
"loss": 0.5225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15699255466461182,
"step": 200,
"valid_targets_mean": 9281.1,
"valid_targets_min": 1589
},
{
"epoch": 1.1268382352941178,
"grad_norm": 0.27720289986260105,
"learning_rate": 3.956749993024489e-05,
"loss": 0.5257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.140591099858284,
"step": 205,
"valid_targets_mean": 7799.2,
"valid_targets_min": 1608
},
{
"epoch": 1.1544117647058822,
"grad_norm": 0.26983976305045465,
"learning_rate": 3.950896170030186e-05,
"loss": 0.5259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17038512229919434,
"step": 210,
"valid_targets_mean": 9629.8,
"valid_targets_min": 2130
},
{
"epoch": 1.181985294117647,
"grad_norm": 0.2946999679550615,
"learning_rate": 3.9446758268691395e-05,
"loss": 0.5268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18730860948562622,
"step": 215,
"valid_targets_mean": 9681.8,
"valid_targets_min": 455
},
{
"epoch": 1.2095588235294117,
"grad_norm": 0.299840004738946,
"learning_rate": 3.9380901321741315e-05,
"loss": 0.5185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17023658752441406,
"step": 220,
"valid_targets_mean": 9639.6,
"valid_targets_min": 3931
},
{
"epoch": 1.2371323529411764,
"grad_norm": 0.2966956776393768,
"learning_rate": 3.931140323217524e-05,
"loss": 0.526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16511359810829163,
"step": 225,
"valid_targets_mean": 9019.7,
"valid_targets_min": 1681
},
{
"epoch": 1.2647058823529411,
"grad_norm": 0.2881134761116685,
"learning_rate": 3.923827705678818e-05,
"loss": 0.5219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17622773349285126,
"step": 230,
"valid_targets_mean": 9761.3,
"valid_targets_min": 1943
},
{
"epoch": 1.2922794117647058,
"grad_norm": 0.3211671842301337,
"learning_rate": 3.916153653399352e-05,
"loss": 0.5215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17426463961601257,
"step": 235,
"valid_targets_mean": 9748.3,
"valid_targets_min": 2276
},
{
"epoch": 1.3198529411764706,
"grad_norm": 0.32683328829954483,
"learning_rate": 3.908119608124184e-05,
"loss": 0.522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16995075345039368,
"step": 240,
"valid_targets_mean": 9808.2,
"valid_targets_min": 3060
},
{
"epoch": 1.3474264705882353,
"grad_norm": 0.2885476814272769,
"learning_rate": 3.8997270792312435e-05,
"loss": 0.5139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.172532856464386,
"step": 245,
"valid_targets_mean": 9445.8,
"valid_targets_min": 1887
},
{
"epoch": 1.375,
"grad_norm": 0.40619327872879724,
"learning_rate": 3.890977643447746e-05,
"loss": 0.5224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16190695762634277,
"step": 250,
"valid_targets_mean": 9265.1,
"valid_targets_min": 4316
},
{
"epoch": 1.4025735294117647,
"grad_norm": 0.3079521022594924,
"learning_rate": 3.8818729445539765e-05,
"loss": 0.5096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14533713459968567,
"step": 255,
"valid_targets_mean": 8282.3,
"valid_targets_min": 2412
},
{
"epoch": 1.4301470588235294,
"grad_norm": 0.31036078222548275,
"learning_rate": 3.872414693074466e-05,
"loss": 0.5148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1640068143606186,
"step": 260,
"valid_targets_mean": 9684.3,
"valid_targets_min": 3184
},
{
"epoch": 1.4577205882352942,
"grad_norm": 0.26541664634789813,
"learning_rate": 3.862604665956632e-05,
"loss": 0.5115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17487174272537231,
"step": 265,
"valid_targets_mean": 10682.2,
"valid_targets_min": 4848
},
{
"epoch": 1.4852941176470589,
"grad_norm": 0.28874244943207317,
"learning_rate": 3.8524447062369355e-05,
"loss": 0.5134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16116756200790405,
"step": 270,
"valid_targets_mean": 9613.4,
"valid_targets_min": 1794
},
{
"epoch": 1.5128676470588234,
"grad_norm": 0.3043371506396099,
"learning_rate": 3.8419367226946286e-05,
"loss": 0.5167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1825055181980133,
"step": 275,
"valid_targets_mean": 11411.1,
"valid_targets_min": 3611
},
{
"epoch": 1.5404411764705883,
"grad_norm": 0.3223108606843313,
"learning_rate": 3.831082689493143e-05,
"loss": 0.5176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16505330801010132,
"step": 280,
"valid_targets_mean": 9131.8,
"valid_targets_min": 2184
},
{
"epoch": 1.5680147058823528,
"grad_norm": 0.27729135718776116,
"learning_rate": 3.819884645809203e-05,
"loss": 0.5147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16858059167861938,
"step": 285,
"valid_targets_mean": 9615.7,
"valid_targets_min": 1996
},
{
"epoch": 1.5955882352941178,
"grad_norm": 0.3660763161980808,
"learning_rate": 3.808344695449715e-05,
"loss": 0.5088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1702558845281601,
"step": 290,
"valid_targets_mean": 10235.8,
"valid_targets_min": 4142
},
{
"epoch": 1.6231617647058822,
"grad_norm": 0.3041037414824209,
"learning_rate": 3.796465006456523e-05,
"loss": 0.5065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1646835058927536,
"step": 295,
"valid_targets_mean": 9766.0,
"valid_targets_min": 1557
},
{
"epoch": 1.6507352941176472,
"grad_norm": 0.3198950333330496,
"learning_rate": 3.784247810699093e-05,
"loss": 0.5101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18033772706985474,
"step": 300,
"valid_targets_mean": 10882.1,
"valid_targets_min": 3133
},
{
"epoch": 1.6783088235294117,
"grad_norm": 0.2915834656120734,
"learning_rate": 3.7716954034552004e-05,
"loss": 0.5113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17557981610298157,
"step": 305,
"valid_targets_mean": 10674.8,
"valid_targets_min": 1627
},
{
"epoch": 1.7058823529411766,
"grad_norm": 0.3586758923774135,
"learning_rate": 3.758810142979719e-05,
"loss": 0.5087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17392417788505554,
"step": 310,
"valid_targets_mean": 9954.1,
"valid_targets_min": 1356
},
{
"epoch": 1.7334558823529411,
"grad_norm": 0.2645346812549258,
"learning_rate": 3.74559445006156e-05,
"loss": 0.5157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17819076776504517,
"step": 315,
"valid_targets_mean": 10323.9,
"valid_targets_min": 2361
},
{
"epoch": 1.7610294117647058,
"grad_norm": 0.25271889404246967,
"learning_rate": 3.732050807568878e-05,
"loss": 0.5066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17629846930503845,
"step": 320,
"valid_targets_mean": 11451.3,
"valid_targets_min": 1489
},
{
"epoch": 1.7886029411764706,
"grad_norm": 0.2763437959725264,
"learning_rate": 3.718181759982604e-05,
"loss": 0.5073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1563217043876648,
"step": 325,
"valid_targets_mean": 8785.6,
"valid_targets_min": 2129
},
{
"epoch": 1.8161764705882353,
"grad_norm": 0.30042350536966067,
"learning_rate": 3.703989912918409e-05,
"loss": 0.5054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.176944762468338,
"step": 330,
"valid_targets_mean": 10494.3,
"valid_targets_min": 2765
},
{
"epoch": 1.84375,
"grad_norm": 0.3378004054537919,
"learning_rate": 3.689477932637181e-05,
"loss": 0.5034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1579429805278778,
"step": 335,
"valid_targets_mean": 9586.9,
"valid_targets_min": 1283
},
{
"epoch": 1.8713235294117647,
"grad_norm": 0.2679674790343664,
"learning_rate": 3.674648545544104e-05,
"loss": 0.5077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.178257018327713,
"step": 340,
"valid_targets_mean": 9507.9,
"valid_targets_min": 2612
},
{
"epoch": 1.8988970588235294,
"grad_norm": 0.2579462646319047,
"learning_rate": 3.659504537676444e-05,
"loss": 0.4975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15231987833976746,
"step": 345,
"valid_targets_mean": 9333.8,
"valid_targets_min": 1719
},
{
"epoch": 1.9264705882352942,
"grad_norm": 0.2749601553036317,
"learning_rate": 3.6440487541801246e-05,
"loss": 0.4995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16826336085796356,
"step": 350,
"valid_targets_mean": 9289.1,
"valid_targets_min": 2423
},
{
"epoch": 1.9540441176470589,
"grad_norm": 0.27745508706156247,
"learning_rate": 3.628284098775207e-05,
"loss": 0.5038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1490720808506012,
"step": 355,
"valid_targets_mean": 8659.3,
"valid_targets_min": 3197
},
{
"epoch": 1.9816176470588234,
"grad_norm": 0.255832293559672,
"learning_rate": 3.612213533210356e-05,
"loss": 0.5071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1693265736103058,
"step": 360,
"valid_targets_mean": 10030.6,
"valid_targets_min": 2085
},
{
"epoch": 2.005514705882353,
"grad_norm": 0.33130827465355267,
"learning_rate": 3.595840076706411e-05,
"loss": 0.5046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15648218989372253,
"step": 365,
"valid_targets_mean": 9743.0,
"valid_targets_min": 1915
},
{
"epoch": 2.0330882352941178,
"grad_norm": 0.3384282546059246,
"learning_rate": 3.579166805389154e-05,
"loss": 0.4962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16740265488624573,
"step": 370,
"valid_targets_mean": 9421.0,
"valid_targets_min": 1529
},
{
"epoch": 2.0606617647058822,
"grad_norm": 0.33622127319274503,
"learning_rate": 3.562196851711391e-05,
"loss": 0.4878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16401368379592896,
"step": 375,
"valid_targets_mean": 8955.2,
"valid_targets_min": 1538
},
{
"epoch": 2.088235294117647,
"grad_norm": 0.2894449977451741,
"learning_rate": 3.5449334038644515e-05,
"loss": 0.5018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16942408680915833,
"step": 380,
"valid_targets_mean": 9713.6,
"valid_targets_min": 1919
},
{
"epoch": 2.1158088235294117,
"grad_norm": 0.34751761847543067,
"learning_rate": 3.5273797051792114e-05,
"loss": 0.4948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1818588376045227,
"step": 385,
"valid_targets_mean": 10809.2,
"valid_targets_min": 4108
},
{
"epoch": 2.1433823529411766,
"grad_norm": 0.2635161588753612,
"learning_rate": 3.509539053516759e-05,
"loss": 0.4966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16954530775547028,
"step": 390,
"valid_targets_mean": 10632.2,
"valid_targets_min": 1764
},
{
"epoch": 2.170955882352941,
"grad_norm": 0.24420082473821403,
"learning_rate": 3.49141480064882e-05,
"loss": 0.4982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17877304553985596,
"step": 395,
"valid_targets_mean": 11006.1,
"valid_targets_min": 3599
},
{
"epoch": 2.198529411764706,
"grad_norm": 0.27155049413143767,
"learning_rate": 3.47301035162805e-05,
"loss": 0.4882,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16475136578083038,
"step": 400,
"valid_targets_mean": 9823.3,
"valid_targets_min": 2475
},
{
"epoch": 2.2261029411764706,
"grad_norm": 0.3287310658697828,
"learning_rate": 3.454329164148317e-05,
"loss": 0.4965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16053670644760132,
"step": 405,
"valid_targets_mean": 9885.7,
"valid_targets_min": 3454
},
{
"epoch": 2.2536764705882355,
"grad_norm": 0.2784404641293258,
"learning_rate": 3.435374747895095e-05,
"loss": 0.4873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16837987303733826,
"step": 410,
"valid_targets_mean": 9935.2,
"valid_targets_min": 1837
},
{
"epoch": 2.28125,
"grad_norm": 0.28151764792692086,
"learning_rate": 3.4161506638860903e-05,
"loss": 0.4956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15354721248149872,
"step": 415,
"valid_targets_mean": 9233.6,
"valid_targets_min": 1381
},
{
"epoch": 2.3088235294117645,
"grad_norm": 0.3543003365756421,
"learning_rate": 3.396660523802225e-05,
"loss": 0.4878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15362058579921722,
"step": 420,
"valid_targets_mean": 9784.1,
"valid_targets_min": 2502
},
{
"epoch": 2.3363970588235294,
"grad_norm": 0.3459844290219232,
"learning_rate": 3.376907989309097e-05,
"loss": 0.4898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16421331465244293,
"step": 425,
"valid_targets_mean": 9645.9,
"valid_targets_min": 1466
},
{
"epoch": 2.363970588235294,
"grad_norm": 0.2867558328265102,
"learning_rate": 3.3568967713690574e-05,
"loss": 0.4911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16321928799152374,
"step": 430,
"valid_targets_mean": 9835.3,
"valid_targets_min": 2574
},
{
"epoch": 2.391544117647059,
"grad_norm": 0.26780085668465703,
"learning_rate": 3.3366306295440195e-05,
"loss": 0.4835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17828045785427094,
"step": 435,
"valid_targets_mean": 11611.0,
"valid_targets_min": 2718
},
{
"epoch": 2.4191176470588234,
"grad_norm": 0.26518791813195564,
"learning_rate": 3.316113371289137e-05,
"loss": 0.4973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15982230007648468,
"step": 440,
"valid_targets_mean": 9063.5,
"valid_targets_min": 3199
},
{
"epoch": 2.4466911764705883,
"grad_norm": 0.28443782068251516,
"learning_rate": 3.295348851237494e-05,
"loss": 0.4926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14535918831825256,
"step": 445,
"valid_targets_mean": 9235.6,
"valid_targets_min": 617
},
{
"epoch": 2.474264705882353,
"grad_norm": 0.3188266718733089,
"learning_rate": 3.2743409704759175e-05,
"loss": 0.495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18030281364917755,
"step": 450,
"valid_targets_mean": 10848.3,
"valid_targets_min": 4332
},
{
"epoch": 2.5018382352941178,
"grad_norm": 0.2667422692086372,
"learning_rate": 3.253093675812073e-05,
"loss": 0.488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15864768624305725,
"step": 455,
"valid_targets_mean": 9596.5,
"valid_targets_min": 1920
},
{
"epoch": 2.5294117647058822,
"grad_norm": 0.2604029768908445,
"learning_rate": 3.231610959032968e-05,
"loss": 0.4885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1746014654636383,
"step": 460,
"valid_targets_mean": 10420.3,
"valid_targets_min": 1764
},
{
"epoch": 2.556985294117647,
"grad_norm": 0.2858191680359426,
"learning_rate": 3.2098968561550024e-05,
"loss": 0.4868,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15824320912361145,
"step": 465,
"valid_targets_mean": 10062.4,
"valid_targets_min": 1489
},
{
"epoch": 2.5845588235294117,
"grad_norm": 0.2597776478065362,
"learning_rate": 3.18795544666571e-05,
"loss": 0.4875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15228307247161865,
"step": 470,
"valid_targets_mean": 9501.3,
"valid_targets_min": 2570
},
{
"epoch": 2.6121323529411766,
"grad_norm": 0.2619195905683205,
"learning_rate": 3.1657908527573376e-05,
"loss": 0.489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14361000061035156,
"step": 475,
"valid_targets_mean": 8306.0,
"valid_targets_min": 521
},
{
"epoch": 2.639705882352941,
"grad_norm": 0.3061155537403774,
"learning_rate": 3.143407238552394e-05,
"loss": 0.4835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13861876726150513,
"step": 480,
"valid_targets_mean": 9225.4,
"valid_targets_min": 344
},
{
"epoch": 2.6672794117647056,
"grad_norm": 0.2602743554835305,
"learning_rate": 3.1208088093213276e-05,
"loss": 0.4882,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15896561741828918,
"step": 485,
"valid_targets_mean": 9260.0,
"valid_targets_min": 1839
},
{
"epoch": 2.6948529411764706,
"grad_norm": 0.2724505413952303,
"learning_rate": 3.097999810692468e-05,
"loss": 0.4825,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14052462577819824,
"step": 490,
"valid_targets_mean": 8514.9,
"valid_targets_min": 2227
},
{
"epoch": 2.7224264705882355,
"grad_norm": 0.2662825428558882,
"learning_rate": 3.074984527854392e-05,
"loss": 0.492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16768789291381836,
"step": 495,
"valid_targets_mean": 9108.5,
"valid_targets_min": 413
},
{
"epoch": 2.75,
"grad_norm": 0.2975150447539531,
"learning_rate": 3.0517672847508517e-05,
"loss": 0.4858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15554597973823547,
"step": 500,
"valid_targets_mean": 9202.9,
"valid_targets_min": 967
},
{
"epoch": 2.7775735294117645,
"grad_norm": 0.2550495273374524,
"learning_rate": 3.0283524432684214e-05,
"loss": 0.4909,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.178862527012825,
"step": 505,
"valid_targets_mean": 10239.5,
"valid_targets_min": 4379
},
{
"epoch": 2.8051470588235294,
"grad_norm": 0.31261731562154393,
"learning_rate": 3.0047444024170197e-05,
"loss": 0.4781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14991816878318787,
"step": 510,
"valid_targets_mean": 9316.9,
"valid_targets_min": 349
},
{
"epoch": 2.8327205882352944,
"grad_norm": 0.30993641338002453,
"learning_rate": 2.9809475975034586e-05,
"loss": 0.4862,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16486816108226776,
"step": 515,
"valid_targets_mean": 10542.5,
"valid_targets_min": 4704
},
{
"epoch": 2.860294117647059,
"grad_norm": 0.2824856754358018,
"learning_rate": 2.9569664992981648e-05,
"loss": 0.4807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15252447128295898,
"step": 520,
"valid_targets_mean": 9458.6,
"valid_targets_min": 2709
},
{
"epoch": 2.8878676470588234,
"grad_norm": 0.2930565337599535,
"learning_rate": 2.932805613195249e-05,
"loss": 0.4843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14687579870224,
"step": 525,
"valid_targets_mean": 8935.3,
"valid_targets_min": 2361
},
{
"epoch": 2.9154411764705883,
"grad_norm": 0.30339081178623667,
"learning_rate": 2.9084694783660615e-05,
"loss": 0.4824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19605809450149536,
"step": 530,
"valid_targets_mean": 11474.3,
"valid_targets_min": 3566
},
{
"epoch": 2.943014705882353,
"grad_norm": 0.35774459464396025,
"learning_rate": 2.8839626669064073e-05,
"loss": 0.486,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15574738383293152,
"step": 535,
"valid_targets_mean": 9388.9,
"valid_targets_min": 1223
},
{
"epoch": 2.9705882352941178,
"grad_norm": 0.23890534631672108,
"learning_rate": 2.8592897829775732e-05,
"loss": 0.4887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1441933661699295,
"step": 540,
"valid_targets_mean": 9105.8,
"valid_targets_min": 2471
},
{
"epoch": 2.9981617647058822,
"grad_norm": 0.2874190440574071,
"learning_rate": 2.8344554619413355e-05,
"loss": 0.486,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16626250743865967,
"step": 545,
"valid_targets_mean": 9902.8,
"valid_targets_min": 1834
},
{
"epoch": 3.0220588235294117,
"grad_norm": 0.2797379791583693,
"learning_rate": 2.8094643694890947e-05,
"loss": 0.4762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15490218997001648,
"step": 550,
"valid_targets_mean": 9643.8,
"valid_targets_min": 2579
},
{
"epoch": 3.0496323529411766,
"grad_norm": 0.2781063666088034,
"learning_rate": 2.784321200765326e-05,
"loss": 0.4829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1765371710062027,
"step": 555,
"valid_targets_mean": 10938.4,
"valid_targets_min": 3219
},
{
"epoch": 3.077205882352941,
"grad_norm": 0.2633292268883383,
"learning_rate": 2.7590306794854853e-05,
"loss": 0.4862,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1536048948764801,
"step": 560,
"valid_targets_mean": 9472.4,
"valid_targets_min": 2616
},
{
"epoch": 3.104779411764706,
"grad_norm": 0.3024333218240714,
"learning_rate": 2.7335975570485552e-05,
"loss": 0.4809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16254714131355286,
"step": 565,
"valid_targets_mean": 9964.5,
"valid_targets_min": 1829
},
{
"epoch": 3.1323529411764706,
"grad_norm": 0.296651896630342,
"learning_rate": 2.7080266116443855e-05,
"loss": 0.4784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1577146351337433,
"step": 570,
"valid_targets_mean": 8750.8,
"valid_targets_min": 698
},
{
"epoch": 3.1599264705882355,
"grad_norm": 0.2731702178415364,
"learning_rate": 2.6823226473559992e-05,
"loss": 0.4783,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16758793592453003,
"step": 575,
"valid_targets_mean": 10279.2,
"valid_targets_min": 2019
},
{
"epoch": 3.1875,
"grad_norm": 0.30439545297509896,
"learning_rate": 2.656490493257042e-05,
"loss": 0.4725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1514369696378708,
"step": 580,
"valid_targets_mean": 9135.0,
"valid_targets_min": 2521
},
{
"epoch": 3.2150735294117645,
"grad_norm": 0.27128678116813276,
"learning_rate": 2.6305350025045257e-05,
"loss": 0.478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16004298627376556,
"step": 585,
"valid_targets_mean": 9188.1,
"valid_targets_min": 1834
},
{
"epoch": 3.2426470588235294,
"grad_norm": 0.2915602972024743,
"learning_rate": 2.604461051427054e-05,
"loss": 0.4767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1670864373445511,
"step": 590,
"valid_targets_mean": 9554.1,
"valid_targets_min": 2225
},
{
"epoch": 3.270220588235294,
"grad_norm": 0.2652841967385897,
"learning_rate": 2.5782735386086954e-05,
"loss": 0.478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18112346529960632,
"step": 595,
"valid_targets_mean": 10889.3,
"valid_targets_min": 2020
},
{
"epoch": 3.297794117647059,
"grad_norm": 0.29039039949321094,
"learning_rate": 2.5519773839686707e-05,
"loss": 0.4792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16282935440540314,
"step": 600,
"valid_targets_mean": 9785.2,
"valid_targets_min": 2533
},
{
"epoch": 3.3253676470588234,
"grad_norm": 0.3075493577268038,
"learning_rate": 2.525577527837036e-05,
"loss": 0.4755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16678908467292786,
"step": 605,
"valid_targets_mean": 10208.7,
"valid_targets_min": 1585
},
{
"epoch": 3.3529411764705883,
"grad_norm": 0.28091651740846124,
"learning_rate": 2.4990789300265256e-05,
"loss": 0.4742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15700221061706543,
"step": 610,
"valid_targets_mean": 9146.5,
"valid_targets_min": 2410
},
{
"epoch": 3.380514705882353,
"grad_norm": 0.28102447688530885,
"learning_rate": 2.472486568900745e-05,
"loss": 0.4695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15741194784641266,
"step": 615,
"valid_targets_mean": 9390.8,
"valid_targets_min": 1976
},
{
"epoch": 3.4080882352941178,
"grad_norm": 0.33795109998469414,
"learning_rate": 2.445805440438866e-05,
"loss": 0.4796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1518850028514862,
"step": 620,
"valid_targets_mean": 9751.7,
"valid_targets_min": 1398
},
{
"epoch": 3.4356617647058822,
"grad_norm": 0.2899570887713248,
"learning_rate": 2.419040557297024e-05,
"loss": 0.4784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15818291902542114,
"step": 625,
"valid_targets_mean": 9005.8,
"valid_targets_min": 1875
},
{
"epoch": 3.463235294117647,
"grad_norm": 0.27113682924469873,
"learning_rate": 2.3921969478665702e-05,
"loss": 0.4742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14796185493469238,
"step": 630,
"valid_targets_mean": 8848.4,
"valid_targets_min": 1331
},
{
"epoch": 3.4908088235294117,
"grad_norm": 0.30195562694475775,
"learning_rate": 2.3652796553293794e-05,
"loss": 0.4707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1724786013364792,
"step": 635,
"valid_targets_mean": 10914.3,
"valid_targets_min": 2947
},
{
"epoch": 3.5183823529411766,
"grad_norm": 0.27090220964326883,
"learning_rate": 2.338293736710373e-05,
"loss": 0.4748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1456066370010376,
"step": 640,
"valid_targets_mean": 8925.1,
"valid_targets_min": 617
},
{
"epoch": 3.545955882352941,
"grad_norm": 0.2644791412793208,
"learning_rate": 2.3112442619274408e-05,
"loss": 0.4756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1511625200510025,
"step": 645,
"valid_targets_mean": 9793.2,
"valid_targets_min": 1724
},
{
"epoch": 3.5735294117647056,
"grad_norm": 0.25813430075433286,
"learning_rate": 2.2841363128389388e-05,
"loss": 0.4839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16180767118930817,
"step": 650,
"valid_targets_mean": 10196.3,
"valid_targets_min": 2718
},
{
"epoch": 3.6011029411764706,
"grad_norm": 0.27785983668454983,
"learning_rate": 2.2569749822889526e-05,
"loss": 0.4758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1579504758119583,
"step": 655,
"valid_targets_mean": 8632.2,
"valid_targets_min": 1381
},
{
"epoch": 3.6286764705882355,
"grad_norm": 0.29111353881691315,
"learning_rate": 2.229765373150489e-05,
"loss": 0.4719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15923120081424713,
"step": 660,
"valid_targets_mean": 9744.8,
"valid_targets_min": 1369
},
{
"epoch": 3.65625,
"grad_norm": 0.25730565829379,
"learning_rate": 2.2025125973667817e-05,
"loss": 0.4679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13490340113639832,
"step": 665,
"valid_targets_mean": 8098.9,
"valid_targets_min": 1119
},
{
"epoch": 3.6838235294117645,
"grad_norm": 0.25083718631280677,
"learning_rate": 2.1752217749908997e-05,
"loss": 0.4714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16616028547286987,
"step": 670,
"valid_targets_mean": 10727.1,
"valid_targets_min": 1686
},
{
"epoch": 3.7113970588235294,
"grad_norm": 0.29118617817633485,
"learning_rate": 2.147898033223831e-05,
"loss": 0.4709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1437336653470993,
"step": 675,
"valid_targets_mean": 8196.6,
"valid_targets_min": 643
},
{
"epoch": 3.7389705882352944,
"grad_norm": 0.2868072475645195,
"learning_rate": 2.120546505451218e-05,
"loss": 0.4716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1592090129852295,
"step": 680,
"valid_targets_mean": 9257.1,
"valid_targets_min": 2425
},
{
"epoch": 3.766544117647059,
"grad_norm": 0.2646065431597373,
"learning_rate": 2.0931723302789346e-05,
"loss": 0.4739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15621066093444824,
"step": 685,
"valid_targets_mean": 9754.2,
"valid_targets_min": 3945
},
{
"epoch": 3.7941176470588234,
"grad_norm": 0.27437618997704516,
"learning_rate": 2.065780650567683e-05,
"loss": 0.474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14808349311351776,
"step": 690,
"valid_targets_mean": 9839.2,
"valid_targets_min": 4592
},
{
"epoch": 3.8216911764705883,
"grad_norm": 0.24821319026154287,
"learning_rate": 2.038376612466793e-05,
"loss": 0.4742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16178636252880096,
"step": 695,
"valid_targets_mean": 11111.1,
"valid_targets_min": 1996
},
{
"epoch": 3.849264705882353,
"grad_norm": 0.2465619978535496,
"learning_rate": 2.0109653644473966e-05,
"loss": 0.471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1422886848449707,
"step": 700,
"valid_targets_mean": 10060.5,
"valid_targets_min": 313
},
{
"epoch": 3.8768382352941178,
"grad_norm": 0.3119293770555057,
"learning_rate": 1.9835520563351735e-05,
"loss": 0.468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1610221415758133,
"step": 705,
"valid_targets_mean": 10060.2,
"valid_targets_min": 1469
},
{
"epoch": 3.9044117647058822,
"grad_norm": 0.27494281301179807,
"learning_rate": 1.9561418383428374e-05,
"loss": 0.4752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.162495955824852,
"step": 710,
"valid_targets_mean": 9654.4,
"valid_targets_min": 4185
},
{
"epoch": 3.931985294117647,
"grad_norm": 0.26008772339215747,
"learning_rate": 1.9287398601025562e-05,
"loss": 0.4705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15456292033195496,
"step": 715,
"valid_targets_mean": 9493.3,
"valid_targets_min": 2208
},
{
"epoch": 3.9595588235294117,
"grad_norm": 0.271085036968312,
"learning_rate": 1.9013512696984696e-05,
"loss": 0.4689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1571996957063675,
"step": 720,
"valid_targets_mean": 9132.5,
"valid_targets_min": 1567
},
{
"epoch": 3.9871323529411766,
"grad_norm": 0.2411363224005689,
"learning_rate": 1.8739812126995093e-05,
"loss": 0.4722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14264698326587677,
"step": 725,
"valid_targets_mean": 9456.3,
"valid_targets_min": 1832
},
{
"epoch": 4.011029411764706,
"grad_norm": 0.2359597961140309,
"learning_rate": 1.8466348311926863e-05,
"loss": 0.466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15933065116405487,
"step": 730,
"valid_targets_mean": 10693.0,
"valid_targets_min": 5400
},
{
"epoch": 4.038602941176471,
"grad_norm": 0.26309525718508847,
"learning_rate": 1.8193172628170324e-05,
"loss": 0.4682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15065504610538483,
"step": 735,
"valid_targets_mean": 10060.2,
"valid_targets_min": 505
},
{
"epoch": 4.0661764705882355,
"grad_norm": 0.2473581577370302,
"learning_rate": 1.792033639798377e-05,
"loss": 0.4703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1632881462574005,
"step": 740,
"valid_targets_mean": 9911.9,
"valid_targets_min": 1738
},
{
"epoch": 4.09375,
"grad_norm": 0.2565338212535585,
"learning_rate": 1.764789087985145e-05,
"loss": 0.465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13955152034759521,
"step": 745,
"valid_targets_mean": 8719.0,
"valid_targets_min": 835
},
{
"epoch": 4.1213235294117645,
"grad_norm": 0.29464540307392384,
"learning_rate": 1.737588725885345e-05,
"loss": 0.4625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15189291536808014,
"step": 750,
"valid_targets_mean": 9459.8,
"valid_targets_min": 2465
},
{
"epoch": 4.148897058823529,
"grad_norm": 0.29502353988540564,
"learning_rate": 1.7104376637049474e-05,
"loss": 0.4647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13400202989578247,
"step": 755,
"valid_targets_mean": 8704.5,
"valid_targets_min": 1765
},
{
"epoch": 4.176470588235294,
"grad_norm": 0.2954768301391897,
"learning_rate": 1.6833410023878104e-05,
"loss": 0.4711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15495823323726654,
"step": 760,
"valid_targets_mean": 10454.3,
"valid_targets_min": 1863
},
{
"epoch": 4.204044117647059,
"grad_norm": 0.2420606644014351,
"learning_rate": 1.6563038326573544e-05,
"loss": 0.4664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14864429831504822,
"step": 765,
"valid_targets_mean": 9574.8,
"valid_targets_min": 2267
},
{
"epoch": 4.231617647058823,
"grad_norm": 0.22980463517846378,
"learning_rate": 1.6293312340601545e-05,
"loss": 0.4675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.168225958943367,
"step": 770,
"valid_targets_mean": 11102.2,
"valid_targets_min": 3249
},
{
"epoch": 4.259191176470588,
"grad_norm": 0.2360042458287228,
"learning_rate": 1.60242827401163e-05,
"loss": 0.4629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1540679782629013,
"step": 775,
"valid_targets_mean": 9416.9,
"valid_targets_min": 455
},
{
"epoch": 4.286764705882353,
"grad_norm": 0.24859722691681074,
"learning_rate": 1.5756000068440184e-05,
"loss": 0.4595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14932796359062195,
"step": 780,
"valid_targets_mean": 9871.3,
"valid_targets_min": 1989
},
{
"epoch": 4.314338235294118,
"grad_norm": 0.24874113072111292,
"learning_rate": 1.548851472856802e-05,
"loss": 0.4659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12739743292331696,
"step": 785,
"valid_targets_mean": 8745.9,
"valid_targets_min": 1265
},
{
"epoch": 4.341911764705882,
"grad_norm": 0.2616979627725329,
"learning_rate": 1.5221876973697729e-05,
"loss": 0.4675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16212889552116394,
"step": 790,
"valid_targets_mean": 9819.2,
"valid_targets_min": 1922
},
{
"epoch": 4.369485294117647,
"grad_norm": 0.2605902813842398,
"learning_rate": 1.4956136897789155e-05,
"loss": 0.4682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1644110381603241,
"step": 795,
"valid_targets_mean": 10543.6,
"valid_targets_min": 2238
},
{
"epoch": 4.397058823529412,
"grad_norm": 0.27655005722080517,
"learning_rate": 1.4691344426152733e-05,
"loss": 0.4684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13570016622543335,
"step": 800,
"valid_targets_mean": 7597.6,
"valid_targets_min": 349
},
{
"epoch": 4.424632352941177,
"grad_norm": 0.2681286272432588,
"learning_rate": 1.4427549306069915e-05,
"loss": 0.468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17025525867938995,
"step": 805,
"valid_targets_mean": 10451.5,
"valid_targets_min": 2785
},
{
"epoch": 4.452205882352941,
"grad_norm": 0.25262683253593055,
"learning_rate": 1.416480109744701e-05,
"loss": 0.4658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14067339897155762,
"step": 810,
"valid_targets_mean": 9118.7,
"valid_targets_min": 375
},
{
"epoch": 4.479779411764706,
"grad_norm": 0.2833620837271674,
"learning_rate": 1.3903149163504221e-05,
"loss": 0.4733,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17299406230449677,
"step": 815,
"valid_targets_mean": 9922.2,
"valid_targets_min": 2029
},
{
"epoch": 4.507352941176471,
"grad_norm": 0.28237169634387094,
"learning_rate": 1.3642642661501641e-05,
"loss": 0.4637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1708087921142578,
"step": 820,
"valid_targets_mean": 10745.8,
"valid_targets_min": 4792
},
{
"epoch": 4.5349264705882355,
"grad_norm": 0.24345906344118387,
"learning_rate": 1.3383330533503971e-05,
"loss": 0.4705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15734925866127014,
"step": 825,
"valid_targets_mean": 9852.8,
"valid_targets_min": 2881
},
{
"epoch": 4.5625,
"grad_norm": 0.24266003465888375,
"learning_rate": 1.3125261497185588e-05,
"loss": 0.4646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14917130768299103,
"step": 830,
"valid_targets_mean": 10095.4,
"valid_targets_min": 1468
},
{
"epoch": 4.5900735294117645,
"grad_norm": 0.24693814616775356,
"learning_rate": 1.2868484036677896e-05,
"loss": 0.4676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14834946393966675,
"step": 835,
"valid_targets_mean": 9061.1,
"valid_targets_min": 1531
},
{
"epoch": 4.617647058823529,
"grad_norm": 0.25767151072786965,
"learning_rate": 1.2613046393460411e-05,
"loss": 0.4694,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1626255214214325,
"step": 840,
"valid_targets_mean": 9341.8,
"valid_targets_min": 2334
},
{
"epoch": 4.645220588235294,
"grad_norm": 0.2523502952262432,
"learning_rate": 1.2358996557297532e-05,
"loss": 0.4685,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14751945436000824,
"step": 845,
"valid_targets_mean": 9205.8,
"valid_targets_min": 2278
},
{
"epoch": 4.672794117647059,
"grad_norm": 0.2671934827994343,
"learning_rate": 1.2106382257222595e-05,
"loss": 0.4657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1465722620487213,
"step": 850,
"valid_targets_mean": 9662.6,
"valid_targets_min": 818
},
{
"epoch": 4.700367647058823,
"grad_norm": 0.2678177528324946,
"learning_rate": 1.1855250952570852e-05,
"loss": 0.4632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18250469863414764,
"step": 855,
"valid_targets_mean": 11265.1,
"valid_targets_min": 3015
},
{
"epoch": 4.727941176470588,
"grad_norm": 0.26786812942141464,
"learning_rate": 1.1605649824063176e-05,
"loss": 0.4704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1485501229763031,
"step": 860,
"valid_targets_mean": 9213.1,
"valid_targets_min": 1661
},
{
"epoch": 4.755514705882353,
"grad_norm": 0.23312897885245942,
"learning_rate": 1.1357625764942095e-05,
"loss": 0.4646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16314777731895447,
"step": 865,
"valid_targets_mean": 9994.2,
"valid_targets_min": 1692
},
{
"epoch": 4.783088235294118,
"grad_norm": 0.24908363289906799,
"learning_rate": 1.1111225372161818e-05,
"loss": 0.463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15313473343849182,
"step": 870,
"valid_targets_mean": 9034.0,
"valid_targets_min": 1750
},
{
"epoch": 4.810661764705882,
"grad_norm": 0.23939462069601722,
"learning_rate": 1.0866494937633953e-05,
"loss": 0.4616,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14009788632392883,
"step": 875,
"valid_targets_mean": 8579.7,
"valid_targets_min": 1839
},
{
"epoch": 4.838235294117647,
"grad_norm": 0.23955295963992793,
"learning_rate": 1.0623480439530493e-05,
"loss": 0.468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18267551064491272,
"step": 880,
"valid_targets_mean": 10546.3,
"valid_targets_min": 3024
},
{
"epoch": 4.865808823529412,
"grad_norm": 0.2616170432420212,
"learning_rate": 1.038222753364581e-05,
"loss": 0.4693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1501752734184265,
"step": 885,
"valid_targets_mean": 9260.8,
"valid_targets_min": 1575
},
{
"epoch": 4.893382352941177,
"grad_norm": 0.22421807483190181,
"learning_rate": 1.0142781544819158e-05,
"loss": 0.4669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15006357431411743,
"step": 890,
"valid_targets_mean": 10454.8,
"valid_targets_min": 5073
},
{
"epoch": 4.920955882352941,
"grad_norm": 0.22432441115476565,
"learning_rate": 9.905187458419343e-06,
"loss": 0.4628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1583147495985031,
"step": 895,
"valid_targets_mean": 9921.4,
"valid_targets_min": 2207
},
{
"epoch": 4.948529411764706,
"grad_norm": 0.25000893710548316,
"learning_rate": 9.669489911893261e-06,
"loss": 0.4679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16411854326725006,
"step": 900,
"valid_targets_mean": 9388.2,
"valid_targets_min": 3775
},
{
"epoch": 4.976102941176471,
"grad_norm": 0.2608534142049316,
"learning_rate": 9.435733186379694e-06,
"loss": 0.4591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14242586493492126,
"step": 905,
"valid_targets_mean": 9715.4,
"valid_targets_min": 2367
},
{
"epoch": 5.0,
"grad_norm": 0.528080421416379,
"learning_rate": 9.2039611983901e-06,
"loss": 0.4665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.45787763595581055,
"step": 910,
"valid_targets_mean": 9017.6,
"valid_targets_min": 1527
},
{
"epoch": 5.0275735294117645,
"grad_norm": 0.25364778323379056,
"learning_rate": 8.974217491557916e-06,
"loss": 0.4582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15120220184326172,
"step": 915,
"valid_targets_mean": 9724.2,
"valid_targets_min": 4256
},
{
"epoch": 5.055147058823529,
"grad_norm": 0.2720756490768129,
"learning_rate": 8.746545228457864e-06,
"loss": 0.4627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18436521291732788,
"step": 920,
"valid_targets_mean": 10848.2,
"valid_targets_min": 2906
},
{
"epoch": 5.082720588235294,
"grad_norm": 0.2476109752744865,
"learning_rate": 8.520987182496916e-06,
"loss": 0.4615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1586351841688156,
"step": 925,
"valid_targets_mean": 9801.3,
"valid_targets_min": 2511
},
{
"epoch": 5.110294117647059,
"grad_norm": 0.24561606722052834,
"learning_rate": 8.297585729878328e-06,
"loss": 0.4605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15383873879909515,
"step": 930,
"valid_targets_mean": 10230.4,
"valid_targets_min": 1542
},
{
"epoch": 5.137867647058823,
"grad_norm": 0.23155125931202536,
"learning_rate": 8.076382841640278e-06,
"loss": 0.4595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15065628290176392,
"step": 935,
"valid_targets_mean": 9486.9,
"valid_targets_min": 2504
},
{
"epoch": 5.165441176470588,
"grad_norm": 0.2386411883344939,
"learning_rate": 7.8574200757707e-06,
"loss": 0.4691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16948550939559937,
"step": 940,
"valid_targets_mean": 10931.1,
"valid_targets_min": 1488
},
{
"epoch": 5.193014705882353,
"grad_norm": 0.24623043409190853,
"learning_rate": 7.640738569399645e-06,
"loss": 0.4627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17989715933799744,
"step": 945,
"valid_targets_mean": 11805.3,
"valid_targets_min": 5070
},
{
"epoch": 5.220588235294118,
"grad_norm": 0.25645332420887573,
"learning_rate": 7.426379031070736e-06,
"loss": 0.4653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14318367838859558,
"step": 950,
"valid_targets_mean": 9511.3,
"valid_targets_min": 2171
},
{
"epoch": 5.248161764705882,
"grad_norm": 0.2509019424889472,
"learning_rate": 7.214381733093156e-06,
"loss": 0.4623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14922644197940826,
"step": 955,
"valid_targets_mean": 9984.3,
"valid_targets_min": 3056
},
{
"epoch": 5.275735294117647,
"grad_norm": 0.22957419096439602,
"learning_rate": 7.004786503975552e-06,
"loss": 0.464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15641915798187256,
"step": 960,
"valid_targets_mean": 10315.0,
"valid_targets_min": 4133
},
{
"epoch": 5.303308823529412,
"grad_norm": 0.28021168498690796,
"learning_rate": 6.7976327209433855e-06,
"loss": 0.4604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1481720507144928,
"step": 965,
"valid_targets_mean": 9206.2,
"valid_targets_min": 1886
},
{
"epoch": 5.330882352941177,
"grad_norm": 0.2426926206109474,
"learning_rate": 6.592959302541004e-06,
"loss": 0.4589,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.173628568649292,
"step": 970,
"valid_targets_mean": 10906.5,
"valid_targets_min": 4457
},
{
"epoch": 5.358455882352941,
"grad_norm": 0.23348669978255307,
"learning_rate": 6.39080470131989e-06,
"loss": 0.4609,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16939929127693176,
"step": 975,
"valid_targets_mean": 10147.1,
"valid_targets_min": 1450
},
{
"epoch": 5.386029411764706,
"grad_norm": 0.2413777466766637,
"learning_rate": 6.1912068966145145e-06,
"loss": 0.4565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15642428398132324,
"step": 980,
"valid_targets_mean": 9358.1,
"valid_targets_min": 535
},
{
"epoch": 5.413602941176471,
"grad_norm": 0.2388089637438226,
"learning_rate": 5.994203387407036e-06,
"loss": 0.4629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1432351917028427,
"step": 985,
"valid_targets_mean": 9491.7,
"valid_targets_min": 1802
},
{
"epoch": 5.4411764705882355,
"grad_norm": 0.2190404554471656,
"learning_rate": 5.7998311852822406e-06,
"loss": 0.4633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16985687613487244,
"step": 990,
"valid_targets_mean": 10872.9,
"valid_targets_min": 2969
},
{
"epoch": 5.46875,
"grad_norm": 0.2238371695459484,
"learning_rate": 5.608126807474145e-06,
"loss": 0.4622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15325827896595,
"step": 995,
"valid_targets_mean": 10413.0,
"valid_targets_min": 2987
},
{
"epoch": 5.4963235294117645,
"grad_norm": 0.23551352199294445,
"learning_rate": 5.419126270005317e-06,
"loss": 0.4626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14396341145038605,
"step": 1000,
"valid_targets_mean": 9435.3,
"valid_targets_min": 2314
},
{
"epoch": 5.523897058823529,
"grad_norm": 0.24552735754578195,
"learning_rate": 5.23286508092051e-06,
"loss": 0.4557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15775898098945618,
"step": 1005,
"valid_targets_mean": 9846.8,
"valid_targets_min": 2897
},
{
"epoch": 5.551470588235294,
"grad_norm": 0.23811014424649815,
"learning_rate": 5.049378233615652e-06,
"loss": 0.4656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15497168898582458,
"step": 1010,
"valid_targets_mean": 9349.0,
"valid_targets_min": 4354
},
{
"epoch": 5.579044117647059,
"grad_norm": 0.22012023372507486,
"learning_rate": 4.868700200263521e-06,
"loss": 0.4624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1530693769454956,
"step": 1015,
"valid_targets_mean": 10066.7,
"valid_targets_min": 3738
},
{
"epoch": 5.606617647058823,
"grad_norm": 0.2384744886144643,
"learning_rate": 4.690864925337404e-06,
"loss": 0.459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14344365894794464,
"step": 1020,
"valid_targets_mean": 9216.1,
"valid_targets_min": 2135
},
{
"epoch": 5.634191176470588,
"grad_norm": 0.22620664113381828,
"learning_rate": 4.515905819233828e-06,
"loss": 0.4587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15465840697288513,
"step": 1025,
"valid_targets_mean": 9175.2,
"valid_targets_min": 2058
},
{
"epoch": 5.661764705882353,
"grad_norm": 0.2308131065264548,
"learning_rate": 4.343855751995645e-06,
"loss": 0.463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14798535406589508,
"step": 1030,
"valid_targets_mean": 9069.3,
"valid_targets_min": 1487
},
{
"epoch": 5.689338235294118,
"grad_norm": 0.20210750386819545,
"learning_rate": 4.174747047136707e-06,
"loss": 0.4629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13314756751060486,
"step": 1035,
"valid_targets_mean": 8539.2,
"valid_targets_min": 2094
},
{
"epoch": 5.716911764705882,
"grad_norm": 0.2182748594468666,
"learning_rate": 4.008611475569082e-06,
"loss": 0.4623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1582449972629547,
"step": 1040,
"valid_targets_mean": 10496.3,
"valid_targets_min": 1687
},
{
"epoch": 5.744485294117647,
"grad_norm": 0.22255571373558947,
"learning_rate": 3.845480249634226e-06,
"loss": 0.4664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15279759466648102,
"step": 1045,
"valid_targets_mean": 9399.6,
"valid_targets_min": 2004
},
{
"epoch": 5.772058823529412,
"grad_norm": 0.23780299660682128,
"learning_rate": 3.685384017239013e-06,
"loss": 0.4563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1619456708431244,
"step": 1050,
"valid_targets_mean": 10361.6,
"valid_targets_min": 2616
},
{
"epoch": 5.799632352941177,
"grad_norm": 0.2149753999385376,
"learning_rate": 3.5283528560978163e-06,
"loss": 0.4606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15598419308662415,
"step": 1055,
"valid_targets_mean": 10178.3,
"valid_targets_min": 1567
},
{
"epoch": 5.827205882352941,
"grad_norm": 0.23604543135606404,
"learning_rate": 3.3744162680817526e-06,
"loss": 0.463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15586526691913605,
"step": 1060,
"valid_targets_mean": 10143.1,
"valid_targets_min": 2785
},
{
"epoch": 5.854779411764706,
"grad_norm": 0.21782882942010112,
"learning_rate": 3.2236031736760775e-06,
"loss": 0.4628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1613750457763672,
"step": 1065,
"valid_targets_mean": 10578.9,
"valid_targets_min": 3416
},
{
"epoch": 5.882352941176471,
"grad_norm": 0.24360429896673724,
"learning_rate": 3.075941906546789e-06,
"loss": 0.4643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17128227651119232,
"step": 1070,
"valid_targets_mean": 10810.2,
"valid_targets_min": 4489
},
{
"epoch": 5.9099264705882355,
"grad_norm": 0.21853281708343872,
"learning_rate": 2.9314602082175624e-06,
"loss": 0.4634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1510457843542099,
"step": 1075,
"valid_targets_mean": 9562.4,
"valid_targets_min": 2854
},
{
"epoch": 5.9375,
"grad_norm": 0.21718471329556974,
"learning_rate": 2.790185222857804e-06,
"loss": 0.4581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14728333055973053,
"step": 1080,
"valid_targets_mean": 9295.6,
"valid_targets_min": 1896
},
{
"epoch": 5.9650735294117645,
"grad_norm": 0.21759757406424912,
"learning_rate": 2.6521434921830593e-06,
"loss": 0.4602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16067388653755188,
"step": 1085,
"valid_targets_mean": 9997.4,
"valid_targets_min": 1567
},
{
"epoch": 5.992647058823529,
"grad_norm": 0.2372799465315037,
"learning_rate": 2.517360950468519e-06,
"loss": 0.4564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14969328045845032,
"step": 1090,
"valid_targets_mean": 9974.2,
"valid_targets_min": 2365
},
{
"epoch": 6.016544117647059,
"grad_norm": 0.22070664266477452,
"learning_rate": 2.3858629196766846e-06,
"loss": 0.4622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14457306265830994,
"step": 1095,
"valid_targets_mean": 8829.4,
"valid_targets_min": 2041
},
{
"epoch": 6.044117647058823,
"grad_norm": 0.23770514220168587,
"learning_rate": 2.2576741047000605e-06,
"loss": 0.4634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1596033126115799,
"step": 1100,
"valid_targets_mean": 10972.9,
"valid_targets_min": 5689
},
{
"epoch": 6.071691176470588,
"grad_norm": 0.2380064578876329,
"learning_rate": 2.1328185887197872e-06,
"loss": 0.4583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16930876672267914,
"step": 1105,
"valid_targets_mean": 9747.8,
"valid_targets_min": 2320
},
{
"epoch": 6.099264705882353,
"grad_norm": 0.22187661591130384,
"learning_rate": 2.011319828681049e-06,
"loss": 0.4545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1560242772102356,
"step": 1110,
"valid_targets_mean": 9941.7,
"valid_targets_min": 1490
},
{
"epoch": 6.126838235294118,
"grad_norm": 0.22871735612785934,
"learning_rate": 1.8932006508861866e-06,
"loss": 0.4582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13863195478916168,
"step": 1115,
"valid_targets_mean": 8947.5,
"valid_targets_min": 1633
},
{
"epoch": 6.154411764705882,
"grad_norm": 0.220041969177945,
"learning_rate": 1.7784832467062129e-06,
"loss": 0.4631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16414231061935425,
"step": 1120,
"valid_targets_mean": 10703.4,
"valid_targets_min": 1974
},
{
"epoch": 6.181985294117647,
"grad_norm": 0.22496390430322033,
"learning_rate": 1.6671891684117048e-06,
"loss": 0.4559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16701489686965942,
"step": 1125,
"valid_targets_mean": 11036.8,
"valid_targets_min": 1223
},
{
"epoch": 6.209558823529412,
"grad_norm": 0.23390335972144527,
"learning_rate": 1.55933932512369e-06,
"loss": 0.4602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15574294328689575,
"step": 1130,
"valid_targets_mean": 9363.7,
"valid_targets_min": 2356
},
{
"epoch": 6.237132352941177,
"grad_norm": 0.21186034476089416,
"learning_rate": 1.4549539788853984e-06,
"loss": 0.4616,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.167169451713562,
"step": 1135,
"valid_targets_mean": 9769.6,
"valid_targets_min": 1886
},
{
"epoch": 6.264705882352941,
"grad_norm": 0.24246484888375872,
"learning_rate": 1.3540527408555915e-06,
"loss": 0.4573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13978439569473267,
"step": 1140,
"valid_targets_mean": 9072.4,
"valid_targets_min": 1851
},
{
"epoch": 6.292279411764706,
"grad_norm": 0.20852258029588774,
"learning_rate": 1.2566545676241494e-06,
"loss": 0.4636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16217908263206482,
"step": 1145,
"valid_targets_mean": 10326.9,
"valid_targets_min": 2180
},
{
"epoch": 6.319852941176471,
"grad_norm": 0.259220830437003,
"learning_rate": 1.1627777576506306e-06,
"loss": 0.4647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13484236598014832,
"step": 1150,
"valid_targets_mean": 8036.9,
"valid_targets_min": 2414
},
{
"epoch": 6.3474264705882355,
"grad_norm": 0.23165660017295853,
"learning_rate": 1.0724399478265312e-06,
"loss": 0.4619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15904231369495392,
"step": 1155,
"valid_targets_mean": 10136.1,
"valid_targets_min": 3152
},
{
"epoch": 6.375,
"grad_norm": 0.21620185766223732,
"learning_rate": 9.85658110161747e-07,
"loss": 0.463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15874940156936646,
"step": 1160,
"valid_targets_mean": 10696.2,
"valid_targets_min": 1469
},
{
"epoch": 6.4025735294117645,
"grad_norm": 0.2266371932340415,
"learning_rate": 9.02448548596031e-07,
"loss": 0.4628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15737676620483398,
"step": 1165,
"valid_targets_mean": 10155.7,
"valid_targets_min": 1265
},
{
"epoch": 6.430147058823529,
"grad_norm": 0.22665087403909726,
"learning_rate": 8.228268959359086e-07,
"loss": 0.4626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15128636360168457,
"step": 1170,
"valid_targets_mean": 9104.2,
"valid_targets_min": 1738
},
{
"epoch": 6.457720588235294,
"grad_norm": 0.279337484358945,
"learning_rate": 7.468081109177028e-07,
"loss": 0.4574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13439278304576874,
"step": 1175,
"valid_targets_mean": 8199.8,
"valid_targets_min": 1505
},
{
"epoch": 6.485294117647059,
"grad_norm": 0.22106817456760308,
"learning_rate": 6.744064753972068e-07,
"loss": 0.4646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15889695286750793,
"step": 1180,
"valid_targets_mean": 9808.5,
"valid_targets_min": 2024
},
{
"epoch": 6.512867647058823,
"grad_norm": 0.20952794028596322,
"learning_rate": 6.056355916665024e-07,
"loss": 0.4559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16096243262290955,
"step": 1185,
"valid_targets_mean": 10794.8,
"valid_targets_min": 3436
},
{
"epoch": 6.540441176470588,
"grad_norm": 0.25549266747636473,
"learning_rate": 5.405083798984567e-07,
"loss": 0.4541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1415504813194275,
"step": 1190,
"valid_targets_mean": 8488.8,
"valid_targets_min": 1557
},
{
"epoch": 6.568014705882353,
"grad_norm": 0.21576881708453866,
"learning_rate": 4.790370757193907e-07,
"loss": 0.4566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14048513770103455,
"step": 1195,
"valid_targets_mean": 8573.9,
"valid_targets_min": 1413
},
{
"epoch": 6.595588235294118,
"grad_norm": 0.21385907663265988,
"learning_rate": 4.212332279103204e-07,
"loss": 0.4538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14507344365119934,
"step": 1200,
"valid_targets_mean": 8611.7,
"valid_targets_min": 1369
},
{
"epoch": 6.623161764705882,
"grad_norm": 0.27787320041061075,
"learning_rate": 3.671076962372655e-07,
"loss": 0.4598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17089204490184784,
"step": 1205,
"valid_targets_mean": 10808.4,
"valid_targets_min": 2900
},
{
"epoch": 6.650735294117647,
"grad_norm": 0.21747531396757017,
"learning_rate": 3.1667064941099724e-07,
"loss": 0.4568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14115549623966217,
"step": 1210,
"valid_targets_mean": 8620.9,
"valid_targets_min": 2139
},
{
"epoch": 6.678308823529412,
"grad_norm": 0.23882168189925346,
"learning_rate": 2.699315631766064e-07,
"loss": 0.4632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1577366292476654,
"step": 1215,
"valid_targets_mean": 10010.7,
"valid_targets_min": 2525
},
{
"epoch": 6.705882352941177,
"grad_norm": 0.21158133975405694,
"learning_rate": 2.26899218533283e-07,
"loss": 0.46,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15490460395812988,
"step": 1220,
"valid_targets_mean": 10206.4,
"valid_targets_min": 1948
},
{
"epoch": 6.733455882352941,
"grad_norm": 0.2068070148831523,
"learning_rate": 1.8758170008459142e-07,
"loss": 0.4624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17629992961883545,
"step": 1225,
"valid_targets_mean": 11098.0,
"valid_targets_min": 2855
},
{
"epoch": 6.761029411764706,
"grad_norm": 0.276570316072206,
"learning_rate": 1.5198639451960095e-07,
"loss": 0.4548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1544700562953949,
"step": 1230,
"valid_targets_mean": 10218.4,
"valid_targets_min": 1690
},
{
"epoch": 6.788602941176471,
"grad_norm": 0.24887539900389968,
"learning_rate": 1.201199892251337e-07,
"loss": 0.4555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16659650206565857,
"step": 1235,
"valid_targets_mean": 11563.8,
"valid_targets_min": 1522
},
{
"epoch": 6.8161764705882355,
"grad_norm": 0.2186914252195666,
"learning_rate": 9.198847102937614e-08,
"loss": 0.4552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15176278352737427,
"step": 1240,
"valid_targets_mean": 9547.0,
"valid_targets_min": 2825
},
{
"epoch": 6.84375,
"grad_norm": 0.20820326483482113,
"learning_rate": 6.759712507711902e-08,
"loss": 0.4622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15682631731033325,
"step": 1245,
"valid_targets_mean": 9983.1,
"valid_targets_min": 2230
},
{
"epoch": 6.8713235294117645,
"grad_norm": 0.22180946034862845,
"learning_rate": 4.695053383683812e-08,
"loss": 0.457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14503879845142365,
"step": 1250,
"valid_targets_mean": 8904.7,
"valid_targets_min": 1080
},
{
"epoch": 6.898897058823529,
"grad_norm": 0.2383751709023781,
"learning_rate": 3.0052576239749666e-08,
"loss": 0.4554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1317831426858902,
"step": 1255,
"valid_targets_mean": 8981.2,
"valid_targets_min": 1992
},
{
"epoch": 6.926470588235294,
"grad_norm": 0.2107262711334655,
"learning_rate": 1.6906426951086573e-08,
"loss": 0.4581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14984363317489624,
"step": 1260,
"valid_targets_mean": 10053.0,
"valid_targets_min": 3060
},
{
"epoch": 6.954044117647059,
"grad_norm": 0.2623126377291314,
"learning_rate": 7.514555773648901e-09,
"loss": 0.4614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16520237922668457,
"step": 1265,
"valid_targets_mean": 10900.3,
"valid_targets_min": 3734
},
{
"epoch": 6.981617647058823,
"grad_norm": 0.2030352524829392,
"learning_rate": 1.8787271838083263e-09,
"loss": 0.4614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15681001543998718,
"step": 1270,
"valid_targets_mean": 10194.2,
"valid_targets_min": 4309
},
{
"epoch": 7.0,
"step": 1274,
"total_flos": 5.319956987025293e+18,
"train_loss": 0.0,
"train_runtime": 1.2382,
"train_samples_per_second": 98264.932,
"train_steps_per_second": 1028.893
}
],
"logging_steps": 5,
"max_steps": 1274,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.319956987025293e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}