Files
nemotron-terminal-scientifi…/trainer_state.json
ModelHub XC 93f8dd95a9 初始化项目,由ModelHub XC社区提供模型
Model: laion/nemotron-terminal-scientific_computing__Qwen3-8B
Source: Original Platform
2026-04-25 01:11:05 +08:00

2277 lines
63 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1015,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.024630541871921183,
"grad_norm": 10.743034651431167,
"learning_rate": 1.5686274509803923e-06,
"loss": 0.8575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29485759139060974,
"step": 5,
"valid_targets_mean": 9236.3,
"valid_targets_min": 1969
},
{
"epoch": 0.04926108374384237,
"grad_norm": 4.965625847050405,
"learning_rate": 3.529411764705883e-06,
"loss": 0.8189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27123022079467773,
"step": 10,
"valid_targets_mean": 9325.8,
"valid_targets_min": 845
},
{
"epoch": 0.07389162561576355,
"grad_norm": 1.5709764932057944,
"learning_rate": 5.4901960784313735e-06,
"loss": 0.7166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22228774428367615,
"step": 15,
"valid_targets_mean": 8391.1,
"valid_targets_min": 1557
},
{
"epoch": 0.09852216748768473,
"grad_norm": 1.1070034152098873,
"learning_rate": 7.450980392156863e-06,
"loss": 0.6554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19853773713111877,
"step": 20,
"valid_targets_mean": 8116.2,
"valid_targets_min": 1655
},
{
"epoch": 0.12315270935960591,
"grad_norm": 0.7952086999098457,
"learning_rate": 9.411764705882354e-06,
"loss": 0.6269,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2077217698097229,
"step": 25,
"valid_targets_mean": 9368.0,
"valid_targets_min": 2823
},
{
"epoch": 0.1477832512315271,
"grad_norm": 0.649366154808441,
"learning_rate": 1.1372549019607844e-05,
"loss": 0.5914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18282532691955566,
"step": 30,
"valid_targets_mean": 8624.1,
"valid_targets_min": 3036
},
{
"epoch": 0.1724137931034483,
"grad_norm": 0.4388941481235093,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.5478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1840602457523346,
"step": 35,
"valid_targets_mean": 9535.8,
"valid_targets_min": 3381
},
{
"epoch": 0.19704433497536947,
"grad_norm": 0.3731243643842238,
"learning_rate": 1.5294117647058822e-05,
"loss": 0.5168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15646323561668396,
"step": 40,
"valid_targets_mean": 7694.9,
"valid_targets_min": 1593
},
{
"epoch": 0.22167487684729065,
"grad_norm": 0.314999922874189,
"learning_rate": 1.7254901960784314e-05,
"loss": 0.5032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15275967121124268,
"step": 45,
"valid_targets_mean": 8356.0,
"valid_targets_min": 2848
},
{
"epoch": 0.24630541871921183,
"grad_norm": 0.27241302838236864,
"learning_rate": 1.9215686274509807e-05,
"loss": 0.482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1699313223361969,
"step": 50,
"valid_targets_mean": 8695.2,
"valid_targets_min": 1878
},
{
"epoch": 0.270935960591133,
"grad_norm": 0.24171316383457545,
"learning_rate": 2.1176470588235296e-05,
"loss": 0.4697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15137627720832825,
"step": 55,
"valid_targets_mean": 8684.1,
"valid_targets_min": 2323
},
{
"epoch": 0.2955665024630542,
"grad_norm": 0.2752759358588667,
"learning_rate": 2.3137254901960788e-05,
"loss": 0.4524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15112951397895813,
"step": 60,
"valid_targets_mean": 8717.0,
"valid_targets_min": 352
},
{
"epoch": 0.32019704433497537,
"grad_norm": 0.3256829290721947,
"learning_rate": 2.5098039215686277e-05,
"loss": 0.4471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15601575374603271,
"step": 65,
"valid_targets_mean": 8975.6,
"valid_targets_min": 777
},
{
"epoch": 0.3448275862068966,
"grad_norm": 0.26016520519307323,
"learning_rate": 2.705882352941177e-05,
"loss": 0.4394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14817699790000916,
"step": 70,
"valid_targets_mean": 8358.4,
"valid_targets_min": 4473
},
{
"epoch": 0.3694581280788177,
"grad_norm": 0.27210923180659075,
"learning_rate": 2.9019607843137258e-05,
"loss": 0.4316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13734804093837738,
"step": 75,
"valid_targets_mean": 8500.8,
"valid_targets_min": 1844
},
{
"epoch": 0.39408866995073893,
"grad_norm": 0.2762491285053443,
"learning_rate": 3.098039215686275e-05,
"loss": 0.4175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12119661271572113,
"step": 80,
"valid_targets_mean": 8555.7,
"valid_targets_min": 2666
},
{
"epoch": 0.4187192118226601,
"grad_norm": 0.27953653823930036,
"learning_rate": 3.294117647058824e-05,
"loss": 0.4151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13656310737133026,
"step": 85,
"valid_targets_mean": 9335.4,
"valid_targets_min": 2375
},
{
"epoch": 0.4433497536945813,
"grad_norm": 0.27762179667403303,
"learning_rate": 3.490196078431373e-05,
"loss": 0.4137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14938178658485413,
"step": 90,
"valid_targets_mean": 9836.8,
"valid_targets_min": 4990
},
{
"epoch": 0.46798029556650245,
"grad_norm": 0.30533872508264365,
"learning_rate": 3.686274509803922e-05,
"loss": 0.4065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12681949138641357,
"step": 95,
"valid_targets_mean": 8551.3,
"valid_targets_min": 2982
},
{
"epoch": 0.49261083743842365,
"grad_norm": 0.3178616151302271,
"learning_rate": 3.882352941176471e-05,
"loss": 0.4092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13543638586997986,
"step": 100,
"valid_targets_mean": 8156.8,
"valid_targets_min": 378
},
{
"epoch": 0.5172413793103449,
"grad_norm": 0.3480550602168805,
"learning_rate": 3.999952639479403e-05,
"loss": 0.4025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1413741409778595,
"step": 105,
"valid_targets_mean": 8588.5,
"valid_targets_min": 2292
},
{
"epoch": 0.541871921182266,
"grad_norm": 0.2886232799853676,
"learning_rate": 3.999419859382013e-05,
"loss": 0.4066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12243609875440598,
"step": 110,
"valid_targets_mean": 8215.7,
"valid_targets_min": 996
},
{
"epoch": 0.5665024630541872,
"grad_norm": 0.30402912482935435,
"learning_rate": 3.99829525676357e-05,
"loss": 0.3992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13463672995567322,
"step": 115,
"valid_targets_mean": 9568.1,
"valid_targets_min": 1704
},
{
"epoch": 0.5911330049261084,
"grad_norm": 0.31173572113829323,
"learning_rate": 3.996579164503212e-05,
"loss": 0.4006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13769567012786865,
"step": 120,
"valid_targets_mean": 8704.1,
"valid_targets_min": 1267
},
{
"epoch": 0.6157635467980296,
"grad_norm": 0.2681375523972611,
"learning_rate": 3.9942720905593045e-05,
"loss": 0.4021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1356489658355713,
"step": 125,
"valid_targets_mean": 9078.8,
"valid_targets_min": 1116
},
{
"epoch": 0.6403940886699507,
"grad_norm": 0.3358135332937248,
"learning_rate": 3.991374717819092e-05,
"loss": 0.3953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14018836617469788,
"step": 130,
"valid_targets_mean": 9290.2,
"valid_targets_min": 3098
},
{
"epoch": 0.6650246305418719,
"grad_norm": 0.3112144751609365,
"learning_rate": 3.987887903896564e-05,
"loss": 0.3917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13718704879283905,
"step": 135,
"valid_targets_mean": 8901.3,
"valid_targets_min": 2172
},
{
"epoch": 0.6896551724137931,
"grad_norm": 0.37646691210233985,
"learning_rate": 3.9838126808786006e-05,
"loss": 0.392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1294289231300354,
"step": 140,
"valid_targets_mean": 9038.9,
"valid_targets_min": 2299
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.2878512375569559,
"learning_rate": 3.9791502550194803e-05,
"loss": 0.3869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1291024088859558,
"step": 145,
"valid_targets_mean": 9420.4,
"valid_targets_min": 1430
},
{
"epoch": 0.7389162561576355,
"grad_norm": 0.24042444621370282,
"learning_rate": 3.973902006383831e-05,
"loss": 0.388,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13769212365150452,
"step": 150,
"valid_targets_mean": 8992.5,
"valid_targets_min": 897
},
{
"epoch": 0.7635467980295566,
"grad_norm": 0.310912607819178,
"learning_rate": 3.968069488438139e-05,
"loss": 0.3824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12700515985488892,
"step": 155,
"valid_targets_mean": 8964.4,
"valid_targets_min": 349
},
{
"epoch": 0.7881773399014779,
"grad_norm": 0.2655434672665567,
"learning_rate": 3.9616544275909195e-05,
"loss": 0.3812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11202602088451385,
"step": 160,
"valid_targets_mean": 8055.7,
"valid_targets_min": 434
},
{
"epoch": 0.812807881773399,
"grad_norm": 0.26868643875179116,
"learning_rate": 3.954658722681712e-05,
"loss": 0.3816,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12862944602966309,
"step": 165,
"valid_targets_mean": 9318.7,
"valid_targets_min": 4356
},
{
"epoch": 0.8374384236453202,
"grad_norm": 0.28733672498228385,
"learning_rate": 3.9470844444190246e-05,
"loss": 0.3849,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13202814757823944,
"step": 170,
"valid_targets_mean": 9219.0,
"valid_targets_min": 2164
},
{
"epoch": 0.8620689655172413,
"grad_norm": 0.28313069036684496,
"learning_rate": 3.938933834767414e-05,
"loss": 0.3823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1158134713768959,
"step": 175,
"valid_targets_mean": 8608.2,
"valid_targets_min": 467
},
{
"epoch": 0.8866995073891626,
"grad_norm": 0.26052104379161,
"learning_rate": 3.930209306283867e-05,
"loss": 0.3743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11961864680051804,
"step": 180,
"valid_targets_mean": 7993.2,
"valid_targets_min": 3574
},
{
"epoch": 0.9113300492610837,
"grad_norm": 0.258686222921349,
"learning_rate": 3.9209134414036925e-05,
"loss": 0.3729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10488448292016983,
"step": 185,
"valid_targets_mean": 8279.7,
"valid_targets_min": 1136
},
{
"epoch": 0.9359605911330049,
"grad_norm": 0.26136503207196515,
"learning_rate": 3.9110489916761276e-05,
"loss": 0.3801,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13918861746788025,
"step": 190,
"valid_targets_mean": 9786.9,
"valid_targets_min": 3188
},
{
"epoch": 0.9605911330049262,
"grad_norm": 0.2743644632084587,
"learning_rate": 3.9006188769498865e-05,
"loss": 0.3741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13338595628738403,
"step": 195,
"valid_targets_mean": 10200.8,
"valid_targets_min": 5539
},
{
"epoch": 0.9852216748768473,
"grad_norm": 0.24805804478278223,
"learning_rate": 3.8896261845088955e-05,
"loss": 0.3741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12868089973926544,
"step": 200,
"valid_targets_mean": 8725.2,
"valid_targets_min": 2242
},
{
"epoch": 1.0098522167487685,
"grad_norm": 0.27564411587309895,
"learning_rate": 3.8780741681584636e-05,
"loss": 0.3612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10980004072189331,
"step": 205,
"valid_targets_mean": 8534.5,
"valid_targets_min": 3406
},
{
"epoch": 1.0344827586206897,
"grad_norm": 0.2788578201037295,
"learning_rate": 3.865966247262166e-05,
"loss": 0.3593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11370569467544556,
"step": 210,
"valid_targets_mean": 8309.0,
"valid_targets_min": 3283
},
{
"epoch": 1.0591133004926108,
"grad_norm": 0.248625884092612,
"learning_rate": 3.8533060057297235e-05,
"loss": 0.3561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12278437614440918,
"step": 215,
"valid_targets_mean": 8779.9,
"valid_targets_min": 2576
},
{
"epoch": 1.083743842364532,
"grad_norm": 0.2684186476300992,
"learning_rate": 3.840097190956175e-05,
"loss": 0.3581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1202234998345375,
"step": 220,
"valid_targets_mean": 9176.9,
"valid_targets_min": 4137
},
{
"epoch": 1.1083743842364533,
"grad_norm": 0.25632340589308605,
"learning_rate": 3.826343712712658e-05,
"loss": 0.3503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10630079358816147,
"step": 225,
"valid_targets_mean": 8646.0,
"valid_targets_min": 1169
},
{
"epoch": 1.1330049261083743,
"grad_norm": 0.27498707289566154,
"learning_rate": 3.81204964198913e-05,
"loss": 0.3595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1310245394706726,
"step": 230,
"valid_targets_mean": 9294.8,
"valid_targets_min": 2075
},
{
"epoch": 1.1576354679802956,
"grad_norm": 0.2642154827340513,
"learning_rate": 3.797219209789365e-05,
"loss": 0.3497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1117246225476265,
"step": 235,
"valid_targets_mean": 8429.1,
"valid_targets_min": 1930
},
{
"epoch": 1.1822660098522166,
"grad_norm": 0.2836720432865943,
"learning_rate": 3.7818568058785906e-05,
"loss": 0.3585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11454908549785614,
"step": 240,
"valid_targets_mean": 8133.4,
"valid_targets_min": 390
},
{
"epoch": 1.206896551724138,
"grad_norm": 0.3365645717447432,
"learning_rate": 3.7659669774841274e-05,
"loss": 0.3591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11681550741195679,
"step": 245,
"valid_targets_mean": 8933.8,
"valid_targets_min": 612
},
{
"epoch": 1.2315270935960592,
"grad_norm": 0.31327662356339114,
"learning_rate": 3.749554427949426e-05,
"loss": 0.351,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1127677857875824,
"step": 250,
"valid_targets_mean": 8292.3,
"valid_targets_min": 2632
},
{
"epoch": 1.2561576354679804,
"grad_norm": 0.23261922775556002,
"learning_rate": 3.7326240153418895e-05,
"loss": 0.3501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1259966492652893,
"step": 255,
"valid_targets_mean": 9653.4,
"valid_targets_min": 3518
},
{
"epoch": 1.2807881773399015,
"grad_norm": 0.2525335827059363,
"learning_rate": 3.7151807510148975e-05,
"loss": 0.3582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11980243027210236,
"step": 260,
"valid_targets_mean": 9146.5,
"valid_targets_min": 1456
},
{
"epoch": 1.3054187192118227,
"grad_norm": 0.3310408716814809,
"learning_rate": 3.697229798124464e-05,
"loss": 0.3577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11194953322410583,
"step": 265,
"valid_targets_mean": 9172.4,
"valid_targets_min": 3619
},
{
"epoch": 1.3300492610837438,
"grad_norm": 0.2648103991246787,
"learning_rate": 3.678776470100954e-05,
"loss": 0.3545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1303391456604004,
"step": 270,
"valid_targets_mean": 8879.3,
"valid_targets_min": 4536
},
{
"epoch": 1.354679802955665,
"grad_norm": 0.25449167975817794,
"learning_rate": 3.659826229076326e-05,
"loss": 0.3534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1403665840625763,
"step": 275,
"valid_targets_mean": 10481.7,
"valid_targets_min": 4554
},
{
"epoch": 1.3793103448275863,
"grad_norm": 0.2818830814758012,
"learning_rate": 3.640384684267357e-05,
"loss": 0.3469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1068924218416214,
"step": 280,
"valid_targets_mean": 8961.1,
"valid_targets_min": 3230
},
{
"epoch": 1.4039408866995073,
"grad_norm": 0.2693512065658683,
"learning_rate": 3.6204575903153285e-05,
"loss": 0.3532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11001914739608765,
"step": 285,
"valid_targets_mean": 8176.8,
"valid_targets_min": 3715
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.25311056093985246,
"learning_rate": 3.600050845582669e-05,
"loss": 0.3474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12169365584850311,
"step": 290,
"valid_targets_mean": 9581.9,
"valid_targets_min": 4873
},
{
"epoch": 1.4532019704433496,
"grad_norm": 0.2633607633838574,
"learning_rate": 3.57917049040706e-05,
"loss": 0.3484,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1089414581656456,
"step": 295,
"valid_targets_mean": 8416.3,
"valid_targets_min": 425
},
{
"epoch": 1.477832512315271,
"grad_norm": 0.244398881708659,
"learning_rate": 3.557822705313507e-05,
"loss": 0.3518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11258911341428757,
"step": 300,
"valid_targets_mean": 8594.2,
"valid_targets_min": 2520
},
{
"epoch": 1.5024630541871922,
"grad_norm": 0.24997620578865998,
"learning_rate": 3.5360138091849276e-05,
"loss": 0.3469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11095688492059708,
"step": 305,
"valid_targets_mean": 8580.7,
"valid_targets_min": 3098
},
{
"epoch": 1.5270935960591134,
"grad_norm": 0.2245714689080682,
"learning_rate": 3.513750257391778e-05,
"loss": 0.3508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1221964880824089,
"step": 310,
"valid_targets_mean": 9254.5,
"valid_targets_min": 3804
},
{
"epoch": 1.5517241379310345,
"grad_norm": 0.25608384850505034,
"learning_rate": 3.4910386398812784e-05,
"loss": 0.3477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1219601258635521,
"step": 315,
"valid_targets_mean": 8776.5,
"valid_targets_min": 3199
},
{
"epoch": 1.5763546798029555,
"grad_norm": 0.3647096406549712,
"learning_rate": 3.467885679226817e-05,
"loss": 0.3485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1203581690788269,
"step": 320,
"valid_targets_mean": 9392.0,
"valid_targets_min": 3986
},
{
"epoch": 1.6009852216748768,
"grad_norm": 0.24346253896666947,
"learning_rate": 3.444298228638077e-05,
"loss": 0.3532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12136732041835785,
"step": 325,
"valid_targets_mean": 9895.2,
"valid_targets_min": 4423
},
{
"epoch": 1.625615763546798,
"grad_norm": 0.2428346066186375,
"learning_rate": 3.420283269932514e-05,
"loss": 0.345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11618741601705551,
"step": 330,
"valid_targets_mean": 8762.7,
"valid_targets_min": 3014
},
{
"epoch": 1.6502463054187193,
"grad_norm": 0.24297086102914583,
"learning_rate": 3.3958479114687515e-05,
"loss": 0.3458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11359558999538422,
"step": 335,
"valid_targets_mean": 9019.1,
"valid_targets_min": 2745
},
{
"epoch": 1.6748768472906403,
"grad_norm": 0.23444365454203103,
"learning_rate": 3.3709993860425346e-05,
"loss": 0.3473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1222086101770401,
"step": 340,
"valid_targets_mean": 9328.7,
"valid_targets_min": 2345
},
{
"epoch": 1.6995073891625616,
"grad_norm": 0.22889649248851593,
"learning_rate": 3.345745048745838e-05,
"loss": 0.3516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11141127347946167,
"step": 345,
"valid_targets_mean": 8866.8,
"valid_targets_min": 366
},
{
"epoch": 1.7241379310344827,
"grad_norm": 0.2343831962801274,
"learning_rate": 3.320092374789782e-05,
"loss": 0.3546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11277924478054047,
"step": 350,
"valid_targets_mean": 8265.9,
"valid_targets_min": 375
},
{
"epoch": 1.748768472906404,
"grad_norm": 0.2467687483921903,
"learning_rate": 3.2940489572919917e-05,
"loss": 0.3472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11757723987102509,
"step": 355,
"valid_targets_mean": 9222.7,
"valid_targets_min": 3977
},
{
"epoch": 1.7733990147783252,
"grad_norm": 0.2576031897620249,
"learning_rate": 3.267622505029053e-05,
"loss": 0.3448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1083230972290039,
"step": 360,
"valid_targets_mean": 8165.4,
"valid_targets_min": 2847
},
{
"epoch": 1.7980295566502464,
"grad_norm": 0.2847455800013209,
"learning_rate": 3.24082084015474e-05,
"loss": 0.3492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10450009256601334,
"step": 365,
"valid_targets_mean": 7683.3,
"valid_targets_min": 384
},
{
"epoch": 1.8226600985221675,
"grad_norm": 0.24889533758877141,
"learning_rate": 3.213651895884683e-05,
"loss": 0.3508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12324145436286926,
"step": 370,
"valid_targets_mean": 9273.4,
"valid_targets_min": 339
},
{
"epoch": 1.8472906403940885,
"grad_norm": 0.2509726775951525,
"learning_rate": 3.1861237141481506e-05,
"loss": 0.3463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10529517382383347,
"step": 375,
"valid_targets_mean": 7376.7,
"valid_targets_min": 2690
},
{
"epoch": 1.8719211822660098,
"grad_norm": 0.30513053329486167,
"learning_rate": 3.158244443207671e-05,
"loss": 0.3535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12324077636003494,
"step": 380,
"valid_targets_mean": 8858.1,
"valid_targets_min": 2118
},
{
"epoch": 1.896551724137931,
"grad_norm": 0.257201744023466,
"learning_rate": 3.130022335247163e-05,
"loss": 0.3417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12891273200511932,
"step": 385,
"valid_targets_mean": 9345.1,
"valid_targets_min": 5281
},
{
"epoch": 1.9211822660098523,
"grad_norm": 0.24998558537607088,
"learning_rate": 3.101465743929318e-05,
"loss": 0.345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11640559881925583,
"step": 390,
"valid_targets_mean": 9220.7,
"valid_targets_min": 2002
},
{
"epoch": 1.9458128078817734,
"grad_norm": 0.22830611933571576,
"learning_rate": 3.072583121922939e-05,
"loss": 0.3455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11128903925418854,
"step": 395,
"valid_targets_mean": 8883.6,
"valid_targets_min": 2488
},
{
"epoch": 1.9704433497536946,
"grad_norm": 0.22396942549404023,
"learning_rate": 3.0433830184009694e-05,
"loss": 0.3453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11893004179000854,
"step": 400,
"valid_targets_mean": 8981.7,
"valid_targets_min": 2763
},
{
"epoch": 1.9950738916256157,
"grad_norm": 0.24644092979609167,
"learning_rate": 3.0138740765099724e-05,
"loss": 0.3489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1190701425075531,
"step": 405,
"valid_targets_mean": 8123.2,
"valid_targets_min": 415
},
{
"epoch": 2.019704433497537,
"grad_norm": 0.2650309894835959,
"learning_rate": 2.984065030811776e-05,
"loss": 0.3243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11054711788892746,
"step": 410,
"valid_targets_mean": 8905.1,
"valid_targets_min": 859
},
{
"epoch": 2.044334975369458,
"grad_norm": 0.22732032307524816,
"learning_rate": 2.9539647046980716e-05,
"loss": 0.3208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10819808393716812,
"step": 415,
"valid_targets_mean": 8779.8,
"valid_targets_min": 2486
},
{
"epoch": 2.0689655172413794,
"grad_norm": 0.25644205605632525,
"learning_rate": 2.923582007778716e-05,
"loss": 0.3251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10985960066318512,
"step": 420,
"valid_targets_mean": 9446.7,
"valid_targets_min": 3496
},
{
"epoch": 2.0935960591133007,
"grad_norm": 0.24088638698690903,
"learning_rate": 2.8929259332445096e-05,
"loss": 0.3271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10128775238990784,
"step": 425,
"valid_targets_mean": 9092.7,
"valid_targets_min": 983
},
{
"epoch": 2.1182266009852215,
"grad_norm": 0.24541067677362202,
"learning_rate": 2.8620055552052403e-05,
"loss": 0.3258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11216947436332703,
"step": 430,
"valid_targets_mean": 8788.4,
"valid_targets_min": 3614
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.2482079004721641,
"learning_rate": 2.8308300260037734e-05,
"loss": 0.3224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1115947961807251,
"step": 435,
"valid_targets_mean": 9745.7,
"valid_targets_min": 3675
},
{
"epoch": 2.167487684729064,
"grad_norm": 0.22913552233839607,
"learning_rate": 2.7994085735069814e-05,
"loss": 0.325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10517291724681854,
"step": 440,
"valid_targets_mean": 8710.5,
"valid_targets_min": 3662
},
{
"epoch": 2.1921182266009853,
"grad_norm": 0.24146267991687,
"learning_rate": 2.767750498374327e-05,
"loss": 0.3197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12296651303768158,
"step": 445,
"valid_targets_mean": 8849.4,
"valid_targets_min": 535
},
{
"epoch": 2.2167487684729066,
"grad_norm": 0.21827048442446823,
"learning_rate": 2.735865171304889e-05,
"loss": 0.3188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10590243339538574,
"step": 450,
"valid_targets_mean": 8600.2,
"valid_targets_min": 3249
},
{
"epoch": 2.2413793103448274,
"grad_norm": 0.23028098738931826,
"learning_rate": 2.703762030263666e-05,
"loss": 0.3243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09583649784326553,
"step": 455,
"valid_targets_mean": 8295.3,
"valid_targets_min": 3305
},
{
"epoch": 2.2660098522167487,
"grad_norm": 0.22275379600509554,
"learning_rate": 2.6714505776879666e-05,
"loss": 0.3212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1080237478017807,
"step": 460,
"valid_targets_mean": 9274.6,
"valid_targets_min": 2483
},
{
"epoch": 2.29064039408867,
"grad_norm": 0.2207031504793314,
"learning_rate": 2.6389403776747116e-05,
"loss": 0.3182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11194107681512833,
"step": 465,
"valid_targets_mean": 9042.2,
"valid_targets_min": 4617
},
{
"epoch": 2.315270935960591,
"grad_norm": 0.2241399700657853,
"learning_rate": 2.606241053149492e-05,
"loss": 0.3244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10940317064523697,
"step": 470,
"valid_targets_mean": 9518.9,
"valid_targets_min": 1468
},
{
"epoch": 2.3399014778325125,
"grad_norm": 0.24058313899023676,
"learning_rate": 2.5733622830182095e-05,
"loss": 0.327,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09529858827590942,
"step": 475,
"valid_targets_mean": 8239.7,
"valid_targets_min": 251
},
{
"epoch": 2.3645320197044333,
"grad_norm": 0.23562354398146523,
"learning_rate": 2.5403137993021483e-05,
"loss": 0.3234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09677626192569733,
"step": 480,
"valid_targets_mean": 8106.5,
"valid_targets_min": 639
},
{
"epoch": 2.3891625615763545,
"grad_norm": 0.2530852630178121,
"learning_rate": 2.5071053842573264e-05,
"loss": 0.3223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10545364022254944,
"step": 485,
"valid_targets_mean": 9255.8,
"valid_targets_min": 1844
},
{
"epoch": 2.413793103448276,
"grad_norm": 0.21881629429554156,
"learning_rate": 2.473746867478973e-05,
"loss": 0.3208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11455640196800232,
"step": 490,
"valid_targets_mean": 9913.8,
"valid_targets_min": 3181
},
{
"epoch": 2.438423645320197,
"grad_norm": 0.2524273469977475,
"learning_rate": 2.4402481229919982e-05,
"loss": 0.325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10793764144182205,
"step": 495,
"valid_targets_mean": 9409.1,
"valid_targets_min": 386
},
{
"epoch": 2.4630541871921183,
"grad_norm": 0.21182409471855082,
"learning_rate": 2.406619066328311e-05,
"loss": 0.321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10518929362297058,
"step": 500,
"valid_targets_mean": 9363.0,
"valid_targets_min": 4145
},
{
"epoch": 2.4876847290640396,
"grad_norm": 0.2191040471839855,
"learning_rate": 2.3728696515918496e-05,
"loss": 0.3213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10714466869831085,
"step": 505,
"valid_targets_mean": 8711.5,
"valid_targets_min": 667
},
{
"epoch": 2.512315270935961,
"grad_norm": 0.2448105118684568,
"learning_rate": 2.3390098685121938e-05,
"loss": 0.322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09540620446205139,
"step": 510,
"valid_targets_mean": 8153.1,
"valid_targets_min": 3180
},
{
"epoch": 2.5369458128078817,
"grad_norm": 0.220518003206731,
"learning_rate": 2.3050497394876363e-05,
"loss": 0.3222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10619120299816132,
"step": 515,
"valid_targets_mean": 8894.3,
"valid_targets_min": 2359
},
{
"epoch": 2.561576354679803,
"grad_norm": 0.23717022293975634,
"learning_rate": 2.2709993166185803e-05,
"loss": 0.3341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10815994441509247,
"step": 520,
"valid_targets_mean": 8602.2,
"valid_targets_min": 4126
},
{
"epoch": 2.586206896551724,
"grad_norm": 0.2312939895442153,
"learning_rate": 2.2368686787321475e-05,
"loss": 0.3242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10106731951236725,
"step": 525,
"valid_targets_mean": 7950.1,
"valid_targets_min": 410
},
{
"epoch": 2.6108374384236455,
"grad_norm": 0.22544526165656265,
"learning_rate": 2.2026679283988727e-05,
"loss": 0.3196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10532703995704651,
"step": 530,
"valid_targets_mean": 9430.9,
"valid_targets_min": 4000
},
{
"epoch": 2.6354679802955667,
"grad_norm": 0.20231326994814502,
"learning_rate": 2.168407188942373e-05,
"loss": 0.3221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11423169821500778,
"step": 535,
"valid_targets_mean": 9679.7,
"valid_targets_min": 2166
},
{
"epoch": 2.6600985221674875,
"grad_norm": 0.20631608444099137,
"learning_rate": 2.1340966014428744e-05,
"loss": 0.322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09936245530843735,
"step": 540,
"valid_targets_mean": 8590.2,
"valid_targets_min": 366
},
{
"epoch": 2.684729064039409,
"grad_norm": 0.2264625162075009,
"learning_rate": 2.0997463217354803e-05,
"loss": 0.3207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11137133836746216,
"step": 545,
"valid_targets_mean": 8606.8,
"valid_targets_min": 2707
},
{
"epoch": 2.70935960591133,
"grad_norm": 0.19869783835319854,
"learning_rate": 2.065366517404071e-05,
"loss": 0.3208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11436265707015991,
"step": 550,
"valid_targets_mean": 9982.7,
"valid_targets_min": 5207
},
{
"epoch": 2.7339901477832513,
"grad_norm": 0.21361086043176336,
"learning_rate": 2.030967364771733e-05,
"loss": 0.317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10924769937992096,
"step": 555,
"valid_targets_mean": 8423.0,
"valid_targets_min": 3589
},
{
"epoch": 2.7586206896551726,
"grad_norm": 0.2159779190682014,
"learning_rate": 1.996559045888593e-05,
"loss": 0.3193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11431802809238434,
"step": 560,
"valid_targets_mean": 9245.8,
"valid_targets_min": 796
},
{
"epoch": 2.7832512315270934,
"grad_norm": 0.19784699425306007,
"learning_rate": 1.9621517455179627e-05,
"loss": 0.323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11673343181610107,
"step": 565,
"valid_targets_mean": 9697.0,
"valid_targets_min": 4763
},
{
"epoch": 2.8078817733990147,
"grad_norm": 0.21421367114423392,
"learning_rate": 1.9277556481216737e-05,
"loss": 0.3192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10330238938331604,
"step": 570,
"valid_targets_mean": 8288.4,
"valid_targets_min": 424
},
{
"epoch": 2.832512315270936,
"grad_norm": 0.19836582938289005,
"learning_rate": 1.893380934845514e-05,
"loss": 0.3226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12032057344913483,
"step": 575,
"valid_targets_mean": 8848.4,
"valid_targets_min": 2745
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.21631083193557546,
"learning_rate": 1.8590377805056306e-05,
"loss": 0.3192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10158735513687134,
"step": 580,
"valid_targets_mean": 8897.4,
"valid_targets_min": 2826
},
{
"epoch": 2.8817733990147785,
"grad_norm": 0.20981357344340126,
"learning_rate": 1.8247363505768177e-05,
"loss": 0.3153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09952034801244736,
"step": 585,
"valid_targets_mean": 8201.6,
"valid_targets_min": 1306
},
{
"epoch": 2.9064039408866993,
"grad_norm": 0.2271419153781403,
"learning_rate": 1.7904867981835617e-05,
"loss": 0.3229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11091038584709167,
"step": 590,
"valid_targets_mean": 9144.8,
"valid_targets_min": 1621
},
{
"epoch": 2.9310344827586206,
"grad_norm": 0.20711350475630588,
"learning_rate": 1.7562992610947517e-05,
"loss": 0.3165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0879313200712204,
"step": 595,
"valid_targets_mean": 6960.7,
"valid_targets_min": 341
},
{
"epoch": 2.955665024630542,
"grad_norm": 0.20228134899502237,
"learning_rate": 1.7221838587229215e-05,
"loss": 0.3234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12365391105413437,
"step": 600,
"valid_targets_mean": 9605.3,
"valid_targets_min": 417
},
{
"epoch": 2.980295566502463,
"grad_norm": 0.2081591697388915,
"learning_rate": 1.6881506891289386e-05,
"loss": 0.3164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10903681814670563,
"step": 605,
"valid_targets_mean": 8928.9,
"valid_targets_min": 2255
},
{
"epoch": 3.0049261083743843,
"grad_norm": 0.23994026035218605,
"learning_rate": 1.654209826033004e-05,
"loss": 0.3148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10147841274738312,
"step": 610,
"valid_targets_mean": 8829.4,
"valid_targets_min": 4278
},
{
"epoch": 3.0295566502463056,
"grad_norm": 0.22674405029866287,
"learning_rate": 1.6203713158328626e-05,
"loss": 0.3046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10697513818740845,
"step": 615,
"valid_targets_mean": 9735.6,
"valid_targets_min": 755
},
{
"epoch": 3.0541871921182264,
"grad_norm": 0.23064581576560553,
"learning_rate": 1.586645174630094e-05,
"loss": 0.2992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0928591638803482,
"step": 620,
"valid_targets_mean": 8492.0,
"valid_targets_min": 3198
},
{
"epoch": 3.0788177339901477,
"grad_norm": 0.23288072128103748,
"learning_rate": 1.5530413852653816e-05,
"loss": 0.3028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10058435797691345,
"step": 625,
"valid_targets_mean": 8571.2,
"valid_targets_min": 228
},
{
"epoch": 3.103448275862069,
"grad_norm": 0.21989355607355687,
"learning_rate": 1.5195698943636135e-05,
"loss": 0.2978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09678147733211517,
"step": 630,
"valid_targets_mean": 8105.6,
"valid_targets_min": 1260
},
{
"epoch": 3.12807881773399,
"grad_norm": 0.19891006352522667,
"learning_rate": 1.4862406093897175e-05,
"loss": 0.2999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09648677706718445,
"step": 635,
"valid_targets_mean": 8988.8,
"valid_targets_min": 363
},
{
"epoch": 3.1527093596059115,
"grad_norm": 0.24655653698082503,
"learning_rate": 1.4530633957160733e-05,
"loss": 0.3012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08152036368846893,
"step": 640,
"valid_targets_mean": 7848.4,
"valid_targets_min": 2993
},
{
"epoch": 3.1773399014778327,
"grad_norm": 0.22762238372136157,
"learning_rate": 1.4200480737023943e-05,
"loss": 0.3004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10375897586345673,
"step": 645,
"valid_targets_mean": 9312.4,
"valid_targets_min": 2486
},
{
"epoch": 3.2019704433497536,
"grad_norm": 0.22534789337999453,
"learning_rate": 1.3872044157889297e-05,
"loss": 0.3019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09716019779443741,
"step": 650,
"valid_targets_mean": 8545.7,
"valid_targets_min": 1935
},
{
"epoch": 3.226600985221675,
"grad_norm": 0.20514988270828538,
"learning_rate": 1.3545421436038477e-05,
"loss": 0.3094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10193373262882233,
"step": 655,
"valid_targets_mean": 8863.9,
"valid_targets_min": 2907
},
{
"epoch": 3.251231527093596,
"grad_norm": 0.1978171411281838,
"learning_rate": 1.3220709250856656e-05,
"loss": 0.3058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0844145119190216,
"step": 660,
"valid_targets_mean": 8168.6,
"valid_targets_min": 2005
},
{
"epoch": 3.2758620689655173,
"grad_norm": 0.20349161652396405,
"learning_rate": 1.2898003716215626e-05,
"loss": 0.2999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10471087694168091,
"step": 665,
"valid_targets_mean": 9535.9,
"valid_targets_min": 3279
},
{
"epoch": 3.3004926108374386,
"grad_norm": 0.19864565640995638,
"learning_rate": 1.2577400352024426e-05,
"loss": 0.3011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09200199693441391,
"step": 670,
"valid_targets_mean": 8635.4,
"valid_targets_min": 2501
},
{
"epoch": 3.3251231527093594,
"grad_norm": 0.2096280531918653,
"learning_rate": 1.2258994055955658e-05,
"loss": 0.2993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09174078702926636,
"step": 675,
"valid_targets_mean": 8636.1,
"valid_targets_min": 2381
},
{
"epoch": 3.3497536945812807,
"grad_norm": 0.21102625245478873,
"learning_rate": 1.1942879075356135e-05,
"loss": 0.3,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10436161607503891,
"step": 680,
"valid_targets_mean": 9196.2,
"valid_targets_min": 2880
},
{
"epoch": 3.374384236453202,
"grad_norm": 0.20074354219907545,
"learning_rate": 1.1629148979349836e-05,
"loss": 0.2964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09593068063259125,
"step": 685,
"valid_targets_mean": 9236.0,
"valid_targets_min": 3690
},
{
"epoch": 3.399014778325123,
"grad_norm": 0.21770217204418577,
"learning_rate": 1.1317896631141814e-05,
"loss": 0.3052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10593973100185394,
"step": 690,
"valid_targets_mean": 8596.7,
"valid_targets_min": 467
},
{
"epoch": 3.4236453201970445,
"grad_norm": 0.202033129923407,
"learning_rate": 1.1009214160530875e-05,
"loss": 0.3036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08792443573474884,
"step": 695,
"valid_targets_mean": 7999.6,
"valid_targets_min": 3034
},
{
"epoch": 3.4482758620689653,
"grad_norm": 0.19377905050918098,
"learning_rate": 1.0703192936639481e-05,
"loss": 0.3022,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08651701360940933,
"step": 700,
"valid_targets_mean": 8115.2,
"valid_targets_min": 2167
},
{
"epoch": 3.4729064039408866,
"grad_norm": 0.18949986820075895,
"learning_rate": 1.0399923540868712e-05,
"loss": 0.3059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10459959506988525,
"step": 705,
"valid_targets_mean": 9690.8,
"valid_targets_min": 3185
},
{
"epoch": 3.497536945812808,
"grad_norm": 0.2011804523127738,
"learning_rate": 1.0099495740086454e-05,
"loss": 0.3009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10490408539772034,
"step": 710,
"valid_targets_mean": 8907.2,
"valid_targets_min": 2179
},
{
"epoch": 3.522167487684729,
"grad_norm": 0.20548088230585662,
"learning_rate": 9.801998460056643e-06,
"loss": 0.3038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10383732616901398,
"step": 715,
"valid_targets_mean": 8324.9,
"valid_targets_min": 983
},
{
"epoch": 3.5467980295566504,
"grad_norm": 0.19683329981111766,
"learning_rate": 9.507519759117546e-06,
"loss": 0.2991,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09702172875404358,
"step": 720,
"valid_targets_mean": 8744.2,
"valid_targets_min": 3967
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.19625336438708468,
"learning_rate": 9.216146802116676e-06,
"loss": 0.3053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09151807427406311,
"step": 725,
"valid_targets_mean": 8401.1,
"valid_targets_min": 1020
},
{
"epoch": 3.596059113300493,
"grad_norm": 0.18708247392345115,
"learning_rate": 8.92796583461031e-06,
"loss": 0.3064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09643843024969101,
"step": 730,
"valid_targets_mean": 8835.6,
"valid_targets_min": 796
},
{
"epoch": 3.6206896551724137,
"grad_norm": 0.19360040135948756,
"learning_rate": 8.643062157335e-06,
"loss": 0.2994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08726485073566437,
"step": 735,
"valid_targets_mean": 8767.6,
"valid_targets_min": 1621
},
{
"epoch": 3.645320197044335,
"grad_norm": 0.18440066609355732,
"learning_rate": 8.361520100958856e-06,
"loss": 0.2982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08847818523645401,
"step": 740,
"valid_targets_mean": 9271.4,
"valid_targets_min": 4099
},
{
"epoch": 3.6699507389162562,
"grad_norm": 0.18646457682094164,
"learning_rate": 8.083423001119855e-06,
"loss": 0.3,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09122344851493835,
"step": 745,
"valid_targets_mean": 8637.3,
"valid_targets_min": 2856
},
{
"epoch": 3.6945812807881775,
"grad_norm": 0.20175487117378538,
"learning_rate": 7.80885317375877e-06,
"loss": 0.2991,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10354309529066086,
"step": 750,
"valid_targets_mean": 8972.3,
"valid_targets_min": 390
},
{
"epoch": 3.7192118226600988,
"grad_norm": 0.18258436718922763,
"learning_rate": 7.537891890753879e-06,
"loss": 0.2975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10517409443855286,
"step": 755,
"valid_targets_mean": 9500.0,
"valid_targets_min": 3694
},
{
"epoch": 3.7438423645320196,
"grad_norm": 0.1888454473684914,
"learning_rate": 7.27061935586471e-06,
"loss": 0.3027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10511292517185211,
"step": 760,
"valid_targets_mean": 9314.8,
"valid_targets_min": 3498
},
{
"epoch": 3.768472906403941,
"grad_norm": 0.1906383407925554,
"learning_rate": 7.007114680991995e-06,
"loss": 0.3049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11089085787534714,
"step": 765,
"valid_targets_mean": 9594.2,
"valid_targets_min": 3836
},
{
"epoch": 3.793103448275862,
"grad_norm": 0.18789808493594767,
"learning_rate": 6.747455862760723e-06,
"loss": 0.3018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0920998752117157,
"step": 770,
"valid_targets_mean": 7893.3,
"valid_targets_min": 3364
},
{
"epoch": 3.8177339901477834,
"grad_norm": 0.1946470476381285,
"learning_rate": 6.491719759433414e-06,
"loss": 0.3021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09941431879997253,
"step": 775,
"valid_targets_mean": 8573.0,
"valid_targets_min": 2428
},
{
"epoch": 3.8423645320197046,
"grad_norm": 0.18335741825763685,
"learning_rate": 6.239982068160251e-06,
"loss": 0.2989,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10005414485931396,
"step": 780,
"valid_targets_mean": 8930.7,
"valid_targets_min": 3592
},
{
"epoch": 3.8669950738916254,
"grad_norm": 0.19394167335220494,
"learning_rate": 5.9923173025729895e-06,
"loss": 0.3018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0917779877781868,
"step": 785,
"valid_targets_mean": 8102.1,
"valid_targets_min": 2504
},
{
"epoch": 3.8916256157635467,
"grad_norm": 0.18120225767769269,
"learning_rate": 5.748798770729071e-06,
"loss": 0.2968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09662172943353653,
"step": 790,
"valid_targets_mean": 9064.0,
"valid_targets_min": 2340
},
{
"epoch": 3.916256157635468,
"grad_norm": 0.19190077238373823,
"learning_rate": 5.509498553412727e-06,
"loss": 0.3041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10005126893520355,
"step": 795,
"valid_targets_mean": 8898.5,
"valid_targets_min": 794
},
{
"epoch": 3.9408866995073892,
"grad_norm": 0.17974352936982052,
"learning_rate": 5.274487482799206e-06,
"loss": 0.2993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09953851997852325,
"step": 800,
"valid_targets_mean": 8959.3,
"valid_targets_min": 322
},
{
"epoch": 3.9655172413793105,
"grad_norm": 0.18698159849014778,
"learning_rate": 5.04383512148871e-06,
"loss": 0.307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10734543204307556,
"step": 805,
"valid_targets_mean": 9183.6,
"valid_targets_min": 3696
},
{
"epoch": 3.9901477832512313,
"grad_norm": 0.18442090586080964,
"learning_rate": 4.817609741916009e-06,
"loss": 0.3056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0857645720243454,
"step": 810,
"valid_targets_mean": 7837.0,
"valid_targets_min": 2597
},
{
"epoch": 4.014778325123153,
"grad_norm": 0.18552650025158898,
"learning_rate": 4.595878306142059e-06,
"loss": 0.2883,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09557130932807922,
"step": 815,
"valid_targets_mean": 8530.3,
"valid_targets_min": 1990
},
{
"epoch": 4.039408866995074,
"grad_norm": 0.19107540271966575,
"learning_rate": 4.37870644603336e-06,
"loss": 0.2915,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08814047276973724,
"step": 820,
"valid_targets_mean": 8476.8,
"valid_targets_min": 2709
},
{
"epoch": 4.064039408866995,
"grad_norm": 0.21852387313861962,
"learning_rate": 4.1661584438351645e-06,
"loss": 0.2911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08739292621612549,
"step": 825,
"valid_targets_mean": 8461.2,
"valid_targets_min": 352
},
{
"epoch": 4.088669950738916,
"grad_norm": 0.1839668348467172,
"learning_rate": 3.958297213144084e-06,
"loss": 0.2928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07950074970722198,
"step": 830,
"valid_targets_mean": 7643.1,
"valid_targets_min": 1946
},
{
"epoch": 4.113300492610837,
"grad_norm": 0.18845310147717448,
"learning_rate": 3.7551842802858772e-06,
"loss": 0.2892,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10723338276147842,
"step": 835,
"valid_targets_mean": 9729.5,
"valid_targets_min": 4630
},
{
"epoch": 4.137931034482759,
"grad_norm": 0.1940491672102659,
"learning_rate": 3.5568797661038004e-06,
"loss": 0.2913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09230605512857437,
"step": 840,
"valid_targets_mean": 8704.1,
"valid_targets_min": 3915
},
{
"epoch": 4.16256157635468,
"grad_norm": 0.18469213419035746,
"learning_rate": 3.3634423681630392e-06,
"loss": 0.2926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10094676911830902,
"step": 845,
"valid_targets_mean": 9719.5,
"valid_targets_min": 4368
},
{
"epoch": 4.187192118226601,
"grad_norm": 0.18737436904852822,
"learning_rate": 3.174929343376374e-06,
"loss": 0.2942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09700489789247513,
"step": 850,
"valid_targets_mean": 9506.8,
"valid_targets_min": 859
},
{
"epoch": 4.211822660098522,
"grad_norm": 0.18423499331496113,
"learning_rate": 2.991396491056331e-06,
"loss": 0.2835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10383116453886032,
"step": 855,
"valid_targets_mean": 9004.8,
"valid_targets_min": 2993
},
{
"epoch": 4.236453201970443,
"grad_norm": 0.19530539384844203,
"learning_rate": 2.812898136398705e-06,
"loss": 0.2938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10272261500358582,
"step": 860,
"valid_targets_mean": 10417.8,
"valid_targets_min": 2835
},
{
"epoch": 4.261083743842365,
"grad_norm": 0.1768936566418224,
"learning_rate": 2.6394871144024926e-06,
"loss": 0.2879,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08824898302555084,
"step": 865,
"valid_targets_mean": 8173.8,
"valid_targets_min": 761
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.18803866498160385,
"learning_rate": 2.471214754230866e-06,
"loss": 0.2894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09874047338962555,
"step": 870,
"valid_targets_mean": 8255.1,
"valid_targets_min": 3232
},
{
"epoch": 4.310344827586207,
"grad_norm": 0.18305855723051326,
"learning_rate": 2.3081308640178945e-06,
"loss": 0.2893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09291456639766693,
"step": 875,
"valid_targets_mean": 8635.8,
"valid_targets_min": 3871
},
{
"epoch": 4.334975369458128,
"grad_norm": 0.17966738406863703,
"learning_rate": 2.1502837161254873e-06,
"loss": 0.2916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10657632350921631,
"step": 880,
"valid_targets_mean": 9381.9,
"valid_targets_min": 3764
},
{
"epoch": 4.359605911330049,
"grad_norm": 0.16961839703667175,
"learning_rate": 1.9977200328548953e-06,
"loss": 0.2853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09088948369026184,
"step": 885,
"valid_targets_mean": 8735.8,
"valid_targets_min": 2093
},
{
"epoch": 4.384236453201971,
"grad_norm": 0.17218142432918168,
"learning_rate": 1.8504849726170637e-06,
"loss": 0.2913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09188289195299149,
"step": 890,
"valid_targets_mean": 8830.7,
"valid_targets_min": 2083
},
{
"epoch": 4.4088669950738915,
"grad_norm": 0.1813446374116515,
"learning_rate": 1.7086221165658544e-06,
"loss": 0.2897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09599706530570984,
"step": 895,
"valid_targets_mean": 8826.4,
"valid_targets_min": 3172
},
{
"epoch": 4.433497536945813,
"grad_norm": 0.18498320583010483,
"learning_rate": 1.5721734556981761e-06,
"loss": 0.2918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09900371730327606,
"step": 900,
"valid_targets_mean": 8690.2,
"valid_targets_min": 2278
},
{
"epoch": 4.458128078817734,
"grad_norm": 0.20510849376299953,
"learning_rate": 1.4411793784247263e-06,
"loss": 0.2921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10054295510053635,
"step": 905,
"valid_targets_mean": 8741.5,
"valid_targets_min": 467
},
{
"epoch": 4.482758620689655,
"grad_norm": 0.1922020499715368,
"learning_rate": 1.3156786586151916e-06,
"loss": 0.294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10521981865167618,
"step": 910,
"valid_targets_mean": 9091.2,
"valid_targets_min": 3243
},
{
"epoch": 4.5073891625615765,
"grad_norm": 0.17566941817494466,
"learning_rate": 1.195708444121253e-06,
"loss": 0.2912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0981191098690033,
"step": 915,
"valid_targets_mean": 8294.6,
"valid_targets_min": 3384
},
{
"epoch": 4.532019704433497,
"grad_norm": 0.18568976979390986,
"learning_rate": 1.0813042457809497e-06,
"loss": 0.2891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08985172212123871,
"step": 920,
"valid_targets_mean": 8663.1,
"valid_targets_min": 667
},
{
"epoch": 4.556650246305419,
"grad_norm": 0.1809563651428104,
"learning_rate": 9.724999269075598e-07,
"loss": 0.2919,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09896715730428696,
"step": 925,
"valid_targets_mean": 8589.9,
"valid_targets_min": 399
},
{
"epoch": 4.58128078817734,
"grad_norm": 0.1826346698165506,
"learning_rate": 8.693276932661732e-07,
"loss": 0.2918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09368403255939484,
"step": 930,
"valid_targets_mean": 8382.1,
"valid_targets_min": 2243
},
{
"epoch": 4.605911330049262,
"grad_norm": 0.17397784390869644,
"learning_rate": 7.718180835408584e-07,
"loss": 0.2899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09835617244243622,
"step": 935,
"valid_targets_mean": 8955.2,
"valid_targets_min": 378
},
{
"epoch": 4.630541871921182,
"grad_norm": 0.17704960582110957,
"learning_rate": 6.799999602953189e-07,
"loss": 0.2903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09321548044681549,
"step": 940,
"valid_targets_mean": 8988.6,
"valid_targets_min": 2533
},
{
"epoch": 4.655172413793103,
"grad_norm": 0.1741304537218507,
"learning_rate": 5.939005014296428e-07,
"loss": 0.2904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09430979192256927,
"step": 945,
"valid_targets_mean": 8396.6,
"valid_targets_min": 337
},
{
"epoch": 4.679802955665025,
"grad_norm": 0.16592953591918666,
"learning_rate": 5.135451921357337e-07,
"loss": 0.2939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10379470884799957,
"step": 950,
"valid_targets_mean": 9645.2,
"valid_targets_min": 2567
},
{
"epoch": 4.704433497536946,
"grad_norm": 0.17443346928975734,
"learning_rate": 4.3895781735375566e-07,
"loss": 0.2896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09814979881048203,
"step": 955,
"valid_targets_mean": 8773.4,
"valid_targets_min": 406
},
{
"epoch": 4.7290640394088665,
"grad_norm": 0.17648712594362892,
"learning_rate": 3.70160454731876e-07,
"loss": 0.2935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09249699860811234,
"step": 960,
"valid_targets_mean": 8434.4,
"valid_targets_min": 410
},
{
"epoch": 4.753694581280788,
"grad_norm": 0.16739065144158322,
"learning_rate": 3.0717346809132407e-07,
"loss": 0.2902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10244876146316528,
"step": 965,
"valid_targets_mean": 9382.5,
"valid_targets_min": 2089
},
{
"epoch": 4.778325123152709,
"grad_norm": 0.1667695775270446,
"learning_rate": 2.5001550139877707e-07,
"loss": 0.2858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09757320582866669,
"step": 970,
"valid_targets_mean": 9711.1,
"valid_targets_min": 5200
},
{
"epoch": 4.802955665024631,
"grad_norm": 0.17184028910965868,
"learning_rate": 1.987034732477877e-07,
"loss": 0.2892,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09788213670253754,
"step": 975,
"valid_targets_mean": 8935.1,
"valid_targets_min": 3319
},
{
"epoch": 4.827586206896552,
"grad_norm": 0.16740666268964893,
"learning_rate": 1.5325257185093923e-07,
"loss": 0.2851,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09289947897195816,
"step": 980,
"valid_targets_mean": 8989.3,
"valid_targets_min": 504
},
{
"epoch": 4.852216748768473,
"grad_norm": 0.17517346033665723,
"learning_rate": 1.1367625054416575e-07,
"loss": 0.2884,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09196974337100983,
"step": 985,
"valid_targets_mean": 8455.2,
"valid_targets_min": 845
},
{
"epoch": 4.876847290640394,
"grad_norm": 0.17165728117501497,
"learning_rate": 7.998622380461563e-08,
"loss": 0.2921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10432468354701996,
"step": 990,
"valid_targets_mean": 8684.4,
"valid_targets_min": 387
},
{
"epoch": 4.901477832512315,
"grad_norm": 0.16816491897161798,
"learning_rate": 5.219246378319387e-08,
"loss": 0.2887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0968252569437027,
"step": 995,
"valid_targets_mean": 8864.2,
"valid_targets_min": 4695
},
{
"epoch": 4.926108374384237,
"grad_norm": 0.18359960757267443,
"learning_rate": 3.030319735283449e-08,
"loss": 0.3004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09898233413696289,
"step": 1000,
"valid_targets_mean": 8488.2,
"valid_targets_min": 3522
},
{
"epoch": 4.9507389162561575,
"grad_norm": 0.1729827087715371,
"learning_rate": 1.4324903673370583e-08,
"loss": 0.2872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10422653704881668,
"step": 1005,
"valid_targets_mean": 9734.5,
"valid_targets_min": 5199
},
{
"epoch": 4.975369458128079,
"grad_norm": 0.1665182843059149,
"learning_rate": 4.262312273721758e-09,
"loss": 0.2894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08595976233482361,
"step": 1010,
"valid_targets_mean": 7786.4,
"valid_targets_min": 2848
},
{
"epoch": 5.0,
"grad_norm": 0.17102014635406348,
"learning_rate": 1.184016519673037e-10,
"loss": 0.2931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0942406952381134,
"step": 1015,
"valid_targets_mean": 8724.4,
"valid_targets_min": 2268
},
{
"epoch": 5.0,
"step": 1015,
"total_flos": 3.756182037619278e+18,
"train_loss": 0.0,
"train_runtime": 2.2907,
"train_samples_per_second": 42523.793,
"train_steps_per_second": 443.093
}
],
"logging_steps": 5,
"max_steps": 1015,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.756182037619278e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}