2277 lines
63 KiB
JSON
2277 lines
63 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1015,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.024630541871921183,
|
|
"grad_norm": 10.743034651431167,
|
|
"learning_rate": 1.5686274509803923e-06,
|
|
"loss": 0.8575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29485759139060974,
|
|
"step": 5,
|
|
"valid_targets_mean": 9236.3,
|
|
"valid_targets_min": 1969
|
|
},
|
|
{
|
|
"epoch": 0.04926108374384237,
|
|
"grad_norm": 4.965625847050405,
|
|
"learning_rate": 3.529411764705883e-06,
|
|
"loss": 0.8189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27123022079467773,
|
|
"step": 10,
|
|
"valid_targets_mean": 9325.8,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.07389162561576355,
|
|
"grad_norm": 1.5709764932057944,
|
|
"learning_rate": 5.4901960784313735e-06,
|
|
"loss": 0.7166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22228774428367615,
|
|
"step": 15,
|
|
"valid_targets_mean": 8391.1,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 0.09852216748768473,
|
|
"grad_norm": 1.1070034152098873,
|
|
"learning_rate": 7.450980392156863e-06,
|
|
"loss": 0.6554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19853773713111877,
|
|
"step": 20,
|
|
"valid_targets_mean": 8116.2,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 0.12315270935960591,
|
|
"grad_norm": 0.7952086999098457,
|
|
"learning_rate": 9.411764705882354e-06,
|
|
"loss": 0.6269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2077217698097229,
|
|
"step": 25,
|
|
"valid_targets_mean": 9368.0,
|
|
"valid_targets_min": 2823
|
|
},
|
|
{
|
|
"epoch": 0.1477832512315271,
|
|
"grad_norm": 0.649366154808441,
|
|
"learning_rate": 1.1372549019607844e-05,
|
|
"loss": 0.5914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18282532691955566,
|
|
"step": 30,
|
|
"valid_targets_mean": 8624.1,
|
|
"valid_targets_min": 3036
|
|
},
|
|
{
|
|
"epoch": 0.1724137931034483,
|
|
"grad_norm": 0.4388941481235093,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.5478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1840602457523346,
|
|
"step": 35,
|
|
"valid_targets_mean": 9535.8,
|
|
"valid_targets_min": 3381
|
|
},
|
|
{
|
|
"epoch": 0.19704433497536947,
|
|
"grad_norm": 0.3731243643842238,
|
|
"learning_rate": 1.5294117647058822e-05,
|
|
"loss": 0.5168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15646323561668396,
|
|
"step": 40,
|
|
"valid_targets_mean": 7694.9,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 0.22167487684729065,
|
|
"grad_norm": 0.314999922874189,
|
|
"learning_rate": 1.7254901960784314e-05,
|
|
"loss": 0.5032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15275967121124268,
|
|
"step": 45,
|
|
"valid_targets_mean": 8356.0,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 0.24630541871921183,
|
|
"grad_norm": 0.27241302838236864,
|
|
"learning_rate": 1.9215686274509807e-05,
|
|
"loss": 0.482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1699313223361969,
|
|
"step": 50,
|
|
"valid_targets_mean": 8695.2,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 0.270935960591133,
|
|
"grad_norm": 0.24171316383457545,
|
|
"learning_rate": 2.1176470588235296e-05,
|
|
"loss": 0.4697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15137627720832825,
|
|
"step": 55,
|
|
"valid_targets_mean": 8684.1,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 0.2955665024630542,
|
|
"grad_norm": 0.2752759358588667,
|
|
"learning_rate": 2.3137254901960788e-05,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15112951397895813,
|
|
"step": 60,
|
|
"valid_targets_mean": 8717.0,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 0.32019704433497537,
|
|
"grad_norm": 0.3256829290721947,
|
|
"learning_rate": 2.5098039215686277e-05,
|
|
"loss": 0.4471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15601575374603271,
|
|
"step": 65,
|
|
"valid_targets_mean": 8975.6,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 0.3448275862068966,
|
|
"grad_norm": 0.26016520519307323,
|
|
"learning_rate": 2.705882352941177e-05,
|
|
"loss": 0.4394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14817699790000916,
|
|
"step": 70,
|
|
"valid_targets_mean": 8358.4,
|
|
"valid_targets_min": 4473
|
|
},
|
|
{
|
|
"epoch": 0.3694581280788177,
|
|
"grad_norm": 0.27210923180659075,
|
|
"learning_rate": 2.9019607843137258e-05,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13734804093837738,
|
|
"step": 75,
|
|
"valid_targets_mean": 8500.8,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 0.39408866995073893,
|
|
"grad_norm": 0.2762491285053443,
|
|
"learning_rate": 3.098039215686275e-05,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12119661271572113,
|
|
"step": 80,
|
|
"valid_targets_mean": 8555.7,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 0.4187192118226601,
|
|
"grad_norm": 0.27953653823930036,
|
|
"learning_rate": 3.294117647058824e-05,
|
|
"loss": 0.4151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13656310737133026,
|
|
"step": 85,
|
|
"valid_targets_mean": 9335.4,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 0.4433497536945813,
|
|
"grad_norm": 0.27762179667403303,
|
|
"learning_rate": 3.490196078431373e-05,
|
|
"loss": 0.4137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14938178658485413,
|
|
"step": 90,
|
|
"valid_targets_mean": 9836.8,
|
|
"valid_targets_min": 4990
|
|
},
|
|
{
|
|
"epoch": 0.46798029556650245,
|
|
"grad_norm": 0.30533872508264365,
|
|
"learning_rate": 3.686274509803922e-05,
|
|
"loss": 0.4065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12681949138641357,
|
|
"step": 95,
|
|
"valid_targets_mean": 8551.3,
|
|
"valid_targets_min": 2982
|
|
},
|
|
{
|
|
"epoch": 0.49261083743842365,
|
|
"grad_norm": 0.3178616151302271,
|
|
"learning_rate": 3.882352941176471e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13543638586997986,
|
|
"step": 100,
|
|
"valid_targets_mean": 8156.8,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 0.5172413793103449,
|
|
"grad_norm": 0.3480550602168805,
|
|
"learning_rate": 3.999952639479403e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1413741409778595,
|
|
"step": 105,
|
|
"valid_targets_mean": 8588.5,
|
|
"valid_targets_min": 2292
|
|
},
|
|
{
|
|
"epoch": 0.541871921182266,
|
|
"grad_norm": 0.2886232799853676,
|
|
"learning_rate": 3.999419859382013e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12243609875440598,
|
|
"step": 110,
|
|
"valid_targets_mean": 8215.7,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.5665024630541872,
|
|
"grad_norm": 0.30402912482935435,
|
|
"learning_rate": 3.99829525676357e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13463672995567322,
|
|
"step": 115,
|
|
"valid_targets_mean": 9568.1,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 0.5911330049261084,
|
|
"grad_norm": 0.31173572113829323,
|
|
"learning_rate": 3.996579164503212e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13769567012786865,
|
|
"step": 120,
|
|
"valid_targets_mean": 8704.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 0.6157635467980296,
|
|
"grad_norm": 0.2681375523972611,
|
|
"learning_rate": 3.9942720905593045e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356489658355713,
|
|
"step": 125,
|
|
"valid_targets_mean": 9078.8,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.6403940886699507,
|
|
"grad_norm": 0.3358135332937248,
|
|
"learning_rate": 3.991374717819092e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14018836617469788,
|
|
"step": 130,
|
|
"valid_targets_mean": 9290.2,
|
|
"valid_targets_min": 3098
|
|
},
|
|
{
|
|
"epoch": 0.6650246305418719,
|
|
"grad_norm": 0.3112144751609365,
|
|
"learning_rate": 3.987887903896564e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13718704879283905,
|
|
"step": 135,
|
|
"valid_targets_mean": 8901.3,
|
|
"valid_targets_min": 2172
|
|
},
|
|
{
|
|
"epoch": 0.6896551724137931,
|
|
"grad_norm": 0.37646691210233985,
|
|
"learning_rate": 3.9838126808786006e-05,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294289231300354,
|
|
"step": 140,
|
|
"valid_targets_mean": 9038.9,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.2878512375569559,
|
|
"learning_rate": 3.9791502550194803e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291024088859558,
|
|
"step": 145,
|
|
"valid_targets_mean": 9420.4,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 0.7389162561576355,
|
|
"grad_norm": 0.24042444621370282,
|
|
"learning_rate": 3.973902006383831e-05,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13769212365150452,
|
|
"step": 150,
|
|
"valid_targets_mean": 8992.5,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 0.7635467980295566,
|
|
"grad_norm": 0.310912607819178,
|
|
"learning_rate": 3.968069488438139e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12700515985488892,
|
|
"step": 155,
|
|
"valid_targets_mean": 8964.4,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 0.7881773399014779,
|
|
"grad_norm": 0.2655434672665567,
|
|
"learning_rate": 3.9616544275909195e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11202602088451385,
|
|
"step": 160,
|
|
"valid_targets_mean": 8055.7,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 0.812807881773399,
|
|
"grad_norm": 0.26868643875179116,
|
|
"learning_rate": 3.954658722681712e-05,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12862944602966309,
|
|
"step": 165,
|
|
"valid_targets_mean": 9318.7,
|
|
"valid_targets_min": 4356
|
|
},
|
|
{
|
|
"epoch": 0.8374384236453202,
|
|
"grad_norm": 0.28733672498228385,
|
|
"learning_rate": 3.9470844444190246e-05,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13202814757823944,
|
|
"step": 170,
|
|
"valid_targets_mean": 9219.0,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 0.8620689655172413,
|
|
"grad_norm": 0.28313069036684496,
|
|
"learning_rate": 3.938933834767414e-05,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1158134713768959,
|
|
"step": 175,
|
|
"valid_targets_mean": 8608.2,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 0.8866995073891626,
|
|
"grad_norm": 0.26052104379161,
|
|
"learning_rate": 3.930209306283867e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11961864680051804,
|
|
"step": 180,
|
|
"valid_targets_mean": 7993.2,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 0.9113300492610837,
|
|
"grad_norm": 0.258686222921349,
|
|
"learning_rate": 3.9209134414036925e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10488448292016983,
|
|
"step": 185,
|
|
"valid_targets_mean": 8279.7,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.9359605911330049,
|
|
"grad_norm": 0.26136503207196515,
|
|
"learning_rate": 3.9110489916761276e-05,
|
|
"loss": 0.3801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13918861746788025,
|
|
"step": 190,
|
|
"valid_targets_mean": 9786.9,
|
|
"valid_targets_min": 3188
|
|
},
|
|
{
|
|
"epoch": 0.9605911330049262,
|
|
"grad_norm": 0.2743644632084587,
|
|
"learning_rate": 3.9006188769498865e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13338595628738403,
|
|
"step": 195,
|
|
"valid_targets_mean": 10200.8,
|
|
"valid_targets_min": 5539
|
|
},
|
|
{
|
|
"epoch": 0.9852216748768473,
|
|
"grad_norm": 0.24805804478278223,
|
|
"learning_rate": 3.8896261845088955e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12868089973926544,
|
|
"step": 200,
|
|
"valid_targets_mean": 8725.2,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 1.0098522167487685,
|
|
"grad_norm": 0.27564411587309895,
|
|
"learning_rate": 3.8780741681584636e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10980004072189331,
|
|
"step": 205,
|
|
"valid_targets_mean": 8534.5,
|
|
"valid_targets_min": 3406
|
|
},
|
|
{
|
|
"epoch": 1.0344827586206897,
|
|
"grad_norm": 0.2788578201037295,
|
|
"learning_rate": 3.865966247262166e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11370569467544556,
|
|
"step": 210,
|
|
"valid_targets_mean": 8309.0,
|
|
"valid_targets_min": 3283
|
|
},
|
|
{
|
|
"epoch": 1.0591133004926108,
|
|
"grad_norm": 0.248625884092612,
|
|
"learning_rate": 3.8533060057297235e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12278437614440918,
|
|
"step": 215,
|
|
"valid_targets_mean": 8779.9,
|
|
"valid_targets_min": 2576
|
|
},
|
|
{
|
|
"epoch": 1.083743842364532,
|
|
"grad_norm": 0.2684186476300992,
|
|
"learning_rate": 3.840097190956175e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1202234998345375,
|
|
"step": 220,
|
|
"valid_targets_mean": 9176.9,
|
|
"valid_targets_min": 4137
|
|
},
|
|
{
|
|
"epoch": 1.1083743842364533,
|
|
"grad_norm": 0.25632340589308605,
|
|
"learning_rate": 3.826343712712658e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10630079358816147,
|
|
"step": 225,
|
|
"valid_targets_mean": 8646.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 1.1330049261083743,
|
|
"grad_norm": 0.27498707289566154,
|
|
"learning_rate": 3.81204964198913e-05,
|
|
"loss": 0.3595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310245394706726,
|
|
"step": 230,
|
|
"valid_targets_mean": 9294.8,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 1.1576354679802956,
|
|
"grad_norm": 0.2642154827340513,
|
|
"learning_rate": 3.797219209789365e-05,
|
|
"loss": 0.3497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1117246225476265,
|
|
"step": 235,
|
|
"valid_targets_mean": 8429.1,
|
|
"valid_targets_min": 1930
|
|
},
|
|
{
|
|
"epoch": 1.1822660098522166,
|
|
"grad_norm": 0.2836720432865943,
|
|
"learning_rate": 3.7818568058785906e-05,
|
|
"loss": 0.3585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11454908549785614,
|
|
"step": 240,
|
|
"valid_targets_mean": 8133.4,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 1.206896551724138,
|
|
"grad_norm": 0.3365645717447432,
|
|
"learning_rate": 3.7659669774841274e-05,
|
|
"loss": 0.3591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11681550741195679,
|
|
"step": 245,
|
|
"valid_targets_mean": 8933.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.2315270935960592,
|
|
"grad_norm": 0.31327662356339114,
|
|
"learning_rate": 3.749554427949426e-05,
|
|
"loss": 0.351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1127677857875824,
|
|
"step": 250,
|
|
"valid_targets_mean": 8292.3,
|
|
"valid_targets_min": 2632
|
|
},
|
|
{
|
|
"epoch": 1.2561576354679804,
|
|
"grad_norm": 0.23261922775556002,
|
|
"learning_rate": 3.7326240153418895e-05,
|
|
"loss": 0.3501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259966492652893,
|
|
"step": 255,
|
|
"valid_targets_mean": 9653.4,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 1.2807881773399015,
|
|
"grad_norm": 0.2525335827059363,
|
|
"learning_rate": 3.7151807510148975e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11980243027210236,
|
|
"step": 260,
|
|
"valid_targets_mean": 9146.5,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 1.3054187192118227,
|
|
"grad_norm": 0.3310408716814809,
|
|
"learning_rate": 3.697229798124464e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11194953322410583,
|
|
"step": 265,
|
|
"valid_targets_mean": 9172.4,
|
|
"valid_targets_min": 3619
|
|
},
|
|
{
|
|
"epoch": 1.3300492610837438,
|
|
"grad_norm": 0.2648103991246787,
|
|
"learning_rate": 3.678776470100954e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303391456604004,
|
|
"step": 270,
|
|
"valid_targets_mean": 8879.3,
|
|
"valid_targets_min": 4536
|
|
},
|
|
{
|
|
"epoch": 1.354679802955665,
|
|
"grad_norm": 0.25449167975817794,
|
|
"learning_rate": 3.659826229076326e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1403665840625763,
|
|
"step": 275,
|
|
"valid_targets_mean": 10481.7,
|
|
"valid_targets_min": 4554
|
|
},
|
|
{
|
|
"epoch": 1.3793103448275863,
|
|
"grad_norm": 0.2818830814758012,
|
|
"learning_rate": 3.640384684267357e-05,
|
|
"loss": 0.3469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1068924218416214,
|
|
"step": 280,
|
|
"valid_targets_mean": 8961.1,
|
|
"valid_targets_min": 3230
|
|
},
|
|
{
|
|
"epoch": 1.4039408866995073,
|
|
"grad_norm": 0.2693512065658683,
|
|
"learning_rate": 3.6204575903153285e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11001914739608765,
|
|
"step": 285,
|
|
"valid_targets_mean": 8176.8,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.25311056093985246,
|
|
"learning_rate": 3.600050845582669e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12169365584850311,
|
|
"step": 290,
|
|
"valid_targets_mean": 9581.9,
|
|
"valid_targets_min": 4873
|
|
},
|
|
{
|
|
"epoch": 1.4532019704433496,
|
|
"grad_norm": 0.2633607633838574,
|
|
"learning_rate": 3.57917049040706e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1089414581656456,
|
|
"step": 295,
|
|
"valid_targets_mean": 8416.3,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 1.477832512315271,
|
|
"grad_norm": 0.244398881708659,
|
|
"learning_rate": 3.557822705313507e-05,
|
|
"loss": 0.3518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11258911341428757,
|
|
"step": 300,
|
|
"valid_targets_mean": 8594.2,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 1.5024630541871922,
|
|
"grad_norm": 0.24997620578865998,
|
|
"learning_rate": 3.5360138091849276e-05,
|
|
"loss": 0.3469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11095688492059708,
|
|
"step": 305,
|
|
"valid_targets_mean": 8580.7,
|
|
"valid_targets_min": 3098
|
|
},
|
|
{
|
|
"epoch": 1.5270935960591134,
|
|
"grad_norm": 0.2245714689080682,
|
|
"learning_rate": 3.513750257391778e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1221964880824089,
|
|
"step": 310,
|
|
"valid_targets_mean": 9254.5,
|
|
"valid_targets_min": 3804
|
|
},
|
|
{
|
|
"epoch": 1.5517241379310345,
|
|
"grad_norm": 0.25608384850505034,
|
|
"learning_rate": 3.4910386398812784e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1219601258635521,
|
|
"step": 315,
|
|
"valid_targets_mean": 8776.5,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 1.5763546798029555,
|
|
"grad_norm": 0.3647096406549712,
|
|
"learning_rate": 3.467885679226817e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1203581690788269,
|
|
"step": 320,
|
|
"valid_targets_mean": 9392.0,
|
|
"valid_targets_min": 3986
|
|
},
|
|
{
|
|
"epoch": 1.6009852216748768,
|
|
"grad_norm": 0.24346253896666947,
|
|
"learning_rate": 3.444298228638077e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12136732041835785,
|
|
"step": 325,
|
|
"valid_targets_mean": 9895.2,
|
|
"valid_targets_min": 4423
|
|
},
|
|
{
|
|
"epoch": 1.625615763546798,
|
|
"grad_norm": 0.2428346066186375,
|
|
"learning_rate": 3.420283269932514e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11618741601705551,
|
|
"step": 330,
|
|
"valid_targets_mean": 8762.7,
|
|
"valid_targets_min": 3014
|
|
},
|
|
{
|
|
"epoch": 1.6502463054187193,
|
|
"grad_norm": 0.24297086102914583,
|
|
"learning_rate": 3.3958479114687515e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11359558999538422,
|
|
"step": 335,
|
|
"valid_targets_mean": 9019.1,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 1.6748768472906403,
|
|
"grad_norm": 0.23444365454203103,
|
|
"learning_rate": 3.3709993860425346e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1222086101770401,
|
|
"step": 340,
|
|
"valid_targets_mean": 9328.7,
|
|
"valid_targets_min": 2345
|
|
},
|
|
{
|
|
"epoch": 1.6995073891625616,
|
|
"grad_norm": 0.22889649248851593,
|
|
"learning_rate": 3.345745048745838e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11141127347946167,
|
|
"step": 345,
|
|
"valid_targets_mean": 8866.8,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 1.7241379310344827,
|
|
"grad_norm": 0.2343831962801274,
|
|
"learning_rate": 3.320092374789782e-05,
|
|
"loss": 0.3546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11277924478054047,
|
|
"step": 350,
|
|
"valid_targets_mean": 8265.9,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 1.748768472906404,
|
|
"grad_norm": 0.2467687483921903,
|
|
"learning_rate": 3.2940489572919917e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11757723987102509,
|
|
"step": 355,
|
|
"valid_targets_mean": 9222.7,
|
|
"valid_targets_min": 3977
|
|
},
|
|
{
|
|
"epoch": 1.7733990147783252,
|
|
"grad_norm": 0.2576031897620249,
|
|
"learning_rate": 3.267622505029053e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1083230972290039,
|
|
"step": 360,
|
|
"valid_targets_mean": 8165.4,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 1.7980295566502464,
|
|
"grad_norm": 0.2847455800013209,
|
|
"learning_rate": 3.24082084015474e-05,
|
|
"loss": 0.3492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10450009256601334,
|
|
"step": 365,
|
|
"valid_targets_mean": 7683.3,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.8226600985221675,
|
|
"grad_norm": 0.24889533758877141,
|
|
"learning_rate": 3.213651895884683e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12324145436286926,
|
|
"step": 370,
|
|
"valid_targets_mean": 9273.4,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 1.8472906403940885,
|
|
"grad_norm": 0.2509726775951525,
|
|
"learning_rate": 3.1861237141481506e-05,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10529517382383347,
|
|
"step": 375,
|
|
"valid_targets_mean": 7376.7,
|
|
"valid_targets_min": 2690
|
|
},
|
|
{
|
|
"epoch": 1.8719211822660098,
|
|
"grad_norm": 0.30513053329486167,
|
|
"learning_rate": 3.158244443207671e-05,
|
|
"loss": 0.3535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12324077636003494,
|
|
"step": 380,
|
|
"valid_targets_mean": 8858.1,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 1.896551724137931,
|
|
"grad_norm": 0.257201744023466,
|
|
"learning_rate": 3.130022335247163e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12891273200511932,
|
|
"step": 385,
|
|
"valid_targets_mean": 9345.1,
|
|
"valid_targets_min": 5281
|
|
},
|
|
{
|
|
"epoch": 1.9211822660098523,
|
|
"grad_norm": 0.24998558537607088,
|
|
"learning_rate": 3.101465743929318e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11640559881925583,
|
|
"step": 390,
|
|
"valid_targets_mean": 9220.7,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 1.9458128078817734,
|
|
"grad_norm": 0.22830611933571576,
|
|
"learning_rate": 3.072583121922939e-05,
|
|
"loss": 0.3455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11128903925418854,
|
|
"step": 395,
|
|
"valid_targets_mean": 8883.6,
|
|
"valid_targets_min": 2488
|
|
},
|
|
{
|
|
"epoch": 1.9704433497536946,
|
|
"grad_norm": 0.22396942549404023,
|
|
"learning_rate": 3.0433830184009694e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11893004179000854,
|
|
"step": 400,
|
|
"valid_targets_mean": 8981.7,
|
|
"valid_targets_min": 2763
|
|
},
|
|
{
|
|
"epoch": 1.9950738916256157,
|
|
"grad_norm": 0.24644092979609167,
|
|
"learning_rate": 3.0138740765099724e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1190701425075531,
|
|
"step": 405,
|
|
"valid_targets_mean": 8123.2,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 2.019704433497537,
|
|
"grad_norm": 0.2650309894835959,
|
|
"learning_rate": 2.984065030811776e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11054711788892746,
|
|
"step": 410,
|
|
"valid_targets_mean": 8905.1,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 2.044334975369458,
|
|
"grad_norm": 0.22732032307524816,
|
|
"learning_rate": 2.9539647046980716e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10819808393716812,
|
|
"step": 415,
|
|
"valid_targets_mean": 8779.8,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 2.0689655172413794,
|
|
"grad_norm": 0.25644205605632525,
|
|
"learning_rate": 2.923582007778716e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10985960066318512,
|
|
"step": 420,
|
|
"valid_targets_mean": 9446.7,
|
|
"valid_targets_min": 3496
|
|
},
|
|
{
|
|
"epoch": 2.0935960591133007,
|
|
"grad_norm": 0.24088638698690903,
|
|
"learning_rate": 2.8929259332445096e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10128775238990784,
|
|
"step": 425,
|
|
"valid_targets_mean": 9092.7,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 2.1182266009852215,
|
|
"grad_norm": 0.24541067677362202,
|
|
"learning_rate": 2.8620055552052403e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11216947436332703,
|
|
"step": 430,
|
|
"valid_targets_mean": 8788.4,
|
|
"valid_targets_min": 3614
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.2482079004721641,
|
|
"learning_rate": 2.8308300260037734e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1115947961807251,
|
|
"step": 435,
|
|
"valid_targets_mean": 9745.7,
|
|
"valid_targets_min": 3675
|
|
},
|
|
{
|
|
"epoch": 2.167487684729064,
|
|
"grad_norm": 0.22913552233839607,
|
|
"learning_rate": 2.7994085735069814e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10517291724681854,
|
|
"step": 440,
|
|
"valid_targets_mean": 8710.5,
|
|
"valid_targets_min": 3662
|
|
},
|
|
{
|
|
"epoch": 2.1921182266009853,
|
|
"grad_norm": 0.24146267991687,
|
|
"learning_rate": 2.767750498374327e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12296651303768158,
|
|
"step": 445,
|
|
"valid_targets_mean": 8849.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.2167487684729066,
|
|
"grad_norm": 0.21827048442446823,
|
|
"learning_rate": 2.735865171304889e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10590243339538574,
|
|
"step": 450,
|
|
"valid_targets_mean": 8600.2,
|
|
"valid_targets_min": 3249
|
|
},
|
|
{
|
|
"epoch": 2.2413793103448274,
|
|
"grad_norm": 0.23028098738931826,
|
|
"learning_rate": 2.703762030263666e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09583649784326553,
|
|
"step": 455,
|
|
"valid_targets_mean": 8295.3,
|
|
"valid_targets_min": 3305
|
|
},
|
|
{
|
|
"epoch": 2.2660098522167487,
|
|
"grad_norm": 0.22275379600509554,
|
|
"learning_rate": 2.6714505776879666e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1080237478017807,
|
|
"step": 460,
|
|
"valid_targets_mean": 9274.6,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 2.29064039408867,
|
|
"grad_norm": 0.2207031504793314,
|
|
"learning_rate": 2.6389403776747116e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11194107681512833,
|
|
"step": 465,
|
|
"valid_targets_mean": 9042.2,
|
|
"valid_targets_min": 4617
|
|
},
|
|
{
|
|
"epoch": 2.315270935960591,
|
|
"grad_norm": 0.2241399700657853,
|
|
"learning_rate": 2.606241053149492e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10940317064523697,
|
|
"step": 470,
|
|
"valid_targets_mean": 9518.9,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 2.3399014778325125,
|
|
"grad_norm": 0.24058313899023676,
|
|
"learning_rate": 2.5733622830182095e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09529858827590942,
|
|
"step": 475,
|
|
"valid_targets_mean": 8239.7,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 2.3645320197044333,
|
|
"grad_norm": 0.23562354398146523,
|
|
"learning_rate": 2.5403137993021483e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09677626192569733,
|
|
"step": 480,
|
|
"valid_targets_mean": 8106.5,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 2.3891625615763545,
|
|
"grad_norm": 0.2530852630178121,
|
|
"learning_rate": 2.5071053842573264e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10545364022254944,
|
|
"step": 485,
|
|
"valid_targets_mean": 9255.8,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 2.413793103448276,
|
|
"grad_norm": 0.21881629429554156,
|
|
"learning_rate": 2.473746867478973e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11455640196800232,
|
|
"step": 490,
|
|
"valid_targets_mean": 9913.8,
|
|
"valid_targets_min": 3181
|
|
},
|
|
{
|
|
"epoch": 2.438423645320197,
|
|
"grad_norm": 0.2524273469977475,
|
|
"learning_rate": 2.4402481229919982e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10793764144182205,
|
|
"step": 495,
|
|
"valid_targets_mean": 9409.1,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 2.4630541871921183,
|
|
"grad_norm": 0.21182409471855082,
|
|
"learning_rate": 2.406619066328311e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10518929362297058,
|
|
"step": 500,
|
|
"valid_targets_mean": 9363.0,
|
|
"valid_targets_min": 4145
|
|
},
|
|
{
|
|
"epoch": 2.4876847290640396,
|
|
"grad_norm": 0.2191040471839855,
|
|
"learning_rate": 2.3728696515918496e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10714466869831085,
|
|
"step": 505,
|
|
"valid_targets_mean": 8711.5,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 2.512315270935961,
|
|
"grad_norm": 0.2448105118684568,
|
|
"learning_rate": 2.3390098685121938e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09540620446205139,
|
|
"step": 510,
|
|
"valid_targets_mean": 8153.1,
|
|
"valid_targets_min": 3180
|
|
},
|
|
{
|
|
"epoch": 2.5369458128078817,
|
|
"grad_norm": 0.220518003206731,
|
|
"learning_rate": 2.3050497394876363e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10619120299816132,
|
|
"step": 515,
|
|
"valid_targets_mean": 8894.3,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 2.561576354679803,
|
|
"grad_norm": 0.23717022293975634,
|
|
"learning_rate": 2.2709993166185803e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10815994441509247,
|
|
"step": 520,
|
|
"valid_targets_mean": 8602.2,
|
|
"valid_targets_min": 4126
|
|
},
|
|
{
|
|
"epoch": 2.586206896551724,
|
|
"grad_norm": 0.2312939895442153,
|
|
"learning_rate": 2.2368686787321475e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10106731951236725,
|
|
"step": 525,
|
|
"valid_targets_mean": 7950.1,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.6108374384236455,
|
|
"grad_norm": 0.22544526165656265,
|
|
"learning_rate": 2.2026679283988727e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10532703995704651,
|
|
"step": 530,
|
|
"valid_targets_mean": 9430.9,
|
|
"valid_targets_min": 4000
|
|
},
|
|
{
|
|
"epoch": 2.6354679802955667,
|
|
"grad_norm": 0.20231326994814502,
|
|
"learning_rate": 2.168407188942373e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11423169821500778,
|
|
"step": 535,
|
|
"valid_targets_mean": 9679.7,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 2.6600985221674875,
|
|
"grad_norm": 0.20631608444099137,
|
|
"learning_rate": 2.1340966014428744e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09936245530843735,
|
|
"step": 540,
|
|
"valid_targets_mean": 8590.2,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 2.684729064039409,
|
|
"grad_norm": 0.2264625162075009,
|
|
"learning_rate": 2.0997463217354803e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11137133836746216,
|
|
"step": 545,
|
|
"valid_targets_mean": 8606.8,
|
|
"valid_targets_min": 2707
|
|
},
|
|
{
|
|
"epoch": 2.70935960591133,
|
|
"grad_norm": 0.19869783835319854,
|
|
"learning_rate": 2.065366517404071e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11436265707015991,
|
|
"step": 550,
|
|
"valid_targets_mean": 9982.7,
|
|
"valid_targets_min": 5207
|
|
},
|
|
{
|
|
"epoch": 2.7339901477832513,
|
|
"grad_norm": 0.21361086043176336,
|
|
"learning_rate": 2.030967364771733e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10924769937992096,
|
|
"step": 555,
|
|
"valid_targets_mean": 8423.0,
|
|
"valid_targets_min": 3589
|
|
},
|
|
{
|
|
"epoch": 2.7586206896551726,
|
|
"grad_norm": 0.2159779190682014,
|
|
"learning_rate": 1.996559045888593e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11431802809238434,
|
|
"step": 560,
|
|
"valid_targets_mean": 9245.8,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.7832512315270934,
|
|
"grad_norm": 0.19784699425306007,
|
|
"learning_rate": 1.9621517455179627e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11673343181610107,
|
|
"step": 565,
|
|
"valid_targets_mean": 9697.0,
|
|
"valid_targets_min": 4763
|
|
},
|
|
{
|
|
"epoch": 2.8078817733990147,
|
|
"grad_norm": 0.21421367114423392,
|
|
"learning_rate": 1.9277556481216737e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10330238938331604,
|
|
"step": 570,
|
|
"valid_targets_mean": 8288.4,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 2.832512315270936,
|
|
"grad_norm": 0.19836582938289005,
|
|
"learning_rate": 1.893380934845514e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12032057344913483,
|
|
"step": 575,
|
|
"valid_targets_mean": 8848.4,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.21631083193557546,
|
|
"learning_rate": 1.8590377805056306e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10158735513687134,
|
|
"step": 580,
|
|
"valid_targets_mean": 8897.4,
|
|
"valid_targets_min": 2826
|
|
},
|
|
{
|
|
"epoch": 2.8817733990147785,
|
|
"grad_norm": 0.20981357344340126,
|
|
"learning_rate": 1.8247363505768177e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09952034801244736,
|
|
"step": 585,
|
|
"valid_targets_mean": 8201.6,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 2.9064039408866993,
|
|
"grad_norm": 0.2271419153781403,
|
|
"learning_rate": 1.7904867981835617e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11091038584709167,
|
|
"step": 590,
|
|
"valid_targets_mean": 9144.8,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 2.9310344827586206,
|
|
"grad_norm": 0.20711350475630588,
|
|
"learning_rate": 1.7562992610947517e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0879313200712204,
|
|
"step": 595,
|
|
"valid_targets_mean": 6960.7,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 2.955665024630542,
|
|
"grad_norm": 0.20228134899502237,
|
|
"learning_rate": 1.7221838587229215e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12365391105413437,
|
|
"step": 600,
|
|
"valid_targets_mean": 9605.3,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 2.980295566502463,
|
|
"grad_norm": 0.2081591697388915,
|
|
"learning_rate": 1.6881506891289386e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10903681814670563,
|
|
"step": 605,
|
|
"valid_targets_mean": 8928.9,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 3.0049261083743843,
|
|
"grad_norm": 0.23994026035218605,
|
|
"learning_rate": 1.654209826033004e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10147841274738312,
|
|
"step": 610,
|
|
"valid_targets_mean": 8829.4,
|
|
"valid_targets_min": 4278
|
|
},
|
|
{
|
|
"epoch": 3.0295566502463056,
|
|
"grad_norm": 0.22674405029866287,
|
|
"learning_rate": 1.6203713158328626e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10697513818740845,
|
|
"step": 615,
|
|
"valid_targets_mean": 9735.6,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 3.0541871921182264,
|
|
"grad_norm": 0.23064581576560553,
|
|
"learning_rate": 1.586645174630094e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0928591638803482,
|
|
"step": 620,
|
|
"valid_targets_mean": 8492.0,
|
|
"valid_targets_min": 3198
|
|
},
|
|
{
|
|
"epoch": 3.0788177339901477,
|
|
"grad_norm": 0.23288072128103748,
|
|
"learning_rate": 1.5530413852653816e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10058435797691345,
|
|
"step": 625,
|
|
"valid_targets_mean": 8571.2,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 3.103448275862069,
|
|
"grad_norm": 0.21989355607355687,
|
|
"learning_rate": 1.5195698943636135e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09678147733211517,
|
|
"step": 630,
|
|
"valid_targets_mean": 8105.6,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 3.12807881773399,
|
|
"grad_norm": 0.19891006352522667,
|
|
"learning_rate": 1.4862406093897175e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09648677706718445,
|
|
"step": 635,
|
|
"valid_targets_mean": 8988.8,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 3.1527093596059115,
|
|
"grad_norm": 0.24655653698082503,
|
|
"learning_rate": 1.4530633957160733e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08152036368846893,
|
|
"step": 640,
|
|
"valid_targets_mean": 7848.4,
|
|
"valid_targets_min": 2993
|
|
},
|
|
{
|
|
"epoch": 3.1773399014778327,
|
|
"grad_norm": 0.22762238372136157,
|
|
"learning_rate": 1.4200480737023943e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10375897586345673,
|
|
"step": 645,
|
|
"valid_targets_mean": 9312.4,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 3.2019704433497536,
|
|
"grad_norm": 0.22534789337999453,
|
|
"learning_rate": 1.3872044157889297e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09716019779443741,
|
|
"step": 650,
|
|
"valid_targets_mean": 8545.7,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 3.226600985221675,
|
|
"grad_norm": 0.20514988270828538,
|
|
"learning_rate": 1.3545421436038477e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10193373262882233,
|
|
"step": 655,
|
|
"valid_targets_mean": 8863.9,
|
|
"valid_targets_min": 2907
|
|
},
|
|
{
|
|
"epoch": 3.251231527093596,
|
|
"grad_norm": 0.1978171411281838,
|
|
"learning_rate": 1.3220709250856656e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0844145119190216,
|
|
"step": 660,
|
|
"valid_targets_mean": 8168.6,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 3.2758620689655173,
|
|
"grad_norm": 0.20349161652396405,
|
|
"learning_rate": 1.2898003716215626e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10471087694168091,
|
|
"step": 665,
|
|
"valid_targets_mean": 9535.9,
|
|
"valid_targets_min": 3279
|
|
},
|
|
{
|
|
"epoch": 3.3004926108374386,
|
|
"grad_norm": 0.19864565640995638,
|
|
"learning_rate": 1.2577400352024426e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09200199693441391,
|
|
"step": 670,
|
|
"valid_targets_mean": 8635.4,
|
|
"valid_targets_min": 2501
|
|
},
|
|
{
|
|
"epoch": 3.3251231527093594,
|
|
"grad_norm": 0.2096280531918653,
|
|
"learning_rate": 1.2258994055955658e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09174078702926636,
|
|
"step": 675,
|
|
"valid_targets_mean": 8636.1,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 3.3497536945812807,
|
|
"grad_norm": 0.21102625245478873,
|
|
"learning_rate": 1.1942879075356135e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10436161607503891,
|
|
"step": 680,
|
|
"valid_targets_mean": 9196.2,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 3.374384236453202,
|
|
"grad_norm": 0.20074354219907545,
|
|
"learning_rate": 1.1629148979349836e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09593068063259125,
|
|
"step": 685,
|
|
"valid_targets_mean": 9236.0,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 3.399014778325123,
|
|
"grad_norm": 0.21770217204418577,
|
|
"learning_rate": 1.1317896631141814e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10593973100185394,
|
|
"step": 690,
|
|
"valid_targets_mean": 8596.7,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 3.4236453201970445,
|
|
"grad_norm": 0.202033129923407,
|
|
"learning_rate": 1.1009214160530875e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08792443573474884,
|
|
"step": 695,
|
|
"valid_targets_mean": 7999.6,
|
|
"valid_targets_min": 3034
|
|
},
|
|
{
|
|
"epoch": 3.4482758620689653,
|
|
"grad_norm": 0.19377905050918098,
|
|
"learning_rate": 1.0703192936639481e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08651701360940933,
|
|
"step": 700,
|
|
"valid_targets_mean": 8115.2,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 3.4729064039408866,
|
|
"grad_norm": 0.18949986820075895,
|
|
"learning_rate": 1.0399923540868712e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10459959506988525,
|
|
"step": 705,
|
|
"valid_targets_mean": 9690.8,
|
|
"valid_targets_min": 3185
|
|
},
|
|
{
|
|
"epoch": 3.497536945812808,
|
|
"grad_norm": 0.2011804523127738,
|
|
"learning_rate": 1.0099495740086454e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10490408539772034,
|
|
"step": 710,
|
|
"valid_targets_mean": 8907.2,
|
|
"valid_targets_min": 2179
|
|
},
|
|
{
|
|
"epoch": 3.522167487684729,
|
|
"grad_norm": 0.20548088230585662,
|
|
"learning_rate": 9.801998460056643e-06,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10383732616901398,
|
|
"step": 715,
|
|
"valid_targets_mean": 8324.9,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 3.5467980295566504,
|
|
"grad_norm": 0.19683329981111766,
|
|
"learning_rate": 9.507519759117546e-06,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09702172875404358,
|
|
"step": 720,
|
|
"valid_targets_mean": 8744.2,
|
|
"valid_targets_min": 3967
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.19625336438708468,
|
|
"learning_rate": 9.216146802116676e-06,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09151807427406311,
|
|
"step": 725,
|
|
"valid_targets_mean": 8401.1,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 3.596059113300493,
|
|
"grad_norm": 0.18708247392345115,
|
|
"learning_rate": 8.92796583461031e-06,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09643843024969101,
|
|
"step": 730,
|
|
"valid_targets_mean": 8835.6,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 3.6206896551724137,
|
|
"grad_norm": 0.19360040135948756,
|
|
"learning_rate": 8.643062157335e-06,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08726485073566437,
|
|
"step": 735,
|
|
"valid_targets_mean": 8767.6,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 3.645320197044335,
|
|
"grad_norm": 0.18440066609355732,
|
|
"learning_rate": 8.361520100958856e-06,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08847818523645401,
|
|
"step": 740,
|
|
"valid_targets_mean": 9271.4,
|
|
"valid_targets_min": 4099
|
|
},
|
|
{
|
|
"epoch": 3.6699507389162562,
|
|
"grad_norm": 0.18646457682094164,
|
|
"learning_rate": 8.083423001119855e-06,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09122344851493835,
|
|
"step": 745,
|
|
"valid_targets_mean": 8637.3,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 3.6945812807881775,
|
|
"grad_norm": 0.20175487117378538,
|
|
"learning_rate": 7.80885317375877e-06,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10354309529066086,
|
|
"step": 750,
|
|
"valid_targets_mean": 8972.3,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 3.7192118226600988,
|
|
"grad_norm": 0.18258436718922763,
|
|
"learning_rate": 7.537891890753879e-06,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10517409443855286,
|
|
"step": 755,
|
|
"valid_targets_mean": 9500.0,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 3.7438423645320196,
|
|
"grad_norm": 0.1888454473684914,
|
|
"learning_rate": 7.27061935586471e-06,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10511292517185211,
|
|
"step": 760,
|
|
"valid_targets_mean": 9314.8,
|
|
"valid_targets_min": 3498
|
|
},
|
|
{
|
|
"epoch": 3.768472906403941,
|
|
"grad_norm": 0.1906383407925554,
|
|
"learning_rate": 7.007114680991995e-06,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11089085787534714,
|
|
"step": 765,
|
|
"valid_targets_mean": 9594.2,
|
|
"valid_targets_min": 3836
|
|
},
|
|
{
|
|
"epoch": 3.793103448275862,
|
|
"grad_norm": 0.18789808493594767,
|
|
"learning_rate": 6.747455862760723e-06,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0920998752117157,
|
|
"step": 770,
|
|
"valid_targets_mean": 7893.3,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 3.8177339901477834,
|
|
"grad_norm": 0.1946470476381285,
|
|
"learning_rate": 6.491719759433414e-06,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09941431879997253,
|
|
"step": 775,
|
|
"valid_targets_mean": 8573.0,
|
|
"valid_targets_min": 2428
|
|
},
|
|
{
|
|
"epoch": 3.8423645320197046,
|
|
"grad_norm": 0.18335741825763685,
|
|
"learning_rate": 6.239982068160251e-06,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10005414485931396,
|
|
"step": 780,
|
|
"valid_targets_mean": 8930.7,
|
|
"valid_targets_min": 3592
|
|
},
|
|
{
|
|
"epoch": 3.8669950738916254,
|
|
"grad_norm": 0.19394167335220494,
|
|
"learning_rate": 5.9923173025729895e-06,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0917779877781868,
|
|
"step": 785,
|
|
"valid_targets_mean": 8102.1,
|
|
"valid_targets_min": 2504
|
|
},
|
|
{
|
|
"epoch": 3.8916256157635467,
|
|
"grad_norm": 0.18120225767769269,
|
|
"learning_rate": 5.748798770729071e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09662172943353653,
|
|
"step": 790,
|
|
"valid_targets_mean": 9064.0,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 3.916256157635468,
|
|
"grad_norm": 0.19190077238373823,
|
|
"learning_rate": 5.509498553412727e-06,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10005126893520355,
|
|
"step": 795,
|
|
"valid_targets_mean": 8898.5,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.9408866995073892,
|
|
"grad_norm": 0.17974352936982052,
|
|
"learning_rate": 5.274487482799206e-06,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09953851997852325,
|
|
"step": 800,
|
|
"valid_targets_mean": 8959.3,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 3.9655172413793105,
|
|
"grad_norm": 0.18698159849014778,
|
|
"learning_rate": 5.04383512148871e-06,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10734543204307556,
|
|
"step": 805,
|
|
"valid_targets_mean": 9183.6,
|
|
"valid_targets_min": 3696
|
|
},
|
|
{
|
|
"epoch": 3.9901477832512313,
|
|
"grad_norm": 0.18442090586080964,
|
|
"learning_rate": 4.817609741916009e-06,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0857645720243454,
|
|
"step": 810,
|
|
"valid_targets_mean": 7837.0,
|
|
"valid_targets_min": 2597
|
|
},
|
|
{
|
|
"epoch": 4.014778325123153,
|
|
"grad_norm": 0.18552650025158898,
|
|
"learning_rate": 4.595878306142059e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09557130932807922,
|
|
"step": 815,
|
|
"valid_targets_mean": 8530.3,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 4.039408866995074,
|
|
"grad_norm": 0.19107540271966575,
|
|
"learning_rate": 4.37870644603336e-06,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08814047276973724,
|
|
"step": 820,
|
|
"valid_targets_mean": 8476.8,
|
|
"valid_targets_min": 2709
|
|
},
|
|
{
|
|
"epoch": 4.064039408866995,
|
|
"grad_norm": 0.21852387313861962,
|
|
"learning_rate": 4.1661584438351645e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08739292621612549,
|
|
"step": 825,
|
|
"valid_targets_mean": 8461.2,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 4.088669950738916,
|
|
"grad_norm": 0.1839668348467172,
|
|
"learning_rate": 3.958297213144084e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07950074970722198,
|
|
"step": 830,
|
|
"valid_targets_mean": 7643.1,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 4.113300492610837,
|
|
"grad_norm": 0.18845310147717448,
|
|
"learning_rate": 3.7551842802858772e-06,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10723338276147842,
|
|
"step": 835,
|
|
"valid_targets_mean": 9729.5,
|
|
"valid_targets_min": 4630
|
|
},
|
|
{
|
|
"epoch": 4.137931034482759,
|
|
"grad_norm": 0.1940491672102659,
|
|
"learning_rate": 3.5568797661038004e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09230605512857437,
|
|
"step": 840,
|
|
"valid_targets_mean": 8704.1,
|
|
"valid_targets_min": 3915
|
|
},
|
|
{
|
|
"epoch": 4.16256157635468,
|
|
"grad_norm": 0.18469213419035746,
|
|
"learning_rate": 3.3634423681630392e-06,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10094676911830902,
|
|
"step": 845,
|
|
"valid_targets_mean": 9719.5,
|
|
"valid_targets_min": 4368
|
|
},
|
|
{
|
|
"epoch": 4.187192118226601,
|
|
"grad_norm": 0.18737436904852822,
|
|
"learning_rate": 3.174929343376374e-06,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09700489789247513,
|
|
"step": 850,
|
|
"valid_targets_mean": 9506.8,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 4.211822660098522,
|
|
"grad_norm": 0.18423499331496113,
|
|
"learning_rate": 2.991396491056331e-06,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10383116453886032,
|
|
"step": 855,
|
|
"valid_targets_mean": 9004.8,
|
|
"valid_targets_min": 2993
|
|
},
|
|
{
|
|
"epoch": 4.236453201970443,
|
|
"grad_norm": 0.19530539384844203,
|
|
"learning_rate": 2.812898136398705e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10272261500358582,
|
|
"step": 860,
|
|
"valid_targets_mean": 10417.8,
|
|
"valid_targets_min": 2835
|
|
},
|
|
{
|
|
"epoch": 4.261083743842365,
|
|
"grad_norm": 0.1768936566418224,
|
|
"learning_rate": 2.6394871144024926e-06,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08824898302555084,
|
|
"step": 865,
|
|
"valid_targets_mean": 8173.8,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.18803866498160385,
|
|
"learning_rate": 2.471214754230866e-06,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09874047338962555,
|
|
"step": 870,
|
|
"valid_targets_mean": 8255.1,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 4.310344827586207,
|
|
"grad_norm": 0.18305855723051326,
|
|
"learning_rate": 2.3081308640178945e-06,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09291456639766693,
|
|
"step": 875,
|
|
"valid_targets_mean": 8635.8,
|
|
"valid_targets_min": 3871
|
|
},
|
|
{
|
|
"epoch": 4.334975369458128,
|
|
"grad_norm": 0.17966738406863703,
|
|
"learning_rate": 2.1502837161254873e-06,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10657632350921631,
|
|
"step": 880,
|
|
"valid_targets_mean": 9381.9,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 4.359605911330049,
|
|
"grad_norm": 0.16961839703667175,
|
|
"learning_rate": 1.9977200328548953e-06,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09088948369026184,
|
|
"step": 885,
|
|
"valid_targets_mean": 8735.8,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 4.384236453201971,
|
|
"grad_norm": 0.17218142432918168,
|
|
"learning_rate": 1.8504849726170637e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09188289195299149,
|
|
"step": 890,
|
|
"valid_targets_mean": 8830.7,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 4.4088669950738915,
|
|
"grad_norm": 0.1813446374116515,
|
|
"learning_rate": 1.7086221165658544e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09599706530570984,
|
|
"step": 895,
|
|
"valid_targets_mean": 8826.4,
|
|
"valid_targets_min": 3172
|
|
},
|
|
{
|
|
"epoch": 4.433497536945813,
|
|
"grad_norm": 0.18498320583010483,
|
|
"learning_rate": 1.5721734556981761e-06,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09900371730327606,
|
|
"step": 900,
|
|
"valid_targets_mean": 8690.2,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 4.458128078817734,
|
|
"grad_norm": 0.20510849376299953,
|
|
"learning_rate": 1.4411793784247263e-06,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10054295510053635,
|
|
"step": 905,
|
|
"valid_targets_mean": 8741.5,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 4.482758620689655,
|
|
"grad_norm": 0.1922020499715368,
|
|
"learning_rate": 1.3156786586151916e-06,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10521981865167618,
|
|
"step": 910,
|
|
"valid_targets_mean": 9091.2,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 4.5073891625615765,
|
|
"grad_norm": 0.17566941817494466,
|
|
"learning_rate": 1.195708444121253e-06,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0981191098690033,
|
|
"step": 915,
|
|
"valid_targets_mean": 8294.6,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 4.532019704433497,
|
|
"grad_norm": 0.18568976979390986,
|
|
"learning_rate": 1.0813042457809497e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08985172212123871,
|
|
"step": 920,
|
|
"valid_targets_mean": 8663.1,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.556650246305419,
|
|
"grad_norm": 0.1809563651428104,
|
|
"learning_rate": 9.724999269075598e-07,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09896715730428696,
|
|
"step": 925,
|
|
"valid_targets_mean": 8589.9,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 4.58128078817734,
|
|
"grad_norm": 0.1826346698165506,
|
|
"learning_rate": 8.693276932661732e-07,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09368403255939484,
|
|
"step": 930,
|
|
"valid_targets_mean": 8382.1,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 4.605911330049262,
|
|
"grad_norm": 0.17397784390869644,
|
|
"learning_rate": 7.718180835408584e-07,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09835617244243622,
|
|
"step": 935,
|
|
"valid_targets_mean": 8955.2,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 4.630541871921182,
|
|
"grad_norm": 0.17704960582110957,
|
|
"learning_rate": 6.799999602953189e-07,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09321548044681549,
|
|
"step": 940,
|
|
"valid_targets_mean": 8988.6,
|
|
"valid_targets_min": 2533
|
|
},
|
|
{
|
|
"epoch": 4.655172413793103,
|
|
"grad_norm": 0.1741304537218507,
|
|
"learning_rate": 5.939005014296428e-07,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09430979192256927,
|
|
"step": 945,
|
|
"valid_targets_mean": 8396.6,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 4.679802955665025,
|
|
"grad_norm": 0.16592953591918666,
|
|
"learning_rate": 5.135451921357337e-07,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10379470884799957,
|
|
"step": 950,
|
|
"valid_targets_mean": 9645.2,
|
|
"valid_targets_min": 2567
|
|
},
|
|
{
|
|
"epoch": 4.704433497536946,
|
|
"grad_norm": 0.17443346928975734,
|
|
"learning_rate": 4.3895781735375566e-07,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09814979881048203,
|
|
"step": 955,
|
|
"valid_targets_mean": 8773.4,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 4.7290640394088665,
|
|
"grad_norm": 0.17648712594362892,
|
|
"learning_rate": 3.70160454731876e-07,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09249699860811234,
|
|
"step": 960,
|
|
"valid_targets_mean": 8434.4,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 4.753694581280788,
|
|
"grad_norm": 0.16739065144158322,
|
|
"learning_rate": 3.0717346809132407e-07,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10244876146316528,
|
|
"step": 965,
|
|
"valid_targets_mean": 9382.5,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 4.778325123152709,
|
|
"grad_norm": 0.1667695775270446,
|
|
"learning_rate": 2.5001550139877707e-07,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09757320582866669,
|
|
"step": 970,
|
|
"valid_targets_mean": 9711.1,
|
|
"valid_targets_min": 5200
|
|
},
|
|
{
|
|
"epoch": 4.802955665024631,
|
|
"grad_norm": 0.17184028910965868,
|
|
"learning_rate": 1.987034732477877e-07,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09788213670253754,
|
|
"step": 975,
|
|
"valid_targets_mean": 8935.1,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 4.827586206896552,
|
|
"grad_norm": 0.16740666268964893,
|
|
"learning_rate": 1.5325257185093923e-07,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09289947897195816,
|
|
"step": 980,
|
|
"valid_targets_mean": 8989.3,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 4.852216748768473,
|
|
"grad_norm": 0.17517346033665723,
|
|
"learning_rate": 1.1367625054416575e-07,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09196974337100983,
|
|
"step": 985,
|
|
"valid_targets_mean": 8455.2,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 4.876847290640394,
|
|
"grad_norm": 0.17165728117501497,
|
|
"learning_rate": 7.998622380461563e-08,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10432468354701996,
|
|
"step": 990,
|
|
"valid_targets_mean": 8684.4,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 4.901477832512315,
|
|
"grad_norm": 0.16816491897161798,
|
|
"learning_rate": 5.219246378319387e-08,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0968252569437027,
|
|
"step": 995,
|
|
"valid_targets_mean": 8864.2,
|
|
"valid_targets_min": 4695
|
|
},
|
|
{
|
|
"epoch": 4.926108374384237,
|
|
"grad_norm": 0.18359960757267443,
|
|
"learning_rate": 3.030319735283449e-08,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09898233413696289,
|
|
"step": 1000,
|
|
"valid_targets_mean": 8488.2,
|
|
"valid_targets_min": 3522
|
|
},
|
|
{
|
|
"epoch": 4.9507389162561575,
|
|
"grad_norm": 0.1729827087715371,
|
|
"learning_rate": 1.4324903673370583e-08,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10422653704881668,
|
|
"step": 1005,
|
|
"valid_targets_mean": 9734.5,
|
|
"valid_targets_min": 5199
|
|
},
|
|
{
|
|
"epoch": 4.975369458128079,
|
|
"grad_norm": 0.1665182843059149,
|
|
"learning_rate": 4.262312273721758e-09,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08595976233482361,
|
|
"step": 1010,
|
|
"valid_targets_mean": 7786.4,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.17102014635406348,
|
|
"learning_rate": 1.184016519673037e-10,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0942406952381134,
|
|
"step": 1015,
|
|
"valid_targets_mean": 8724.4,
|
|
"valid_targets_min": 2268
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"step": 1015,
|
|
"total_flos": 3.756182037619278e+18,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 2.2907,
|
|
"train_samples_per_second": 42523.793,
|
|
"train_steps_per_second": 443.093
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1015,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.756182037619278e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|