Model: laion/exp-uns-r2egym-33_6x_glm_4_7_traces_jupiter_cleaned Source: Original Platform
9871 lines
274 KiB
JSON
9871 lines
274 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4466,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00784313725490196,
|
|
"grad_norm": 23.404166183174258,
|
|
"learning_rate": 3.579418344519016e-07,
|
|
"loss": 0.8605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43860095739364624,
|
|
"step": 5,
|
|
"valid_targets_mean": 6052.9,
|
|
"valid_targets_min": 4026
|
|
},
|
|
{
|
|
"epoch": 0.01568627450980392,
|
|
"grad_norm": 22.42466445634641,
|
|
"learning_rate": 8.053691275167786e-07,
|
|
"loss": 0.8504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3826742172241211,
|
|
"step": 10,
|
|
"valid_targets_mean": 4323.8,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 0.023529411764705882,
|
|
"grad_norm": 19.492015734675334,
|
|
"learning_rate": 1.2527964205816557e-06,
|
|
"loss": 0.8154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3982408046722412,
|
|
"step": 15,
|
|
"valid_targets_mean": 5580.4,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 0.03137254901960784,
|
|
"grad_norm": 15.033360921231138,
|
|
"learning_rate": 1.7002237136465326e-06,
|
|
"loss": 0.7841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3529409170150757,
|
|
"step": 20,
|
|
"valid_targets_mean": 4555.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 0.0392156862745098,
|
|
"grad_norm": 8.483061410819625,
|
|
"learning_rate": 2.1476510067114096e-06,
|
|
"loss": 0.7342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3690146803855896,
|
|
"step": 25,
|
|
"valid_targets_mean": 5543.6,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 0.047058823529411764,
|
|
"grad_norm": 4.766054345997032,
|
|
"learning_rate": 2.5950782997762863e-06,
|
|
"loss": 0.6663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3129326105117798,
|
|
"step": 30,
|
|
"valid_targets_mean": 4514.2,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 0.054901960784313725,
|
|
"grad_norm": 2.5241369817655777,
|
|
"learning_rate": 3.0425055928411635e-06,
|
|
"loss": 0.6269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33344778418540955,
|
|
"step": 35,
|
|
"valid_targets_mean": 5949.8,
|
|
"valid_targets_min": 3442
|
|
},
|
|
{
|
|
"epoch": 0.06274509803921569,
|
|
"grad_norm": 1.7636147579433734,
|
|
"learning_rate": 3.4899328859060407e-06,
|
|
"loss": 0.6071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549435496330261,
|
|
"step": 40,
|
|
"valid_targets_mean": 4407.0,
|
|
"valid_targets_min": 3418
|
|
},
|
|
{
|
|
"epoch": 0.07058823529411765,
|
|
"grad_norm": 1.6055588352994365,
|
|
"learning_rate": 3.937360178970917e-06,
|
|
"loss": 0.5813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27976834774017334,
|
|
"step": 45,
|
|
"valid_targets_mean": 4382.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.0784313725490196,
|
|
"grad_norm": 1.2788686420150093,
|
|
"learning_rate": 4.384787472035795e-06,
|
|
"loss": 0.5541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605871260166168,
|
|
"step": 50,
|
|
"valid_targets_mean": 5089.8,
|
|
"valid_targets_min": 3683
|
|
},
|
|
{
|
|
"epoch": 0.08627450980392157,
|
|
"grad_norm": 1.0715989368516878,
|
|
"learning_rate": 4.832214765100672e-06,
|
|
"loss": 0.5491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28247833251953125,
|
|
"step": 55,
|
|
"valid_targets_mean": 4860.9,
|
|
"valid_targets_min": 3327
|
|
},
|
|
{
|
|
"epoch": 0.09411764705882353,
|
|
"grad_norm": 0.8690290795550152,
|
|
"learning_rate": 5.2796420581655485e-06,
|
|
"loss": 0.54,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264930784702301,
|
|
"step": 60,
|
|
"valid_targets_mean": 4219.0,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 0.10196078431372549,
|
|
"grad_norm": 0.8017318684322533,
|
|
"learning_rate": 5.727069351230425e-06,
|
|
"loss": 0.5082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22782692313194275,
|
|
"step": 65,
|
|
"valid_targets_mean": 4217.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.10980392156862745,
|
|
"grad_norm": 0.7128483128387519,
|
|
"learning_rate": 6.174496644295303e-06,
|
|
"loss": 0.5012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25505802035331726,
|
|
"step": 70,
|
|
"valid_targets_mean": 5386.5,
|
|
"valid_targets_min": 3532
|
|
},
|
|
{
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 0.7310910248118908,
|
|
"learning_rate": 6.6219239373601796e-06,
|
|
"loss": 0.4838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24710813164710999,
|
|
"step": 75,
|
|
"valid_targets_mean": 4911.4,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 0.12549019607843137,
|
|
"grad_norm": 0.6248033611994014,
|
|
"learning_rate": 7.069351230425056e-06,
|
|
"loss": 0.462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23145128786563873,
|
|
"step": 80,
|
|
"valid_targets_mean": 4827.0,
|
|
"valid_targets_min": 4254
|
|
},
|
|
{
|
|
"epoch": 0.13333333333333333,
|
|
"grad_norm": 0.7040581832739684,
|
|
"learning_rate": 7.516778523489934e-06,
|
|
"loss": 0.4706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24838227033615112,
|
|
"step": 85,
|
|
"valid_targets_mean": 6231.5,
|
|
"valid_targets_min": 4574
|
|
},
|
|
{
|
|
"epoch": 0.1411764705882353,
|
|
"grad_norm": 0.6539667876865614,
|
|
"learning_rate": 7.96420581655481e-06,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22435414791107178,
|
|
"step": 90,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 3877
|
|
},
|
|
{
|
|
"epoch": 0.14901960784313725,
|
|
"grad_norm": 0.5657786792857245,
|
|
"learning_rate": 8.411633109619688e-06,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18156567215919495,
|
|
"step": 95,
|
|
"valid_targets_mean": 4763.6,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 0.1568627450980392,
|
|
"grad_norm": 0.6230992224961203,
|
|
"learning_rate": 8.859060402684566e-06,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2146635353565216,
|
|
"step": 100,
|
|
"valid_targets_mean": 5103.5,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.16470588235294117,
|
|
"grad_norm": 0.8008553943611885,
|
|
"learning_rate": 9.306487695749442e-06,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2160729169845581,
|
|
"step": 105,
|
|
"valid_targets_mean": 4637.9,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 0.17254901960784313,
|
|
"grad_norm": 0.55231481253877,
|
|
"learning_rate": 9.753914988814318e-06,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1925559639930725,
|
|
"step": 110,
|
|
"valid_targets_mean": 4680.8,
|
|
"valid_targets_min": 3445
|
|
},
|
|
{
|
|
"epoch": 0.1803921568627451,
|
|
"grad_norm": 0.7664434280182179,
|
|
"learning_rate": 1.0201342281879197e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16616085171699524,
|
|
"step": 115,
|
|
"valid_targets_mean": 3789.8,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.18823529411764706,
|
|
"grad_norm": 0.6385917861653485,
|
|
"learning_rate": 1.0648769574944073e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21854722499847412,
|
|
"step": 120,
|
|
"valid_targets_mean": 5809.2,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 0.19607843137254902,
|
|
"grad_norm": 0.5717964559070998,
|
|
"learning_rate": 1.109619686800895e-05,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16294127702713013,
|
|
"step": 125,
|
|
"valid_targets_mean": 4298.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.20392156862745098,
|
|
"grad_norm": 0.5787727391952345,
|
|
"learning_rate": 1.1543624161073828e-05,
|
|
"loss": 0.3932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15270715951919556,
|
|
"step": 130,
|
|
"valid_targets_mean": 4122.0,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 0.21176470588235294,
|
|
"grad_norm": 0.5678187673191607,
|
|
"learning_rate": 1.1991051454138702e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21663323044776917,
|
|
"step": 135,
|
|
"valid_targets_mean": 5638.5,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 0.2196078431372549,
|
|
"grad_norm": 0.6092080710914324,
|
|
"learning_rate": 1.243847874720358e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18496207892894745,
|
|
"step": 140,
|
|
"valid_targets_mean": 4467.4,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 0.22745098039215686,
|
|
"grad_norm": 0.5613437236831338,
|
|
"learning_rate": 1.2885906040268457e-05,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14188259840011597,
|
|
"step": 145,
|
|
"valid_targets_mean": 4013.1,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 0.6080216938444823,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16894058883190155,
|
|
"step": 150,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 4012
|
|
},
|
|
{
|
|
"epoch": 0.24313725490196078,
|
|
"grad_norm": 0.5520368797273758,
|
|
"learning_rate": 1.3780760626398211e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753460109233856,
|
|
"step": 155,
|
|
"valid_targets_mean": 6673.6,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 0.25098039215686274,
|
|
"grad_norm": 0.594414769473616,
|
|
"learning_rate": 1.4228187919463088e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16259320080280304,
|
|
"step": 160,
|
|
"valid_targets_mean": 4557.5,
|
|
"valid_targets_min": 2179
|
|
},
|
|
{
|
|
"epoch": 0.25882352941176473,
|
|
"grad_norm": 0.5567846516507987,
|
|
"learning_rate": 1.4675615212527964e-05,
|
|
"loss": 0.369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16312125325202942,
|
|
"step": 165,
|
|
"valid_targets_mean": 4479.1,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 0.26666666666666666,
|
|
"grad_norm": 0.6225467093351309,
|
|
"learning_rate": 1.5123042505592842e-05,
|
|
"loss": 0.3852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725965440273285,
|
|
"step": 170,
|
|
"valid_targets_mean": 4417.2,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 0.27450980392156865,
|
|
"grad_norm": 0.5576011799774071,
|
|
"learning_rate": 1.5570469798657718e-05,
|
|
"loss": 0.3726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1948225051164627,
|
|
"step": 175,
|
|
"valid_targets_mean": 5861.2,
|
|
"valid_targets_min": 3404
|
|
},
|
|
{
|
|
"epoch": 0.2823529411764706,
|
|
"grad_norm": 0.6703064314693946,
|
|
"learning_rate": 1.6017897091722595e-05,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1748293787240982,
|
|
"step": 180,
|
|
"valid_targets_mean": 3518.5,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.2901960784313726,
|
|
"grad_norm": 0.5285588101578,
|
|
"learning_rate": 1.6465324384787473e-05,
|
|
"loss": 0.3487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18657265603542328,
|
|
"step": 185,
|
|
"valid_targets_mean": 5753.5,
|
|
"valid_targets_min": 4620
|
|
},
|
|
{
|
|
"epoch": 0.2980392156862745,
|
|
"grad_norm": 0.5477656717148982,
|
|
"learning_rate": 1.691275167785235e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21630387008190155,
|
|
"step": 190,
|
|
"valid_targets_mean": 6037.1,
|
|
"valid_targets_min": 3270
|
|
},
|
|
{
|
|
"epoch": 0.3058823529411765,
|
|
"grad_norm": 0.5338456128957902,
|
|
"learning_rate": 1.7360178970917228e-05,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20467782020568848,
|
|
"step": 195,
|
|
"valid_targets_mean": 6507.9,
|
|
"valid_targets_min": 3275
|
|
},
|
|
{
|
|
"epoch": 0.3137254901960784,
|
|
"grad_norm": 0.5964011091475677,
|
|
"learning_rate": 1.7807606263982106e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15161500871181488,
|
|
"step": 200,
|
|
"valid_targets_mean": 4578.1,
|
|
"valid_targets_min": 4111
|
|
},
|
|
{
|
|
"epoch": 0.3215686274509804,
|
|
"grad_norm": 0.648451617296332,
|
|
"learning_rate": 1.825503355704698e-05,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17796659469604492,
|
|
"step": 205,
|
|
"valid_targets_mean": 4392.1,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 0.32941176470588235,
|
|
"grad_norm": 0.6160415343109967,
|
|
"learning_rate": 1.8702460850111858e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1387397199869156,
|
|
"step": 210,
|
|
"valid_targets_mean": 4299.2,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 0.33725490196078434,
|
|
"grad_norm": 0.6631176281408109,
|
|
"learning_rate": 1.9149888143176735e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20661580562591553,
|
|
"step": 215,
|
|
"valid_targets_mean": 5151.0,
|
|
"valid_targets_min": 3970
|
|
},
|
|
{
|
|
"epoch": 0.34509803921568627,
|
|
"grad_norm": 0.6276600909818704,
|
|
"learning_rate": 1.9597315436241613e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23764261603355408,
|
|
"step": 220,
|
|
"valid_targets_mean": 5458.9,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 0.5480298452065722,
|
|
"learning_rate": 2.004474272930649e-05,
|
|
"loss": 0.3385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2032904177904129,
|
|
"step": 225,
|
|
"valid_targets_mean": 5639.2,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 0.3607843137254902,
|
|
"grad_norm": 0.6052496860061815,
|
|
"learning_rate": 2.0492170022371365e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16637581586837769,
|
|
"step": 230,
|
|
"valid_targets_mean": 5272.0,
|
|
"valid_targets_min": 4070
|
|
},
|
|
{
|
|
"epoch": 0.3686274509803922,
|
|
"grad_norm": 0.5353645428716092,
|
|
"learning_rate": 2.0939597315436246e-05,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20312991738319397,
|
|
"step": 235,
|
|
"valid_targets_mean": 6574.4,
|
|
"valid_targets_min": 4332
|
|
},
|
|
{
|
|
"epoch": 0.3764705882352941,
|
|
"grad_norm": 0.6055642792922042,
|
|
"learning_rate": 2.138702460850112e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13369685411453247,
|
|
"step": 240,
|
|
"valid_targets_mean": 3771.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 0.3843137254901961,
|
|
"grad_norm": 0.6076478924209101,
|
|
"learning_rate": 2.1834451901565997e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511588990688324,
|
|
"step": 245,
|
|
"valid_targets_mean": 3938.1,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.39215686274509803,
|
|
"grad_norm": 0.5666914468270962,
|
|
"learning_rate": 2.228187919463087e-05,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1792019009590149,
|
|
"step": 250,
|
|
"valid_targets_mean": 5226.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.5862615571287226,
|
|
"learning_rate": 2.2729306487695753e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1689986288547516,
|
|
"step": 255,
|
|
"valid_targets_mean": 5211.1,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 0.40784313725490196,
|
|
"grad_norm": 0.5953417088677341,
|
|
"learning_rate": 2.3176733780760627e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630881428718567,
|
|
"step": 260,
|
|
"valid_targets_mean": 4988.6,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.41568627450980394,
|
|
"grad_norm": 0.7762599901002093,
|
|
"learning_rate": 2.3624161073825508e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17452527582645416,
|
|
"step": 265,
|
|
"valid_targets_mean": 4630.8,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 0.4235294117647059,
|
|
"grad_norm": 0.5624026993600691,
|
|
"learning_rate": 2.4071588366890382e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494782269001007,
|
|
"step": 270,
|
|
"valid_targets_mean": 4994.5,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 0.43137254901960786,
|
|
"grad_norm": 0.6211202450823641,
|
|
"learning_rate": 2.451901565995526e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17344659566879272,
|
|
"step": 275,
|
|
"valid_targets_mean": 5621.2,
|
|
"valid_targets_min": 3628
|
|
},
|
|
{
|
|
"epoch": 0.4392156862745098,
|
|
"grad_norm": 0.5907379572421464,
|
|
"learning_rate": 2.4966442953020137e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780165135860443,
|
|
"step": 280,
|
|
"valid_targets_mean": 5702.2,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 0.4470588235294118,
|
|
"grad_norm": 0.6633596923954129,
|
|
"learning_rate": 2.5413870246085015e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20204931497573853,
|
|
"step": 285,
|
|
"valid_targets_mean": 4570.2,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 0.4549019607843137,
|
|
"grad_norm": 0.6460508692646398,
|
|
"learning_rate": 2.586129753914989e-05,
|
|
"loss": 0.3405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137963205575943,
|
|
"step": 290,
|
|
"valid_targets_mean": 4302.2,
|
|
"valid_targets_min": 2734
|
|
},
|
|
{
|
|
"epoch": 0.4627450980392157,
|
|
"grad_norm": 0.5846169636835284,
|
|
"learning_rate": 2.630872483221477e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13973931968212128,
|
|
"step": 295,
|
|
"valid_targets_mean": 4808.9,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 0.6357428540730443,
|
|
"learning_rate": 2.6756152125279644e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1832273304462433,
|
|
"step": 300,
|
|
"valid_targets_mean": 4868.2,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.47843137254901963,
|
|
"grad_norm": 0.6974536575167599,
|
|
"learning_rate": 2.7203579418344522e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16381505131721497,
|
|
"step": 305,
|
|
"valid_targets_mean": 4578.1,
|
|
"valid_targets_min": 3890
|
|
},
|
|
{
|
|
"epoch": 0.48627450980392156,
|
|
"grad_norm": 0.5984384331505284,
|
|
"learning_rate": 2.76510067114094e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16002431511878967,
|
|
"step": 310,
|
|
"valid_targets_mean": 5190.5,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 0.49411764705882355,
|
|
"grad_norm": 0.5804040000332875,
|
|
"learning_rate": 2.8098434004474274e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1939573585987091,
|
|
"step": 315,
|
|
"valid_targets_mean": 5327.6,
|
|
"valid_targets_min": 3876
|
|
},
|
|
{
|
|
"epoch": 0.5019607843137255,
|
|
"grad_norm": 0.5388268955671826,
|
|
"learning_rate": 2.854586129753915e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15114423632621765,
|
|
"step": 320,
|
|
"valid_targets_mean": 5396.5,
|
|
"valid_targets_min": 4033
|
|
},
|
|
{
|
|
"epoch": 0.5098039215686274,
|
|
"grad_norm": 0.5762558083368537,
|
|
"learning_rate": 2.899328859060403e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11769633740186691,
|
|
"step": 325,
|
|
"valid_targets_mean": 4748.0,
|
|
"valid_targets_min": 3230
|
|
},
|
|
{
|
|
"epoch": 0.5176470588235295,
|
|
"grad_norm": 0.5804008895711301,
|
|
"learning_rate": 2.9440715883668906e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15715950727462769,
|
|
"step": 330,
|
|
"valid_targets_mean": 4616.2,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.5254901960784314,
|
|
"grad_norm": 0.6310527874881205,
|
|
"learning_rate": 2.988814317673378e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16706666350364685,
|
|
"step": 335,
|
|
"valid_targets_mean": 4441.6,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 0.5333333333333333,
|
|
"grad_norm": 0.5961120881314318,
|
|
"learning_rate": 3.033557046979866e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1769622415304184,
|
|
"step": 340,
|
|
"valid_targets_mean": 4994.9,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.5411764705882353,
|
|
"grad_norm": 0.5602180018599069,
|
|
"learning_rate": 3.078299776286353e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12255988270044327,
|
|
"step": 345,
|
|
"valid_targets_mean": 4047.5,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 0.5490196078431373,
|
|
"grad_norm": 0.6173542294001342,
|
|
"learning_rate": 3.123042505592841e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16029052436351776,
|
|
"step": 350,
|
|
"valid_targets_mean": 5549.2,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 0.5568627450980392,
|
|
"grad_norm": 0.5679599191517015,
|
|
"learning_rate": 3.167785234899329e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17976143956184387,
|
|
"step": 355,
|
|
"valid_targets_mean": 5364.1,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.5647058823529412,
|
|
"grad_norm": 0.5672331814348699,
|
|
"learning_rate": 3.212527964205817e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18882597982883453,
|
|
"step": 360,
|
|
"valid_targets_mean": 5831.8,
|
|
"valid_targets_min": 3932
|
|
},
|
|
{
|
|
"epoch": 0.5725490196078431,
|
|
"grad_norm": 0.6591143588007065,
|
|
"learning_rate": 3.257270693512304e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17840611934661865,
|
|
"step": 365,
|
|
"valid_targets_mean": 5092.4,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 0.5803921568627451,
|
|
"grad_norm": 0.5553054618232748,
|
|
"learning_rate": 3.3020134228187924e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1302947998046875,
|
|
"step": 370,
|
|
"valid_targets_mean": 4231.9,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.5644363564911854,
|
|
"learning_rate": 3.34675615212528e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16010509431362152,
|
|
"step": 375,
|
|
"valid_targets_mean": 4744.2,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.596078431372549,
|
|
"grad_norm": 0.58333430931337,
|
|
"learning_rate": 3.391498881431768e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14787736535072327,
|
|
"step": 380,
|
|
"valid_targets_mean": 4678.5,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 0.6039215686274509,
|
|
"grad_norm": 0.5889015215527756,
|
|
"learning_rate": 3.436241610738255e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18845373392105103,
|
|
"step": 385,
|
|
"valid_targets_mean": 5673.2,
|
|
"valid_targets_min": 4270
|
|
},
|
|
{
|
|
"epoch": 0.611764705882353,
|
|
"grad_norm": 0.6223859924552718,
|
|
"learning_rate": 3.4809843400447434e-05,
|
|
"loss": 0.3202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16207769513130188,
|
|
"step": 390,
|
|
"valid_targets_mean": 4658.5,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 0.6196078431372549,
|
|
"grad_norm": 0.7638890797874454,
|
|
"learning_rate": 3.525727069351231e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15491528809070587,
|
|
"step": 395,
|
|
"valid_targets_mean": 5542.0,
|
|
"valid_targets_min": 3454
|
|
},
|
|
{
|
|
"epoch": 0.6274509803921569,
|
|
"grad_norm": 0.5751502933648137,
|
|
"learning_rate": 3.570469798657719e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20562687516212463,
|
|
"step": 400,
|
|
"valid_targets_mean": 6888.8,
|
|
"valid_targets_min": 3784
|
|
},
|
|
{
|
|
"epoch": 0.6352941176470588,
|
|
"grad_norm": 0.597294642401522,
|
|
"learning_rate": 3.6152125279642063e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13165810704231262,
|
|
"step": 405,
|
|
"valid_targets_mean": 4793.1,
|
|
"valid_targets_min": 4053
|
|
},
|
|
{
|
|
"epoch": 0.6431372549019608,
|
|
"grad_norm": 0.5669919007326062,
|
|
"learning_rate": 3.659955257270694e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18990609049797058,
|
|
"step": 410,
|
|
"valid_targets_mean": 5629.0,
|
|
"valid_targets_min": 4213
|
|
},
|
|
{
|
|
"epoch": 0.6509803921568628,
|
|
"grad_norm": 0.6468208121665522,
|
|
"learning_rate": 3.704697986577181e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1645640730857849,
|
|
"step": 415,
|
|
"valid_targets_mean": 4735.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.6588235294117647,
|
|
"grad_norm": 0.540502132183462,
|
|
"learning_rate": 3.749440715883669e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19879060983657837,
|
|
"step": 420,
|
|
"valid_targets_mean": 5724.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 0.5490704341871493,
|
|
"learning_rate": 3.794183445190157e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14878222346305847,
|
|
"step": 425,
|
|
"valid_targets_mean": 5043.6,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.6745098039215687,
|
|
"grad_norm": 0.5675730862377629,
|
|
"learning_rate": 3.838926174496644e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14112448692321777,
|
|
"step": 430,
|
|
"valid_targets_mean": 4815.8,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.6823529411764706,
|
|
"grad_norm": 0.49675453114238977,
|
|
"learning_rate": 3.883668903803132e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14784574508666992,
|
|
"step": 435,
|
|
"valid_targets_mean": 5252.1,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 0.6901960784313725,
|
|
"grad_norm": 0.48200436187256407,
|
|
"learning_rate": 3.9284116331096196e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13503950834274292,
|
|
"step": 440,
|
|
"valid_targets_mean": 5491.2,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 0.6980392156862745,
|
|
"grad_norm": 0.5712073487308454,
|
|
"learning_rate": 3.973154362416108e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19644859433174133,
|
|
"step": 445,
|
|
"valid_targets_mean": 5789.0,
|
|
"valid_targets_min": 3587
|
|
},
|
|
{
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 0.526830081315818,
|
|
"learning_rate": 3.999997555873748e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14665110409259796,
|
|
"step": 450,
|
|
"valid_targets_mean": 5491.9,
|
|
"valid_targets_min": 3758
|
|
},
|
|
{
|
|
"epoch": 0.7137254901960784,
|
|
"grad_norm": 0.5474564322386588,
|
|
"learning_rate": 3.999970059522009e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494222730398178,
|
|
"step": 455,
|
|
"valid_targets_mean": 4707.6,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 0.7215686274509804,
|
|
"grad_norm": 0.5518182142090695,
|
|
"learning_rate": 3.9999120120821446e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2486301213502884,
|
|
"step": 460,
|
|
"valid_targets_mean": 6441.1,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.7294117647058823,
|
|
"grad_norm": 0.5706154037456665,
|
|
"learning_rate": 3.999823414440874e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13550716638565063,
|
|
"step": 465,
|
|
"valid_targets_mean": 4467.9,
|
|
"valid_targets_min": 2927
|
|
},
|
|
{
|
|
"epoch": 0.7372549019607844,
|
|
"grad_norm": 0.5629521649561858,
|
|
"learning_rate": 3.999704267951594e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1534888595342636,
|
|
"step": 470,
|
|
"valid_targets_mean": 4402.6,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 0.7450980392156863,
|
|
"grad_norm": 0.5871057366051168,
|
|
"learning_rate": 3.999554574434359e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11759912967681885,
|
|
"step": 475,
|
|
"valid_targets_mean": 3519.2,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.7529411764705882,
|
|
"grad_norm": 0.5888528130085586,
|
|
"learning_rate": 3.999374336175854e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16305159032344818,
|
|
"step": 480,
|
|
"valid_targets_mean": 4139.1,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.7607843137254902,
|
|
"grad_norm": 0.5346424212043335,
|
|
"learning_rate": 3.999163555929357e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14926356077194214,
|
|
"step": 485,
|
|
"valid_targets_mean": 4719.6,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 0.7686274509803922,
|
|
"grad_norm": 0.532795561287944,
|
|
"learning_rate": 3.9989222369147e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14986297488212585,
|
|
"step": 490,
|
|
"valid_targets_mean": 5001.2,
|
|
"valid_targets_min": 3946
|
|
},
|
|
{
|
|
"epoch": 0.7764705882352941,
|
|
"grad_norm": 0.5842848266555563,
|
|
"learning_rate": 3.998650382818217e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2318124771118164,
|
|
"step": 495,
|
|
"valid_targets_mean": 5869.8,
|
|
"valid_targets_min": 3481
|
|
},
|
|
{
|
|
"epoch": 0.7843137254901961,
|
|
"grad_norm": 0.5200319327863147,
|
|
"learning_rate": 3.998347997792689e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17700932919979095,
|
|
"step": 500,
|
|
"valid_targets_mean": 5104.8,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 0.792156862745098,
|
|
"grad_norm": 0.5239084681367949,
|
|
"learning_rate": 3.9980150864572815e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14928527176380157,
|
|
"step": 505,
|
|
"valid_targets_mean": 5159.4,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.5519810693913533,
|
|
"learning_rate": 3.997651653897472e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430630087852478,
|
|
"step": 510,
|
|
"valid_targets_mean": 4216.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.807843137254902,
|
|
"grad_norm": 0.5439450316689649,
|
|
"learning_rate": 3.997257705664974e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13359497487545013,
|
|
"step": 515,
|
|
"valid_targets_mean": 4604.4,
|
|
"valid_targets_min": 3499
|
|
},
|
|
{
|
|
"epoch": 0.8156862745098039,
|
|
"grad_norm": 0.515452404322428,
|
|
"learning_rate": 3.9968332477776505e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11366050690412521,
|
|
"step": 520,
|
|
"valid_targets_mean": 4262.4,
|
|
"valid_targets_min": 3479
|
|
},
|
|
{
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 0.5875375799090892,
|
|
"learning_rate": 3.996378286719425e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15862804651260376,
|
|
"step": 525,
|
|
"valid_targets_mean": 4425.1,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 0.8313725490196079,
|
|
"grad_norm": 0.516273404525732,
|
|
"learning_rate": 3.995892829440178e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1501372754573822,
|
|
"step": 530,
|
|
"valid_targets_mean": 5350.5,
|
|
"valid_targets_min": 3222
|
|
},
|
|
{
|
|
"epoch": 0.8392156862745098,
|
|
"grad_norm": 0.6025942652272187,
|
|
"learning_rate": 3.995376883355645e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16278386116027832,
|
|
"step": 535,
|
|
"valid_targets_mean": 5388.0,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 0.8470588235294118,
|
|
"grad_norm": 0.5287170966635809,
|
|
"learning_rate": 3.994830456347302e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13605378568172455,
|
|
"step": 540,
|
|
"valid_targets_mean": 4475.0,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.8549019607843137,
|
|
"grad_norm": 0.5609798073618312,
|
|
"learning_rate": 3.994253556762243e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425262838602066,
|
|
"step": 545,
|
|
"valid_targets_mean": 4783.4,
|
|
"valid_targets_min": 4019
|
|
},
|
|
{
|
|
"epoch": 0.8627450980392157,
|
|
"grad_norm": 0.534037643572686,
|
|
"learning_rate": 3.993646193413053e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12146712094545364,
|
|
"step": 550,
|
|
"valid_targets_mean": 4630.2,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 0.8705882352941177,
|
|
"grad_norm": 0.5327517365703102,
|
|
"learning_rate": 3.99300837557768e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12988287210464478,
|
|
"step": 555,
|
|
"valid_targets_mean": 4601.2,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 0.8784313725490196,
|
|
"grad_norm": 0.6104458931487282,
|
|
"learning_rate": 3.9923401129992826e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22504258155822754,
|
|
"step": 560,
|
|
"valid_targets_mean": 4843.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.8862745098039215,
|
|
"grad_norm": 0.5816928916805613,
|
|
"learning_rate": 3.991641415886089e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14014427363872528,
|
|
"step": 565,
|
|
"valid_targets_mean": 4873.4,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 0.8941176470588236,
|
|
"grad_norm": 0.5341354749305198,
|
|
"learning_rate": 3.990912294911236e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14114820957183838,
|
|
"step": 570,
|
|
"valid_targets_mean": 5001.8,
|
|
"valid_targets_min": 3638
|
|
},
|
|
{
|
|
"epoch": 0.9019607843137255,
|
|
"grad_norm": 0.4891281706911189,
|
|
"learning_rate": 3.99015276121261e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18456187844276428,
|
|
"step": 575,
|
|
"valid_targets_mean": 7555.2,
|
|
"valid_targets_min": 4641
|
|
},
|
|
{
|
|
"epoch": 0.9098039215686274,
|
|
"grad_norm": 0.5134006529875681,
|
|
"learning_rate": 3.989362826392676e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15537822246551514,
|
|
"step": 580,
|
|
"valid_targets_mean": 5178.5,
|
|
"valid_targets_min": 3262
|
|
},
|
|
{
|
|
"epoch": 0.9176470588235294,
|
|
"grad_norm": 0.5019601221086468,
|
|
"learning_rate": 3.9885425025183e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12703928351402283,
|
|
"step": 585,
|
|
"valid_targets_mean": 4999.5,
|
|
"valid_targets_min": 3634
|
|
},
|
|
{
|
|
"epoch": 0.9254901960784314,
|
|
"grad_norm": 0.5652890656745809,
|
|
"learning_rate": 3.9876918021205606e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16273213922977448,
|
|
"step": 590,
|
|
"valid_targets_mean": 4657.9,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 0.9333333333333333,
|
|
"grad_norm": 0.5446495034127701,
|
|
"learning_rate": 3.9868107381945646e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14110511541366577,
|
|
"step": 595,
|
|
"valid_targets_mean": 4137.2,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 0.544143879383346,
|
|
"learning_rate": 3.9858993241992454e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1653507947921753,
|
|
"step": 600,
|
|
"valid_targets_mean": 5535.8,
|
|
"valid_targets_min": 3537
|
|
},
|
|
{
|
|
"epoch": 0.9490196078431372,
|
|
"grad_norm": 0.5537973321054328,
|
|
"learning_rate": 3.984957574057155e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17208953201770782,
|
|
"step": 605,
|
|
"valid_targets_mean": 5035.8,
|
|
"valid_targets_min": 3531
|
|
},
|
|
{
|
|
"epoch": 0.9568627450980393,
|
|
"grad_norm": 0.5609951204704824,
|
|
"learning_rate": 3.983985502154254e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13309073448181152,
|
|
"step": 610,
|
|
"valid_targets_mean": 4509.8,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.9647058823529412,
|
|
"grad_norm": 0.6094983245356734,
|
|
"learning_rate": 3.982983123339694e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1601308286190033,
|
|
"step": 615,
|
|
"valid_targets_mean": 4618.6,
|
|
"valid_targets_min": 3628
|
|
},
|
|
{
|
|
"epoch": 0.9725490196078431,
|
|
"grad_norm": 0.4899136852504708,
|
|
"learning_rate": 3.9819504529255836e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296088844537735,
|
|
"step": 620,
|
|
"valid_targets_mean": 5019.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 0.9803921568627451,
|
|
"grad_norm": 0.5177436460074714,
|
|
"learning_rate": 3.980887506686763e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11718728393316269,
|
|
"step": 625,
|
|
"valid_targets_mean": 4191.5,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 0.9882352941176471,
|
|
"grad_norm": 0.5165007897600375,
|
|
"learning_rate": 3.9797943008605575e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14613491296768188,
|
|
"step": 630,
|
|
"valid_targets_mean": 4653.8,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 0.996078431372549,
|
|
"grad_norm": 0.5746054692877476,
|
|
"learning_rate": 3.978670852146529e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13336493074893951,
|
|
"step": 635,
|
|
"valid_targets_mean": 4390.6,
|
|
"valid_targets_min": 3725
|
|
},
|
|
{
|
|
"epoch": 1.0031372549019608,
|
|
"grad_norm": 0.552658454300446,
|
|
"learning_rate": 3.977517177706226e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12799662351608276,
|
|
"step": 640,
|
|
"valid_targets_mean": 5032.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.0109803921568628,
|
|
"grad_norm": 0.515229853898195,
|
|
"learning_rate": 3.976333295162918e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15825995802879333,
|
|
"step": 645,
|
|
"valid_targets_mean": 4782.9,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 1.0188235294117647,
|
|
"grad_norm": 0.5475534187240159,
|
|
"learning_rate": 3.975119222601323e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13792580366134644,
|
|
"step": 650,
|
|
"valid_targets_mean": 4856.1,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 1.0266666666666666,
|
|
"grad_norm": 0.4751392720015022,
|
|
"learning_rate": 3.9738749785673406e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15653687715530396,
|
|
"step": 655,
|
|
"valid_targets_mean": 4985.4,
|
|
"valid_targets_min": 3526
|
|
},
|
|
{
|
|
"epoch": 1.0345098039215685,
|
|
"grad_norm": 0.5817528452352356,
|
|
"learning_rate": 3.972600582067758e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15338078141212463,
|
|
"step": 660,
|
|
"valid_targets_mean": 4498.0,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.0423529411764705,
|
|
"grad_norm": 0.5789119601464775,
|
|
"learning_rate": 3.971296052569967e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11939240992069244,
|
|
"step": 665,
|
|
"valid_targets_mean": 4117.6,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 1.0501960784313726,
|
|
"grad_norm": 0.48150978430570174,
|
|
"learning_rate": 3.9699614100016634e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16545331478118896,
|
|
"step": 670,
|
|
"valid_targets_mean": 5447.8,
|
|
"valid_targets_min": 4101
|
|
},
|
|
{
|
|
"epoch": 1.0580392156862746,
|
|
"grad_norm": 0.6629488468799677,
|
|
"learning_rate": 3.968596674750545e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14370295405387878,
|
|
"step": 675,
|
|
"valid_targets_mean": 5414.5,
|
|
"valid_targets_min": 3837
|
|
},
|
|
{
|
|
"epoch": 1.0658823529411765,
|
|
"grad_norm": 0.46578037676441547,
|
|
"learning_rate": 3.967201867663996e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16255153715610504,
|
|
"step": 680,
|
|
"valid_targets_mean": 6025.1,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 1.0737254901960784,
|
|
"grad_norm": 0.5104197023855348,
|
|
"learning_rate": 3.9657770100487736e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391524076461792,
|
|
"step": 685,
|
|
"valid_targets_mean": 5004.2,
|
|
"valid_targets_min": 3757
|
|
},
|
|
{
|
|
"epoch": 1.0815686274509804,
|
|
"grad_norm": 0.5492845945511039,
|
|
"learning_rate": 3.964322123670678e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14761832356452942,
|
|
"step": 690,
|
|
"valid_targets_mean": 4180.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.0894117647058823,
|
|
"grad_norm": 0.7674203646730808,
|
|
"learning_rate": 3.9628372307542225e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17910337448120117,
|
|
"step": 695,
|
|
"valid_targets_mean": 5319.0,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 1.0972549019607842,
|
|
"grad_norm": 0.507265509432112,
|
|
"learning_rate": 3.961322353982295e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11319584399461746,
|
|
"step": 700,
|
|
"valid_targets_mean": 4365.6,
|
|
"valid_targets_min": 3235
|
|
},
|
|
{
|
|
"epoch": 1.1050980392156862,
|
|
"grad_norm": 0.5873176986497535,
|
|
"learning_rate": 3.959777516495809e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365930587053299,
|
|
"step": 705,
|
|
"valid_targets_mean": 3845.6,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 1.1129411764705883,
|
|
"grad_norm": 0.4861204771711863,
|
|
"learning_rate": 3.95820274189335e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17216171324253082,
|
|
"step": 710,
|
|
"valid_targets_mean": 5469.4,
|
|
"valid_targets_min": 4127
|
|
},
|
|
{
|
|
"epoch": 1.1207843137254903,
|
|
"grad_norm": 0.49983625309605856,
|
|
"learning_rate": 3.956598054230816e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13429877161979675,
|
|
"step": 715,
|
|
"valid_targets_mean": 4805.4,
|
|
"valid_targets_min": 3414
|
|
},
|
|
{
|
|
"epoch": 1.1286274509803922,
|
|
"grad_norm": 0.5255876190536094,
|
|
"learning_rate": 3.9549634780210536e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1541307419538498,
|
|
"step": 720,
|
|
"valid_targets_mean": 5294.5,
|
|
"valid_targets_min": 3954
|
|
},
|
|
{
|
|
"epoch": 1.1364705882352941,
|
|
"grad_norm": 0.4912047704969661,
|
|
"learning_rate": 3.953299038233476e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16229791939258575,
|
|
"step": 725,
|
|
"valid_targets_mean": 6202.2,
|
|
"valid_targets_min": 4193
|
|
},
|
|
{
|
|
"epoch": 1.144313725490196,
|
|
"grad_norm": 0.4773361514621409,
|
|
"learning_rate": 3.9516047602936864e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13887500762939453,
|
|
"step": 730,
|
|
"valid_targets_mean": 5224.0,
|
|
"valid_targets_min": 3954
|
|
},
|
|
{
|
|
"epoch": 1.152156862745098,
|
|
"grad_norm": 0.566181518889908,
|
|
"learning_rate": 3.949880670083091e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15443825721740723,
|
|
"step": 735,
|
|
"valid_targets_mean": 4433.6,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"grad_norm": 0.5154133999940106,
|
|
"learning_rate": 3.9481267939385e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1726064532995224,
|
|
"step": 740,
|
|
"valid_targets_mean": 4607.8,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 1.167843137254902,
|
|
"grad_norm": 0.5294109628279158,
|
|
"learning_rate": 3.946343158651725e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19238482415676117,
|
|
"step": 745,
|
|
"valid_targets_mean": 6692.2,
|
|
"valid_targets_min": 4235
|
|
},
|
|
{
|
|
"epoch": 1.175686274509804,
|
|
"grad_norm": 0.5052721867495388,
|
|
"learning_rate": 3.944529791469175e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1571299433708191,
|
|
"step": 750,
|
|
"valid_targets_mean": 5801.4,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 1.183529411764706,
|
|
"grad_norm": 0.5028992517595353,
|
|
"learning_rate": 3.9426867200914355e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11698798835277557,
|
|
"step": 755,
|
|
"valid_targets_mean": 4200.5,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.1913725490196079,
|
|
"grad_norm": 0.5668059778989796,
|
|
"learning_rate": 3.9408139726728444e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13860933482646942,
|
|
"step": 760,
|
|
"valid_targets_mean": 4887.9,
|
|
"valid_targets_min": 3979
|
|
},
|
|
{
|
|
"epoch": 1.1992156862745098,
|
|
"grad_norm": 0.5273786750525696,
|
|
"learning_rate": 3.9389115778210666e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1581505835056305,
|
|
"step": 765,
|
|
"valid_targets_mean": 5229.5,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.2070588235294117,
|
|
"grad_norm": 0.5096133597670871,
|
|
"learning_rate": 3.936979564596653e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14591318368911743,
|
|
"step": 770,
|
|
"valid_targets_mean": 4233.5,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 1.2149019607843137,
|
|
"grad_norm": 0.5094892679529109,
|
|
"learning_rate": 3.935017962512599e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1134859174489975,
|
|
"step": 775,
|
|
"valid_targets_mean": 4776.5,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 1.2227450980392156,
|
|
"grad_norm": 0.48559437202992656,
|
|
"learning_rate": 3.933026801533893e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15698082745075226,
|
|
"step": 780,
|
|
"valid_targets_mean": 5618.9,
|
|
"valid_targets_min": 4369
|
|
},
|
|
{
|
|
"epoch": 1.2305882352941175,
|
|
"grad_norm": 0.48072649355968317,
|
|
"learning_rate": 3.9310061120770556e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375308632850647,
|
|
"step": 785,
|
|
"valid_targets_mean": 5107.0,
|
|
"valid_targets_min": 4087
|
|
},
|
|
{
|
|
"epoch": 1.2384313725490197,
|
|
"grad_norm": 0.5052916757132446,
|
|
"learning_rate": 3.928955925009682e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15378865599632263,
|
|
"step": 790,
|
|
"valid_targets_mean": 4994.2,
|
|
"valid_targets_min": 3250
|
|
},
|
|
{
|
|
"epoch": 1.2462745098039216,
|
|
"grad_norm": 0.46829147777070945,
|
|
"learning_rate": 3.9268762716499615e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14035716652870178,
|
|
"step": 795,
|
|
"valid_targets_mean": 6135.1,
|
|
"valid_targets_min": 3933
|
|
},
|
|
{
|
|
"epoch": 1.2541176470588236,
|
|
"grad_norm": 0.4701253789676099,
|
|
"learning_rate": 3.924767183766208e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1210692971944809,
|
|
"step": 800,
|
|
"valid_targets_mean": 4518.0,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 1.2619607843137255,
|
|
"grad_norm": 0.6003498152850449,
|
|
"learning_rate": 3.922628693576369e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14886260032653809,
|
|
"step": 805,
|
|
"valid_targets_mean": 4616.0,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 1.2698039215686274,
|
|
"grad_norm": 0.47056032724895036,
|
|
"learning_rate": 3.9204608337475323e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14457407593727112,
|
|
"step": 810,
|
|
"valid_targets_mean": 5869.6,
|
|
"valid_targets_min": 4108
|
|
},
|
|
{
|
|
"epoch": 1.2776470588235294,
|
|
"grad_norm": 0.5387640533112291,
|
|
"learning_rate": 3.9182636373954345e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18448501825332642,
|
|
"step": 815,
|
|
"valid_targets_mean": 5140.8,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 1.2854901960784313,
|
|
"grad_norm": 0.5109916217490196,
|
|
"learning_rate": 3.916037138083947e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12615464627742767,
|
|
"step": 820,
|
|
"valid_targets_mean": 5185.1,
|
|
"valid_targets_min": 3440
|
|
},
|
|
{
|
|
"epoch": 1.2933333333333334,
|
|
"grad_norm": 0.413337630916849,
|
|
"learning_rate": 3.913781369824567e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14597749710083008,
|
|
"step": 825,
|
|
"valid_targets_mean": 6277.1,
|
|
"valid_targets_min": 3898
|
|
},
|
|
{
|
|
"epoch": 1.3011764705882354,
|
|
"grad_norm": 0.444988594954341,
|
|
"learning_rate": 3.911496367075897e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08822401612997055,
|
|
"step": 830,
|
|
"valid_targets_mean": 4550.4,
|
|
"valid_targets_min": 3266
|
|
},
|
|
{
|
|
"epoch": 1.3090196078431373,
|
|
"grad_norm": 0.4983989478424751,
|
|
"learning_rate": 3.909182164743122e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12227452546358109,
|
|
"step": 835,
|
|
"valid_targets_mean": 5036.8,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 1.3168627450980392,
|
|
"grad_norm": 0.561060858311853,
|
|
"learning_rate": 3.906838798177469e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311677098274231,
|
|
"step": 840,
|
|
"valid_targets_mean": 4114.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 1.3247058823529412,
|
|
"grad_norm": 0.47894864098262196,
|
|
"learning_rate": 3.904466303175674e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1337360441684723,
|
|
"step": 845,
|
|
"valid_targets_mean": 5061.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.332549019607843,
|
|
"grad_norm": 0.47614638449340735,
|
|
"learning_rate": 3.90206471597943e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1335444450378418,
|
|
"step": 850,
|
|
"valid_targets_mean": 4545.1,
|
|
"valid_targets_min": 3721
|
|
},
|
|
{
|
|
"epoch": 1.340392156862745,
|
|
"grad_norm": 0.4677737814426354,
|
|
"learning_rate": 3.8996340732748396e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1509006768465042,
|
|
"step": 855,
|
|
"valid_targets_mean": 5181.5,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 1.348235294117647,
|
|
"grad_norm": 0.46833585385482007,
|
|
"learning_rate": 3.8971744121918465e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13002841174602509,
|
|
"step": 860,
|
|
"valid_targets_mean": 5174.0,
|
|
"valid_targets_min": 4119
|
|
},
|
|
{
|
|
"epoch": 1.356078431372549,
|
|
"grad_norm": 0.5363904152736998,
|
|
"learning_rate": 3.894685770303675e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1599728763103485,
|
|
"step": 865,
|
|
"valid_targets_mean": 4943.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 1.3639215686274508,
|
|
"grad_norm": 0.5168195017978404,
|
|
"learning_rate": 3.8921681856262535e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1535210907459259,
|
|
"step": 870,
|
|
"valid_targets_mean": 5614.4,
|
|
"valid_targets_min": 3721
|
|
},
|
|
{
|
|
"epoch": 1.371764705882353,
|
|
"grad_norm": 0.4489152633927091,
|
|
"learning_rate": 3.889621696617633e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13121816515922546,
|
|
"step": 875,
|
|
"valid_targets_mean": 4431.6,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.379607843137255,
|
|
"grad_norm": 0.5318353134954502,
|
|
"learning_rate": 3.887046342177401e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1512901484966278,
|
|
"step": 880,
|
|
"valid_targets_mean": 3760.1,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 1.3874509803921569,
|
|
"grad_norm": 0.47434667654611545,
|
|
"learning_rate": 3.884442161646086e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12792325019836426,
|
|
"step": 885,
|
|
"valid_targets_mean": 4732.4,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 1.3952941176470588,
|
|
"grad_norm": 0.45916298821601925,
|
|
"learning_rate": 3.881809194804559e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14497032761573792,
|
|
"step": 890,
|
|
"valid_targets_mean": 5339.4,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 1.4031372549019607,
|
|
"grad_norm": 0.5147058496422254,
|
|
"learning_rate": 3.879147481873423e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14493274688720703,
|
|
"step": 895,
|
|
"valid_targets_mean": 4245.0,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 1.4109803921568629,
|
|
"grad_norm": 0.5079699928075103,
|
|
"learning_rate": 3.876457063512399e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1499646008014679,
|
|
"step": 900,
|
|
"valid_targets_mean": 5170.5,
|
|
"valid_targets_min": 3806
|
|
},
|
|
{
|
|
"epoch": 1.4188235294117648,
|
|
"grad_norm": 0.48521231587984825,
|
|
"learning_rate": 3.873737980819707e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13502424955368042,
|
|
"step": 905,
|
|
"valid_targets_mean": 4743.6,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 1.4266666666666667,
|
|
"grad_norm": 0.49002430410787634,
|
|
"learning_rate": 3.870990275331437e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17316213250160217,
|
|
"step": 910,
|
|
"valid_targets_mean": 4804.0,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.4345098039215687,
|
|
"grad_norm": 0.448319116370862,
|
|
"learning_rate": 3.8682139890209124e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17228415608406067,
|
|
"step": 915,
|
|
"valid_targets_mean": 6150.1,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 1.4423529411764706,
|
|
"grad_norm": 0.5130134184980588,
|
|
"learning_rate": 3.865409164298052e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336456686258316,
|
|
"step": 920,
|
|
"valid_targets_mean": 4057.1,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.4501960784313725,
|
|
"grad_norm": 0.4980402994846629,
|
|
"learning_rate": 3.8625758440087213e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16118893027305603,
|
|
"step": 925,
|
|
"valid_targets_mean": 4822.4,
|
|
"valid_targets_min": 3827
|
|
},
|
|
{
|
|
"epoch": 1.4580392156862745,
|
|
"grad_norm": 0.5009845187637655,
|
|
"learning_rate": 3.859714071434078e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16723394393920898,
|
|
"step": 930,
|
|
"valid_targets_mean": 5267.9,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.4658823529411764,
|
|
"grad_norm": 0.4863356628955196,
|
|
"learning_rate": 3.8568238902899085e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14159944653511047,
|
|
"step": 935,
|
|
"valid_targets_mean": 4978.8,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 1.4737254901960783,
|
|
"grad_norm": 0.4497822611903985,
|
|
"learning_rate": 3.853905344725963e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13361850380897522,
|
|
"step": 940,
|
|
"valid_targets_mean": 5665.6,
|
|
"valid_targets_min": 3905
|
|
},
|
|
{
|
|
"epoch": 1.4815686274509803,
|
|
"grad_norm": 0.5146300814678416,
|
|
"learning_rate": 3.850958479325281e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12134017795324326,
|
|
"step": 945,
|
|
"valid_targets_mean": 4427.0,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 1.4894117647058824,
|
|
"grad_norm": 0.5281264265225604,
|
|
"learning_rate": 3.8479833391035085e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657969653606415,
|
|
"step": 950,
|
|
"valid_targets_mean": 4344.9,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.4972549019607844,
|
|
"grad_norm": 0.479812574389006,
|
|
"learning_rate": 3.844979969508211e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.177751362323761,
|
|
"step": 955,
|
|
"valid_targets_mean": 5552.0,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.5050980392156863,
|
|
"grad_norm": 0.4936250416594059,
|
|
"learning_rate": 3.84194841641818e-05,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137670636177063,
|
|
"step": 960,
|
|
"valid_targets_mean": 4584.0,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 1.5129411764705882,
|
|
"grad_norm": 0.4538053026161223,
|
|
"learning_rate": 3.838888726142732e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12420643121004105,
|
|
"step": 965,
|
|
"valid_targets_mean": 4935.8,
|
|
"valid_targets_min": 3706
|
|
},
|
|
{
|
|
"epoch": 1.5207843137254902,
|
|
"grad_norm": 0.5155129837104027,
|
|
"learning_rate": 3.8358009454210006e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16140836477279663,
|
|
"step": 970,
|
|
"valid_targets_mean": 5325.9,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 1.5286274509803923,
|
|
"grad_norm": 0.49336171684185426,
|
|
"learning_rate": 3.8326851214212206e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10420454293489456,
|
|
"step": 975,
|
|
"valid_targets_mean": 3031.6,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.5364705882352943,
|
|
"grad_norm": 0.5216785235800537,
|
|
"learning_rate": 3.829541301740014e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11610683798789978,
|
|
"step": 980,
|
|
"valid_targets_mean": 4581.1,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 1.5443137254901962,
|
|
"grad_norm": 0.4982786592132544,
|
|
"learning_rate": 3.826369534401653e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12638339400291443,
|
|
"step": 985,
|
|
"valid_targets_mean": 4808.4,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 1.5521568627450981,
|
|
"grad_norm": 0.46001833393154,
|
|
"learning_rate": 3.823169867857337e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11307136714458466,
|
|
"step": 990,
|
|
"valid_targets_mean": 4355.0,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"grad_norm": 0.4833662698038769,
|
|
"learning_rate": 3.819942350984444e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11104926466941833,
|
|
"step": 995,
|
|
"valid_targets_mean": 4540.6,
|
|
"valid_targets_min": 4192
|
|
},
|
|
{
|
|
"epoch": 1.567843137254902,
|
|
"grad_norm": 0.44997560353011723,
|
|
"learning_rate": 3.816687033085788e-05,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426372528076172,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4483.5,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 1.575686274509804,
|
|
"grad_norm": 0.5050412828032175,
|
|
"learning_rate": 3.813403963888866e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10803429782390594,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4369.6,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 1.5835294117647059,
|
|
"grad_norm": 0.44621975247023177,
|
|
"learning_rate": 3.810093193545097e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17702722549438477,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5999.9,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 1.5913725490196078,
|
|
"grad_norm": 0.8651595892595954,
|
|
"learning_rate": 3.806754772629055e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14309613406658173,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4706.1,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.5992156862745097,
|
|
"grad_norm": 0.44048850251332833,
|
|
"learning_rate": 3.8033887521377015e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14977940917015076,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5189.2,
|
|
"valid_targets_min": 3654
|
|
},
|
|
{
|
|
"epoch": 1.6070588235294117,
|
|
"grad_norm": 0.45488785447156654,
|
|
"learning_rate": 3.799995183489599e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17549139261245728,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6152.9,
|
|
"valid_targets_min": 4129
|
|
},
|
|
{
|
|
"epoch": 1.6149019607843136,
|
|
"grad_norm": 0.5061271912231851,
|
|
"learning_rate": 3.796574118524131e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12901580333709717,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4406.8,
|
|
"valid_targets_min": 3784
|
|
},
|
|
{
|
|
"epoch": 1.6227450980392157,
|
|
"grad_norm": 0.52053743574784,
|
|
"learning_rate": 3.793125609500709e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15901905298233032,
|
|
"step": 1035,
|
|
"valid_targets_mean": 6584.6,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 1.6305882352941177,
|
|
"grad_norm": 0.45222189135415203,
|
|
"learning_rate": 3.789649709097973e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1181027889251709,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4745.6,
|
|
"valid_targets_min": 2823
|
|
},
|
|
{
|
|
"epoch": 1.6384313725490196,
|
|
"grad_norm": 0.4851710635859494,
|
|
"learning_rate": 3.786146470412988e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13692988455295563,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4307.1,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 1.6462745098039215,
|
|
"grad_norm": 0.4449926143396252,
|
|
"learning_rate": 3.782615946960432e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12634792923927307,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5285.6,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 1.6541176470588237,
|
|
"grad_norm": 0.4255159233509927,
|
|
"learning_rate": 3.779058192671777e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17466798424720764,
|
|
"step": 1055,
|
|
"valid_targets_mean": 6423.2,
|
|
"valid_targets_min": 3217
|
|
},
|
|
{
|
|
"epoch": 1.6619607843137256,
|
|
"grad_norm": 0.4661072625409414,
|
|
"learning_rate": 3.775473261894472e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16991335153579712,
|
|
"step": 1060,
|
|
"valid_targets_mean": 6138.6,
|
|
"valid_targets_min": 3941
|
|
},
|
|
{
|
|
"epoch": 1.6698039215686276,
|
|
"grad_norm": 0.4601255216887334,
|
|
"learning_rate": 3.771861209391103e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13610923290252686,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5478.5,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 1.6776470588235295,
|
|
"grad_norm": 0.5230661717446022,
|
|
"learning_rate": 3.768222090338564e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13504083454608917,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3810.9,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.6854901960784314,
|
|
"grad_norm": 0.46292040093035497,
|
|
"learning_rate": 3.7645559603272104e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1403416395187378,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5192.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.6933333333333334,
|
|
"grad_norm": 0.4743311898519161,
|
|
"learning_rate": 3.76086287536001e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.136699378490448,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4379.9,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 1.7011764705882353,
|
|
"grad_norm": 0.4578590976308974,
|
|
"learning_rate": 3.757142891851691e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11357353627681732,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4284.6,
|
|
"valid_targets_min": 2690
|
|
},
|
|
{
|
|
"epoch": 1.7090196078431372,
|
|
"grad_norm": 0.4582742532967724,
|
|
"learning_rate": 3.753396066627876e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13202431797981262,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4889.5,
|
|
"valid_targets_min": 3500
|
|
},
|
|
{
|
|
"epoch": 1.7168627450980392,
|
|
"grad_norm": 0.6419543594016554,
|
|
"learning_rate": 3.749622456924215e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293194741010666,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5156.4,
|
|
"valid_targets_min": 3659
|
|
},
|
|
{
|
|
"epoch": 1.724705882352941,
|
|
"grad_norm": 0.45768521600117745,
|
|
"learning_rate": 3.745822120385512e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11098246276378632,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4385.8,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.732549019607843,
|
|
"grad_norm": 0.49857946385847945,
|
|
"learning_rate": 3.7419951150648445e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10976303368806839,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3149.5,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.740392156862745,
|
|
"grad_norm": 0.5223095413990212,
|
|
"learning_rate": 3.738141499422677e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13085030019283295,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4533.4,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 1.748235294117647,
|
|
"grad_norm": 0.4719135078151772,
|
|
"learning_rate": 3.7342613323259654e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1135668084025383,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4173.2,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 1.756078431372549,
|
|
"grad_norm": 0.41643702050626275,
|
|
"learning_rate": 3.7303546730472607e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426919400691986,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5989.4,
|
|
"valid_targets_min": 4148
|
|
},
|
|
{
|
|
"epoch": 1.763921568627451,
|
|
"grad_norm": 1.1579455301589454,
|
|
"learning_rate": 3.726421581263802e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12776562571525574,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4249.0,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.771764705882353,
|
|
"grad_norm": 0.43820783536742886,
|
|
"learning_rate": 3.722462117056607e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17049311101436615,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5343.8,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 1.779607843137255,
|
|
"grad_norm": 0.4757596511200127,
|
|
"learning_rate": 3.718476340909548e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12422124296426773,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5063.4,
|
|
"valid_targets_min": 3835
|
|
},
|
|
{
|
|
"epoch": 1.787450980392157,
|
|
"grad_norm": 0.41826233562035153,
|
|
"learning_rate": 3.714464313708439e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12859922647476196,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5404.1,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 1.795294117647059,
|
|
"grad_norm": 0.44162788017321236,
|
|
"learning_rate": 3.710426096740094e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11257381737232208,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5128.1,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 1.8031372549019609,
|
|
"grad_norm": 0.4551068777852194,
|
|
"learning_rate": 3.7063617516913974e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10090089589357376,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3786.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.8109803921568628,
|
|
"grad_norm": 0.4583894818863251,
|
|
"learning_rate": 3.7022713406483626e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12224529683589935,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4558.8,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 1.8188235294117647,
|
|
"grad_norm": 0.43081368426656125,
|
|
"learning_rate": 3.698154926095177e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12552262842655182,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5213.6,
|
|
"valid_targets_min": 3787
|
|
},
|
|
{
|
|
"epoch": 1.8266666666666667,
|
|
"grad_norm": 0.41341290941153364,
|
|
"learning_rate": 3.694012570913254e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356113851070404,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5838.6,
|
|
"valid_targets_min": 4390
|
|
},
|
|
{
|
|
"epoch": 1.8345098039215686,
|
|
"grad_norm": 0.4158583955688108,
|
|
"learning_rate": 3.689844338380271e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16743731498718262,
|
|
"step": 1170,
|
|
"valid_targets_mean": 6286.6,
|
|
"valid_targets_min": 3685
|
|
},
|
|
{
|
|
"epoch": 1.8423529411764705,
|
|
"grad_norm": 0.484500471235257,
|
|
"learning_rate": 3.6856502921692004e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14831358194351196,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4034.6,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.8501960784313725,
|
|
"grad_norm": 0.455779334606989,
|
|
"learning_rate": 3.681430496347339e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260300576686859,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4453.2,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 1.8580392156862744,
|
|
"grad_norm": 0.5096323281143307,
|
|
"learning_rate": 3.677185015375329e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11872885376214981,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3511.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.8658823529411763,
|
|
"grad_norm": 0.43999152241492123,
|
|
"learning_rate": 3.672913914106173e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1158122569322586,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4290.5,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 1.8737254901960785,
|
|
"grad_norm": 0.48920594620970465,
|
|
"learning_rate": 3.6686172577842425e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13682183623313904,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4568.4,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 1.8815686274509804,
|
|
"grad_norm": 0.44927886511604515,
|
|
"learning_rate": 3.6642951120442834e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1265093982219696,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5306.5,
|
|
"valid_targets_min": 4122
|
|
},
|
|
{
|
|
"epoch": 1.8894117647058823,
|
|
"grad_norm": 0.44852880714293136,
|
|
"learning_rate": 3.6599475429104125e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20821548998355865,
|
|
"step": 1205,
|
|
"valid_targets_mean": 7194.6,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 1.8972549019607843,
|
|
"grad_norm": 0.45975323373931865,
|
|
"learning_rate": 3.655574616795108e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14038732647895813,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5352.9,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 1.9050980392156864,
|
|
"grad_norm": 0.4542669953517701,
|
|
"learning_rate": 3.651176400498194e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162247896194458,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5459.9,
|
|
"valid_targets_min": 4407
|
|
},
|
|
{
|
|
"epoch": 1.9129411764705884,
|
|
"grad_norm": 0.45445970473597597,
|
|
"learning_rate": 3.646752961205825e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17922481894493103,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5047.5,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 1.9207843137254903,
|
|
"grad_norm": 0.4440437608773382,
|
|
"learning_rate": 3.642304366489453e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17314383387565613,
|
|
"step": 1225,
|
|
"valid_targets_mean": 6348.2,
|
|
"valid_targets_min": 4423
|
|
},
|
|
{
|
|
"epoch": 1.9286274509803922,
|
|
"grad_norm": 0.4455272581677328,
|
|
"learning_rate": 3.6378306843047996e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10712941735982895,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4371.1,
|
|
"valid_targets_min": 3719
|
|
},
|
|
{
|
|
"epoch": 1.9364705882352942,
|
|
"grad_norm": 0.43745751180019976,
|
|
"learning_rate": 3.6333319829908196e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1654994934797287,
|
|
"step": 1235,
|
|
"valid_targets_mean": 6174.9,
|
|
"valid_targets_min": 3664
|
|
},
|
|
{
|
|
"epoch": 1.944313725490196,
|
|
"grad_norm": 0.4912403616178532,
|
|
"learning_rate": 3.628808331268649e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13106322288513184,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4144.4,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 1.952156862745098,
|
|
"grad_norm": 0.452445374085872,
|
|
"learning_rate": 3.624259798240565e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15350663661956787,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5449.9,
|
|
"valid_targets_min": 3396
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"grad_norm": 0.4411165119879023,
|
|
"learning_rate": 3.6196864533889245e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12838168442249298,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4447.8,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.967843137254902,
|
|
"grad_norm": 0.4543534263411601,
|
|
"learning_rate": 3.615088366575104e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13006886839866638,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4545.5,
|
|
"valid_targets_min": 3821
|
|
},
|
|
{
|
|
"epoch": 1.9756862745098038,
|
|
"grad_norm": 0.4601904040094776,
|
|
"learning_rate": 3.610465608038432e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13418616354465485,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4651.2,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 1.9835294117647058,
|
|
"grad_norm": 0.4815101194517065,
|
|
"learning_rate": 3.605818248395118e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12441887706518173,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3983.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.9913725490196077,
|
|
"grad_norm": 0.4277499013050439,
|
|
"learning_rate": 3.6011463586371715e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1192770004272461,
|
|
"step": 1270,
|
|
"valid_targets_mean": 5400.0,
|
|
"valid_targets_min": 3763
|
|
},
|
|
{
|
|
"epoch": 1.9992156862745099,
|
|
"grad_norm": 0.4347555154251497,
|
|
"learning_rate": 3.596450010131319e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1098412424325943,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4126.8,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 2.0062745098039216,
|
|
"grad_norm": 0.49406934913702216,
|
|
"learning_rate": 3.5917292746179134e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14379660785198212,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5874.0,
|
|
"valid_targets_min": 4065
|
|
},
|
|
{
|
|
"epoch": 2.0141176470588236,
|
|
"grad_norm": 0.47747163458178515,
|
|
"learning_rate": 3.586984224209837e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11529427021741867,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4390.4,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 2.0219607843137255,
|
|
"grad_norm": 0.47614601274638535,
|
|
"learning_rate": 3.582214931391402e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334877908229828,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4999.0,
|
|
"valid_targets_min": 3822
|
|
},
|
|
{
|
|
"epoch": 2.0298039215686274,
|
|
"grad_norm": 0.5435783800690973,
|
|
"learning_rate": 3.5774214690172405e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15348975360393524,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5010.4,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 2.0376470588235294,
|
|
"grad_norm": 0.44424346250698454,
|
|
"learning_rate": 3.572603910311196e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14961959421634674,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5940.9,
|
|
"valid_targets_min": 4218
|
|
},
|
|
{
|
|
"epoch": 2.0454901960784313,
|
|
"grad_norm": 0.4725572481432307,
|
|
"learning_rate": 3.5677623288652e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12347137928009033,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4268.1,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 2.0533333333333332,
|
|
"grad_norm": 0.45253545085688257,
|
|
"learning_rate": 3.5628967986381485e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1305963397026062,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5040.6,
|
|
"valid_targets_min": 3270
|
|
},
|
|
{
|
|
"epoch": 2.061176470588235,
|
|
"grad_norm": 0.48319012058587707,
|
|
"learning_rate": 3.558007393954778e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12236924469470978,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4753.5,
|
|
"valid_targets_min": 3988
|
|
},
|
|
{
|
|
"epoch": 2.069019607843137,
|
|
"grad_norm": 0.47863506721894705,
|
|
"learning_rate": 3.553094189504522e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12937095761299133,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5278.0,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 2.076862745098039,
|
|
"grad_norm": 0.4469316289866355,
|
|
"learning_rate": 3.548157260340376e-05,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12116946280002594,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4327.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 2.084705882352941,
|
|
"grad_norm": 0.488329122471083,
|
|
"learning_rate": 3.5431966818777476e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14848512411117554,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4556.0,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 2.0925490196078433,
|
|
"grad_norm": 0.47486994431176777,
|
|
"learning_rate": 3.5382125298933055e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13251954317092896,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4969.9,
|
|
"valid_targets_min": 3886
|
|
},
|
|
{
|
|
"epoch": 2.1003921568627453,
|
|
"grad_norm": 0.4253423768345962,
|
|
"learning_rate": 3.533204880523823e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1283872276544571,
|
|
"step": 1340,
|
|
"valid_targets_mean": 6134.9,
|
|
"valid_targets_min": 3973
|
|
},
|
|
{
|
|
"epoch": 2.108235294117647,
|
|
"grad_norm": 0.496767996454418,
|
|
"learning_rate": 3.528173810265015e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13580843806266785,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4923.4,
|
|
"valid_targets_min": 3575
|
|
},
|
|
{
|
|
"epoch": 2.116078431372549,
|
|
"grad_norm": 0.4660080527453285,
|
|
"learning_rate": 3.5231193959703654e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17615219950675964,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5835.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 2.123921568627451,
|
|
"grad_norm": 0.46711806043598725,
|
|
"learning_rate": 3.51804171484996e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15228474140167236,
|
|
"step": 1355,
|
|
"valid_targets_mean": 6393.4,
|
|
"valid_targets_min": 4455
|
|
},
|
|
{
|
|
"epoch": 2.131764705882353,
|
|
"grad_norm": 0.41490391447867236,
|
|
"learning_rate": 3.5129408444693014e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15093214809894562,
|
|
"step": 1360,
|
|
"valid_targets_mean": 5871.1,
|
|
"valid_targets_min": 3873
|
|
},
|
|
{
|
|
"epoch": 2.139607843137255,
|
|
"grad_norm": 0.42139998046562105,
|
|
"learning_rate": 3.507816862748126e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11608295142650604,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4608.5,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 2.147450980392157,
|
|
"grad_norm": 0.49943677396847425,
|
|
"learning_rate": 3.502669847959213e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275392472743988,
|
|
"step": 1370,
|
|
"valid_targets_mean": 5043.8,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 2.155294117647059,
|
|
"grad_norm": 0.4563761892623548,
|
|
"learning_rate": 3.497499878727193e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14616072177886963,
|
|
"step": 1375,
|
|
"valid_targets_mean": 7183.4,
|
|
"valid_targets_min": 3707
|
|
},
|
|
{
|
|
"epoch": 2.1631372549019607,
|
|
"grad_norm": 0.4482274006261775,
|
|
"learning_rate": 3.49230703402734e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14997485280036926,
|
|
"step": 1380,
|
|
"valid_targets_mean": 5360.2,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 2.1709803921568627,
|
|
"grad_norm": 0.4481538100768857,
|
|
"learning_rate": 3.487091393184369e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1007670909166336,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3932.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.1788235294117646,
|
|
"grad_norm": 0.4703243318763376,
|
|
"learning_rate": 3.481853035871224e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15726238489151,
|
|
"step": 1390,
|
|
"valid_targets_mean": 5147.0,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 2.1866666666666665,
|
|
"grad_norm": 0.44451201605423757,
|
|
"learning_rate": 3.476592042107862e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13600096106529236,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5580.8,
|
|
"valid_targets_min": 3203
|
|
},
|
|
{
|
|
"epoch": 2.1945098039215685,
|
|
"grad_norm": 0.4912807228525545,
|
|
"learning_rate": 3.4713084922600274e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09468073397874832,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3824.4,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 2.2023529411764704,
|
|
"grad_norm": 0.45830741770005073,
|
|
"learning_rate": 3.466002467038028e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143297016620636,
|
|
"step": 1405,
|
|
"valid_targets_mean": 5275.2,
|
|
"valid_targets_min": 3251
|
|
},
|
|
{
|
|
"epoch": 2.2101960784313723,
|
|
"grad_norm": 0.4668274014535714,
|
|
"learning_rate": 3.460674047495497e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09991783648729324,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3815.8,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.2180392156862747,
|
|
"grad_norm": 0.4489570344754549,
|
|
"learning_rate": 3.455323315028164e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12543469667434692,
|
|
"step": 1415,
|
|
"valid_targets_mean": 5017.8,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 2.2258823529411766,
|
|
"grad_norm": 0.5333401777644601,
|
|
"learning_rate": 3.449950351372599e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12710413336753845,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3627.4,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 2.2337254901960786,
|
|
"grad_norm": 0.44109891090686365,
|
|
"learning_rate": 3.444555238604974e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11362719535827637,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4074.1,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 2.2415686274509805,
|
|
"grad_norm": 0.4595201489275352,
|
|
"learning_rate": 3.439138059139808e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319398432970047,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5178.9,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 2.2494117647058824,
|
|
"grad_norm": 0.4208099104043227,
|
|
"learning_rate": 3.433698895728701e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11416264623403549,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5147.2,
|
|
"valid_targets_min": 3650
|
|
},
|
|
{
|
|
"epoch": 2.2572549019607844,
|
|
"grad_norm": 0.43793374535667273,
|
|
"learning_rate": 3.428237831459078e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1420862078666687,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5160.4,
|
|
"valid_targets_min": 3250
|
|
},
|
|
{
|
|
"epoch": 2.2650980392156863,
|
|
"grad_norm": 0.45067716884729503,
|
|
"learning_rate": 3.422754949752917e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13499972224235535,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5317.6,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 2.2729411764705882,
|
|
"grad_norm": 0.4580659976380947,
|
|
"learning_rate": 3.41725033436547e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12633192539215088,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4831.5,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.28078431372549,
|
|
"grad_norm": 0.516177217353346,
|
|
"learning_rate": 3.411724069383993e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310024857521057,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5435.0,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 2.288627450980392,
|
|
"grad_norm": 0.48697865571605803,
|
|
"learning_rate": 3.4061762392264545e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293107569217682,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4807.9,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 2.296470588235294,
|
|
"grad_norm": 0.4354195667810358,
|
|
"learning_rate": 3.400606928640245e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14635756611824036,
|
|
"step": 1465,
|
|
"valid_targets_mean": 6255.8,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 2.304313725490196,
|
|
"grad_norm": 0.48611114803778643,
|
|
"learning_rate": 3.3950162227008884e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11857324838638306,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4925.6,
|
|
"valid_targets_min": 3386
|
|
},
|
|
{
|
|
"epoch": 2.312156862745098,
|
|
"grad_norm": 0.42191866868399774,
|
|
"learning_rate": 3.389404206810739e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12723243236541748,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5264.6,
|
|
"valid_targets_min": 3238
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"grad_norm": 0.4396679490258797,
|
|
"learning_rate": 3.383770966697675e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11699369549751282,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5793.8,
|
|
"valid_targets_min": 3558
|
|
},
|
|
{
|
|
"epoch": 2.3278431372549018,
|
|
"grad_norm": 0.4902899568659877,
|
|
"learning_rate": 3.378116588413792e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24559059739112854,
|
|
"step": 1485,
|
|
"valid_targets_mean": 7003.0,
|
|
"valid_targets_min": 3702
|
|
},
|
|
{
|
|
"epoch": 2.335686274509804,
|
|
"grad_norm": 0.47633799485479983,
|
|
"learning_rate": 3.372441158334089e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12257198244333267,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4004.5,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 2.343529411764706,
|
|
"grad_norm": 0.44672499904897717,
|
|
"learning_rate": 3.3667447631551456e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1181827187538147,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4820.8,
|
|
"valid_targets_min": 3175
|
|
},
|
|
{
|
|
"epoch": 2.351372549019608,
|
|
"grad_norm": 0.4455879996610867,
|
|
"learning_rate": 3.361027489893799e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280711591243744,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4960.4,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 2.35921568627451,
|
|
"grad_norm": 0.43424311853827474,
|
|
"learning_rate": 3.3552894258858173e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14889156818389893,
|
|
"step": 1505,
|
|
"valid_targets_mean": 5842.0,
|
|
"valid_targets_min": 3544
|
|
},
|
|
{
|
|
"epoch": 2.367058823529412,
|
|
"grad_norm": 0.4922387509028004,
|
|
"learning_rate": 3.3495306587845616e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13032880425453186,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3905.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.374901960784314,
|
|
"grad_norm": 0.5966977015829197,
|
|
"learning_rate": 3.343751276559651e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11142288148403168,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4647.2,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 2.3827450980392157,
|
|
"grad_norm": 0.4454058382601194,
|
|
"learning_rate": 3.3379513674956134e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259860396385193,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5245.9,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 2.3905882352941177,
|
|
"grad_norm": 0.45951903603306143,
|
|
"learning_rate": 3.332131020190542e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11192375421524048,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4238.2,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 2.3984313725490196,
|
|
"grad_norm": 0.4609884325539377,
|
|
"learning_rate": 3.326290323554739e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12740443646907806,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3959.6,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 2.4062745098039215,
|
|
"grad_norm": 0.44078240901878263,
|
|
"learning_rate": 3.320429366809361e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11498337239027023,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4679.1,
|
|
"valid_targets_min": 3561
|
|
},
|
|
{
|
|
"epoch": 2.4141176470588235,
|
|
"grad_norm": 0.4310670652741434,
|
|
"learning_rate": 3.314548239485048e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284569501876831,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5744.0,
|
|
"valid_targets_min": 4080
|
|
},
|
|
{
|
|
"epoch": 2.4219607843137254,
|
|
"grad_norm": 0.4714813535920024,
|
|
"learning_rate": 3.308647031420567e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10750456899404526,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3619.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 2.4298039215686273,
|
|
"grad_norm": 0.44293101635244536,
|
|
"learning_rate": 3.3027258327614305e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13039925694465637,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4860.4,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 2.4376470588235293,
|
|
"grad_norm": 0.43484803493865387,
|
|
"learning_rate": 3.296784733958524e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11183600127696991,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5222.6,
|
|
"valid_targets_min": 3930
|
|
},
|
|
{
|
|
"epoch": 2.445490196078431,
|
|
"grad_norm": 0.46765151602123217,
|
|
"learning_rate": 3.2908238257667214e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11906329542398453,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4362.2,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 2.453333333333333,
|
|
"grad_norm": 0.43182305636606233,
|
|
"learning_rate": 3.2848431992435037e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15811893343925476,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5607.9,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.461176470588235,
|
|
"grad_norm": 0.4387652065332147,
|
|
"learning_rate": 3.278842945747561e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13541805744171143,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4755.2,
|
|
"valid_targets_min": 4177
|
|
},
|
|
{
|
|
"epoch": 2.469019607843137,
|
|
"grad_norm": 0.4185733835268311,
|
|
"learning_rate": 3.272823156937403e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12970206141471863,
|
|
"step": 1575,
|
|
"valid_targets_mean": 5787.6,
|
|
"valid_targets_min": 3621
|
|
},
|
|
{
|
|
"epoch": 2.4768627450980394,
|
|
"grad_norm": 0.4711281508785475,
|
|
"learning_rate": 3.266783924769954e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15308530628681183,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4942.0,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.4847058823529413,
|
|
"grad_norm": 0.45447871841348364,
|
|
"learning_rate": 3.2607253414991534e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11619564890861511,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4674.5,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 2.4925490196078433,
|
|
"grad_norm": 0.4465160796630261,
|
|
"learning_rate": 3.2546474996745424e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13548265397548676,
|
|
"step": 1590,
|
|
"valid_targets_mean": 5046.5,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 2.500392156862745,
|
|
"grad_norm": 0.4144953208811738,
|
|
"learning_rate": 3.248550492139851e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13685083389282227,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5657.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.508235294117647,
|
|
"grad_norm": 0.43182304422630846,
|
|
"learning_rate": 3.242434412031581e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12013621628284454,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4616.9,
|
|
"valid_targets_min": 3414
|
|
},
|
|
{
|
|
"epoch": 2.516078431372549,
|
|
"grad_norm": 0.4660775498895559,
|
|
"learning_rate": 3.236299352777583e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11420007050037384,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4527.4,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 2.523921568627451,
|
|
"grad_norm": 0.45754948586110533,
|
|
"learning_rate": 3.230145408095626e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16273413598537445,
|
|
"step": 1610,
|
|
"valid_targets_mean": 6180.2,
|
|
"valid_targets_min": 4221
|
|
},
|
|
{
|
|
"epoch": 2.531764705882353,
|
|
"grad_norm": 0.45687545602469465,
|
|
"learning_rate": 3.223972671991972e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11283574998378754,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4565.6,
|
|
"valid_targets_min": 2368
|
|
},
|
|
{
|
|
"epoch": 2.539607843137255,
|
|
"grad_norm": 0.479865914246267,
|
|
"learning_rate": 3.217781238759935e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13249436020851135,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4217.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.547450980392157,
|
|
"grad_norm": 0.4776400280048058,
|
|
"learning_rate": 3.211571202978442e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1418047398328781,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3918.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 2.5552941176470587,
|
|
"grad_norm": 0.43020556971208407,
|
|
"learning_rate": 3.2053426595105865e-05,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324760615825653,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5967.1,
|
|
"valid_targets_min": 3776
|
|
},
|
|
{
|
|
"epoch": 2.5631372549019606,
|
|
"grad_norm": 0.4185589917837543,
|
|
"learning_rate": 3.199095703502185e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12262311577796936,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5342.9,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 2.5709803921568626,
|
|
"grad_norm": 0.47589527083065164,
|
|
"learning_rate": 3.1928304303803174e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15121924877166748,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5145.8,
|
|
"valid_targets_min": 3736
|
|
},
|
|
{
|
|
"epoch": 2.578823529411765,
|
|
"grad_norm": 0.42863147588811173,
|
|
"learning_rate": 3.1865469358518726e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12953457236289978,
|
|
"step": 1645,
|
|
"valid_targets_mean": 5564.0,
|
|
"valid_targets_min": 3317
|
|
},
|
|
{
|
|
"epoch": 2.586666666666667,
|
|
"grad_norm": 0.46873159466077136,
|
|
"learning_rate": 3.180245315902084e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129906564950943,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4264.9,
|
|
"valid_targets_min": 3863
|
|
},
|
|
{
|
|
"epoch": 2.594509803921569,
|
|
"grad_norm": 0.49845920847390646,
|
|
"learning_rate": 3.173925666793065e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657537519931793,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4386.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 2.6023529411764708,
|
|
"grad_norm": 0.43434473275438257,
|
|
"learning_rate": 3.1675880850623416e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11459280550479889,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4374.2,
|
|
"valid_targets_min": 3576
|
|
},
|
|
{
|
|
"epoch": 2.6101960784313727,
|
|
"grad_norm": 0.44211704342614505,
|
|
"learning_rate": 3.1612326675213717e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13624370098114014,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5264.5,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 2.6180392156862746,
|
|
"grad_norm": 0.40733426030617187,
|
|
"learning_rate": 3.154859511254067e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14822620153427124,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5660.6,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 2.6258823529411766,
|
|
"grad_norm": 0.4650866384206454,
|
|
"learning_rate": 3.148468713615318e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17924489080905914,
|
|
"step": 1675,
|
|
"valid_targets_mean": 5058.1,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 2.6337254901960785,
|
|
"grad_norm": 0.4365772877935849,
|
|
"learning_rate": 3.1420603722294935e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09969859570264816,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4293.0,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 2.6415686274509804,
|
|
"grad_norm": 0.45178883265618114,
|
|
"learning_rate": 3.135634584988962e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15429271757602692,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5230.4,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 2.6494117647058824,
|
|
"grad_norm": 0.42393672537729515,
|
|
"learning_rate": 3.1291914500525886e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12069963663816452,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4799.1,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 2.6572549019607843,
|
|
"grad_norm": 0.4472349743410201,
|
|
"learning_rate": 3.1227310658442395e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13962559401988983,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5052.2,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 2.665098039215686,
|
|
"grad_norm": 0.4435255408265362,
|
|
"learning_rate": 3.1162535310512745e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18697775900363922,
|
|
"step": 1700,
|
|
"valid_targets_mean": 6966.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 2.672941176470588,
|
|
"grad_norm": 0.400378639093422,
|
|
"learning_rate": 3.109758944623042e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14106003940105438,
|
|
"step": 1705,
|
|
"valid_targets_mean": 5895.4,
|
|
"valid_targets_min": 4538
|
|
},
|
|
{
|
|
"epoch": 2.68078431372549,
|
|
"grad_norm": 0.39366406496277145,
|
|
"learning_rate": 3.103247405769372e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1421252191066742,
|
|
"step": 1710,
|
|
"valid_targets_mean": 6712.8,
|
|
"valid_targets_min": 3756
|
|
},
|
|
{
|
|
"epoch": 2.688627450980392,
|
|
"grad_norm": 0.6232248293351081,
|
|
"learning_rate": 3.0967190139590484e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11953885853290558,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4721.9,
|
|
"valid_targets_min": 3757
|
|
},
|
|
{
|
|
"epoch": 2.696470588235294,
|
|
"grad_norm": 0.44110015445192224,
|
|
"learning_rate": 3.090173868918303e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1372273862361908,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5184.2,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 2.704313725490196,
|
|
"grad_norm": 0.47470877708753634,
|
|
"learning_rate": 3.083612070629283e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13897572457790375,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5535.2,
|
|
"valid_targets_min": 3820
|
|
},
|
|
{
|
|
"epoch": 2.712156862745098,
|
|
"grad_norm": 0.4238654201759072,
|
|
"learning_rate": 3.077033719328529e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13699355721473694,
|
|
"step": 1730,
|
|
"valid_targets_mean": 6275.8,
|
|
"valid_targets_min": 4239
|
|
},
|
|
{
|
|
"epoch": 2.7199999999999998,
|
|
"grad_norm": 0.42493381878695413,
|
|
"learning_rate": 3.070438915505439e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10450537502765656,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4946.8,
|
|
"valid_targets_min": 3587
|
|
},
|
|
{
|
|
"epoch": 2.7278431372549017,
|
|
"grad_norm": 0.4106939373497361,
|
|
"learning_rate": 3.063827759900739e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17494139075279236,
|
|
"step": 1740,
|
|
"valid_targets_mean": 6192.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.735686274509804,
|
|
"grad_norm": 0.4428920480136722,
|
|
"learning_rate": 3.057200353504938e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16369616985321045,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5451.5,
|
|
"valid_targets_min": 3356
|
|
},
|
|
{
|
|
"epoch": 2.743529411764706,
|
|
"grad_norm": 0.4626566972156148,
|
|
"learning_rate": 3.0505567975567915e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15678079426288605,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5895.6,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 2.751372549019608,
|
|
"grad_norm": 0.4322955570050535,
|
|
"learning_rate": 3.04389719354175e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1328631043434143,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3907.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 2.75921568627451,
|
|
"grad_norm": 0.44543125908794073,
|
|
"learning_rate": 3.0372216431904103e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10451729595661163,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4064.8,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 2.767058823529412,
|
|
"grad_norm": 0.4479690161498993,
|
|
"learning_rate": 3.030530248476963e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14125706255435944,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4940.9,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 2.7749019607843137,
|
|
"grad_norm": 0.42835308933585325,
|
|
"learning_rate": 3.0238231116176338e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14682066440582275,
|
|
"step": 1770,
|
|
"valid_targets_mean": 5570.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.7827450980392157,
|
|
"grad_norm": 0.4648163924246701,
|
|
"learning_rate": 3.0171003350691194e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11747677624225616,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4642.2,
|
|
"valid_targets_min": 3477
|
|
},
|
|
{
|
|
"epoch": 2.7905882352941176,
|
|
"grad_norm": 0.5334908826522222,
|
|
"learning_rate": 3.0103620215270285e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1429332196712494,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4720.1,
|
|
"valid_targets_min": 4152
|
|
},
|
|
{
|
|
"epoch": 2.7984313725490195,
|
|
"grad_norm": 0.4449881064053317,
|
|
"learning_rate": 3.0036082739243064e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11575782299041748,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4842.4,
|
|
"valid_targets_min": 3042
|
|
},
|
|
{
|
|
"epoch": 2.8062745098039215,
|
|
"grad_norm": 0.4290932726739035,
|
|
"learning_rate": 2.996839195429667e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13041456043720245,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4798.0,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.8141176470588234,
|
|
"grad_norm": 0.4646067543054696,
|
|
"learning_rate": 2.9900548894460146e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1131235733628273,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4532.4,
|
|
"valid_targets_min": 3948
|
|
},
|
|
{
|
|
"epoch": 2.8219607843137258,
|
|
"grad_norm": 0.4232279944636071,
|
|
"learning_rate": 2.9832554596088653e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11675336211919785,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4799.2,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 2.8298039215686277,
|
|
"grad_norm": 0.41084241487504936,
|
|
"learning_rate": 2.9764410097847657e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458739936351776,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5765.4,
|
|
"valid_targets_min": 4453
|
|
},
|
|
{
|
|
"epoch": 2.8376470588235296,
|
|
"grad_norm": 0.4241203124969836,
|
|
"learning_rate": 2.9696116440697008e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12164306640625,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4533.0,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 2.8454901960784316,
|
|
"grad_norm": 0.42588082618706485,
|
|
"learning_rate": 2.9627674667875104e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10476063191890717,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4577.5,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 2.8533333333333335,
|
|
"grad_norm": 0.41335493887260444,
|
|
"learning_rate": 2.9559085824882916e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12935465574264526,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 3426
|
|
},
|
|
{
|
|
"epoch": 2.8611764705882354,
|
|
"grad_norm": 0.5337788816004103,
|
|
"learning_rate": 2.9490350959468014e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10734622180461884,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5307.0,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 2.8690196078431374,
|
|
"grad_norm": 0.44830080430862884,
|
|
"learning_rate": 2.9421471121608588e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11297869682312012,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4190.8,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 2.8768627450980393,
|
|
"grad_norm": 0.4007711643122302,
|
|
"learning_rate": 2.9352447363497378e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16458137333393097,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6978.6,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 2.8847058823529412,
|
|
"grad_norm": 0.39773763766796294,
|
|
"learning_rate": 2.928328073952564e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286367028951645,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5583.5,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 2.892549019607843,
|
|
"grad_norm": 0.454040617571797,
|
|
"learning_rate": 2.921397230626699e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1450670063495636,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5594.1,
|
|
"valid_targets_min": 3706
|
|
},
|
|
{
|
|
"epoch": 2.900392156862745,
|
|
"grad_norm": 0.41116945699577084,
|
|
"learning_rate": 2.914452312246131e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14225062727928162,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5994.2,
|
|
"valid_targets_min": 4215
|
|
},
|
|
{
|
|
"epoch": 2.908235294117647,
|
|
"grad_norm": 0.40390666482787463,
|
|
"learning_rate": 2.9074934248998557e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13591104745864868,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5521.1,
|
|
"valid_targets_min": 2581
|
|
},
|
|
{
|
|
"epoch": 2.916078431372549,
|
|
"grad_norm": 0.4442386605722065,
|
|
"learning_rate": 2.9005206748902538e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158932626247406,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6273.5,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 2.923921568627451,
|
|
"grad_norm": 0.41974772349567996,
|
|
"learning_rate": 2.8935341687314703e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16174735128879547,
|
|
"step": 1865,
|
|
"valid_targets_mean": 6337.6,
|
|
"valid_targets_min": 4558
|
|
},
|
|
{
|
|
"epoch": 2.931764705882353,
|
|
"grad_norm": 0.44476931989709534,
|
|
"learning_rate": 2.8865340131477846e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10695183277130127,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3961.9,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 2.9396078431372548,
|
|
"grad_norm": 0.416211474169275,
|
|
"learning_rate": 2.8795203150719836e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12205926328897476,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4913.1,
|
|
"valid_targets_min": 3659
|
|
},
|
|
{
|
|
"epoch": 2.9474509803921567,
|
|
"grad_norm": 0.4015858546768977,
|
|
"learning_rate": 2.8724931816437255e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10511714965105057,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5183.1,
|
|
"valid_targets_min": 3549
|
|
},
|
|
{
|
|
"epoch": 2.9552941176470586,
|
|
"grad_norm": 0.4310599320558801,
|
|
"learning_rate": 2.8654527202079027e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14543288946151733,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4850.9,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 2.9631372549019606,
|
|
"grad_norm": 0.46151553503203574,
|
|
"learning_rate": 2.8583990383130043e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12393741309642792,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4401.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.9709803921568625,
|
|
"grad_norm": 0.4186468327330401,
|
|
"learning_rate": 2.8513322437094727e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12815925478935242,
|
|
"step": 1895,
|
|
"valid_targets_mean": 5884.8,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 2.978823529411765,
|
|
"grad_norm": 0.44909415275917536,
|
|
"learning_rate": 2.844252444348055e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12026601284742355,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4397.2,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 2.986666666666667,
|
|
"grad_norm": 0.4178535789052994,
|
|
"learning_rate": 2.8371597483781577e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09047207981348038,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4132.5,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 2.9945098039215687,
|
|
"grad_norm": 0.41450796554632624,
|
|
"learning_rate": 2.8300542641461937e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13465872406959534,
|
|
"step": 1910,
|
|
"valid_targets_mean": 6010.4,
|
|
"valid_targets_min": 4250
|
|
},
|
|
{
|
|
"epoch": 3.0015686274509803,
|
|
"grad_norm": 0.4470382089291171,
|
|
"learning_rate": 2.822936100193924e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11736665666103363,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4557.0,
|
|
"valid_targets_min": 3016
|
|
},
|
|
{
|
|
"epoch": 3.0094117647058822,
|
|
"grad_norm": 0.39975791197038996,
|
|
"learning_rate": 2.8158053652568046e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13188017904758453,
|
|
"step": 1920,
|
|
"valid_targets_mean": 6199.8,
|
|
"valid_targets_min": 4235
|
|
},
|
|
{
|
|
"epoch": 3.017254901960784,
|
|
"grad_norm": 0.4621722858358028,
|
|
"learning_rate": 2.808662168262321e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11106382310390472,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4623.4,
|
|
"valid_targets_min": 3713
|
|
},
|
|
{
|
|
"epoch": 3.025098039215686,
|
|
"grad_norm": 0.46398242680006196,
|
|
"learning_rate": 2.8015066183283272e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11143679916858673,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4373.8,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 3.032941176470588,
|
|
"grad_norm": 0.46429899136506897,
|
|
"learning_rate": 2.7943388247613787e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1419023871421814,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5609.1,
|
|
"valid_targets_min": 4401
|
|
},
|
|
{
|
|
"epoch": 3.0407843137254904,
|
|
"grad_norm": 0.4177935456004596,
|
|
"learning_rate": 2.787158897055061e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13458001613616943,
|
|
"step": 1940,
|
|
"valid_targets_mean": 5785.2,
|
|
"valid_targets_min": 4146
|
|
},
|
|
{
|
|
"epoch": 3.0486274509803923,
|
|
"grad_norm": 0.4798329507670239,
|
|
"learning_rate": 2.7799669448883165e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12980468571186066,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3767.0,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 3.0564705882352943,
|
|
"grad_norm": 0.45370620057025185,
|
|
"learning_rate": 2.7727630781237743e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11602476239204407,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4428.5,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 3.064313725490196,
|
|
"grad_norm": 0.42386002564337155,
|
|
"learning_rate": 2.7655474068060644e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11869405210018158,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4955.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.072156862745098,
|
|
"grad_norm": 0.4624724916837085,
|
|
"learning_rate": 2.7583200411601424e-05,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11248797178268433,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5139.6,
|
|
"valid_targets_min": 2463
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"grad_norm": 0.49276300943649953,
|
|
"learning_rate": 2.7510810915896043e-05,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1347253918647766,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4627.1,
|
|
"valid_targets_min": 3925
|
|
},
|
|
{
|
|
"epoch": 3.087843137254902,
|
|
"grad_norm": 0.4537105665837089,
|
|
"learning_rate": 2.7438306686749978e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1028389185667038,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4340.6,
|
|
"valid_targets_min": 3457
|
|
},
|
|
{
|
|
"epoch": 3.095686274509804,
|
|
"grad_norm": 0.4786772286893058,
|
|
"learning_rate": 2.7365688831721358e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394977569580078,
|
|
"step": 1975,
|
|
"valid_targets_mean": 5662.8,
|
|
"valid_targets_min": 4784
|
|
},
|
|
{
|
|
"epoch": 3.103529411764706,
|
|
"grad_norm": 0.4221521099004864,
|
|
"learning_rate": 2.7292958460104027e-05,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12005674839019775,
|
|
"step": 1980,
|
|
"valid_targets_mean": 5236.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 3.111372549019608,
|
|
"grad_norm": 0.4713294355913343,
|
|
"learning_rate": 2.7220116682910628e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13472476601600647,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5587.2,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 3.1192156862745097,
|
|
"grad_norm": 0.48485785894809524,
|
|
"learning_rate": 2.714716461285559e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11079380661249161,
|
|
"step": 1990,
|
|
"valid_targets_mean": 5059.9,
|
|
"valid_targets_min": 4067
|
|
},
|
|
{
|
|
"epoch": 3.1270588235294117,
|
|
"grad_norm": 0.3865155820887114,
|
|
"learning_rate": 2.7074103364338155e-05,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14102301001548767,
|
|
"step": 1995,
|
|
"valid_targets_mean": 7066.0,
|
|
"valid_targets_min": 4159
|
|
},
|
|
{
|
|
"epoch": 3.1349019607843136,
|
|
"grad_norm": 0.4167007093343525,
|
|
"learning_rate": 2.7000934053425347e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12152238935232162,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5553.2,
|
|
"valid_targets_min": 3258
|
|
},
|
|
{
|
|
"epoch": 3.1427450980392155,
|
|
"grad_norm": 0.46178770323833723,
|
|
"learning_rate": 2.692765779783494e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11298070102930069,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4834.6,
|
|
"valid_targets_min": 3572
|
|
},
|
|
{
|
|
"epoch": 3.1505882352941175,
|
|
"grad_norm": 0.4254803800819983,
|
|
"learning_rate": 2.6854275716918352e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14271336793899536,
|
|
"step": 2010,
|
|
"valid_targets_mean": 5455.9,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 3.1584313725490194,
|
|
"grad_norm": 0.40766272201286496,
|
|
"learning_rate": 2.678078893164359e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11576013267040253,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5689.1,
|
|
"valid_targets_min": 3770
|
|
},
|
|
{
|
|
"epoch": 3.1662745098039213,
|
|
"grad_norm": 0.4479413965395343,
|
|
"learning_rate": 2.6707198564578066e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11379249393939972,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4206.0,
|
|
"valid_targets_min": 2551
|
|
},
|
|
{
|
|
"epoch": 3.1741176470588237,
|
|
"grad_norm": 0.4808618564097989,
|
|
"learning_rate": 2.663350573987152e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11528089642524719,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4083.5,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 3.1819607843137256,
|
|
"grad_norm": 0.4791059262260379,
|
|
"learning_rate": 2.655971158323879e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11764775961637497,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4272.1,
|
|
"valid_targets_min": 3329
|
|
},
|
|
{
|
|
"epoch": 3.1898039215686276,
|
|
"grad_norm": 0.4079561136254852,
|
|
"learning_rate": 2.648581722194264e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12682706117630005,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5694.6,
|
|
"valid_targets_min": 4027
|
|
},
|
|
{
|
|
"epoch": 3.1976470588235295,
|
|
"grad_norm": 0.46614542573474316,
|
|
"learning_rate": 2.6411823784776537e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10366252809762955,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4016.1,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 3.2054901960784314,
|
|
"grad_norm": 0.44433799334657453,
|
|
"learning_rate": 2.6337732402047422e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08220021426677704,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4368.2,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 3.2133333333333334,
|
|
"grad_norm": 0.40293548415907443,
|
|
"learning_rate": 2.626354420555841e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10658583045005798,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5266.0,
|
|
"valid_targets_min": 4091
|
|
},
|
|
{
|
|
"epoch": 3.2211764705882353,
|
|
"grad_norm": 0.4666325695872482,
|
|
"learning_rate": 2.618926032859154e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10789386183023453,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4211.2,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 3.2290196078431372,
|
|
"grad_norm": 0.45655733977587615,
|
|
"learning_rate": 2.611488190589043e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12257785350084305,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4696.2,
|
|
"valid_targets_min": 4053
|
|
},
|
|
{
|
|
"epoch": 3.236862745098039,
|
|
"grad_norm": 0.4329709174203213,
|
|
"learning_rate": 2.6040410073642965e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10476210713386536,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4275.8,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 3.244705882352941,
|
|
"grad_norm": 0.440155231341118,
|
|
"learning_rate": 2.596584596946392e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11637284606695175,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4477.5,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 3.252549019607843,
|
|
"grad_norm": 0.4449824012501871,
|
|
"learning_rate": 2.589119073237762e-05,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149437814950943,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5425.0,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 3.260392156862745,
|
|
"grad_norm": 0.4685964216290081,
|
|
"learning_rate": 2.5816445502800494e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13167878985404968,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4829.0,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 3.268235294117647,
|
|
"grad_norm": 0.4439347617185429,
|
|
"learning_rate": 2.5741611422523684e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10784819722175598,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4812.5,
|
|
"valid_targets_min": 3609
|
|
},
|
|
{
|
|
"epoch": 3.276078431372549,
|
|
"grad_norm": 0.5838494963864344,
|
|
"learning_rate": 2.566668963469559e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13364551961421967,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5934.4,
|
|
"valid_targets_min": 3554
|
|
},
|
|
{
|
|
"epoch": 3.283921568627451,
|
|
"grad_norm": 0.452780636676896,
|
|
"learning_rate": 2.5591681283804426e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14745384454727173,
|
|
"step": 2095,
|
|
"valid_targets_mean": 6183.4,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 3.291764705882353,
|
|
"grad_norm": 0.4082550380034591,
|
|
"learning_rate": 2.5516587515660706e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12805598974227905,
|
|
"step": 2100,
|
|
"valid_targets_mean": 5796.4,
|
|
"valid_targets_min": 3701
|
|
},
|
|
{
|
|
"epoch": 3.299607843137255,
|
|
"grad_norm": 0.44512108489373853,
|
|
"learning_rate": 2.5441409477379764e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0928315594792366,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4362.0,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 3.307450980392157,
|
|
"grad_norm": 0.4491075718064251,
|
|
"learning_rate": 2.5366148317364237e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10094700753688812,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3963.4,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 3.315294117647059,
|
|
"grad_norm": 0.4084634028281077,
|
|
"learning_rate": 2.5290805185286494e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10314441472291946,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4324.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 3.323137254901961,
|
|
"grad_norm": 0.40305743758028045,
|
|
"learning_rate": 2.521538123207111e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330246478319168,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5806.5,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 3.330980392156863,
|
|
"grad_norm": 0.490413218157373,
|
|
"learning_rate": 2.5139877609877244e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13471296429634094,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5983.1,
|
|
"valid_targets_min": 3689
|
|
},
|
|
{
|
|
"epoch": 3.3388235294117647,
|
|
"grad_norm": 0.4221485138866272,
|
|
"learning_rate": 2.506429547208107e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10946246236562729,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4768.0,
|
|
"valid_targets_min": 4005
|
|
},
|
|
{
|
|
"epoch": 3.3466666666666667,
|
|
"grad_norm": 0.43619763140515655,
|
|
"learning_rate": 2.498863597325815e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11581180989742279,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5030.1,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 3.3545098039215686,
|
|
"grad_norm": 0.4340185313236203,
|
|
"learning_rate": 2.4912900269165797e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13056831061840057,
|
|
"step": 2140,
|
|
"valid_targets_mean": 6122.5,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 3.3623529411764705,
|
|
"grad_norm": 0.4339416916920078,
|
|
"learning_rate": 2.483708951672541e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12624061107635498,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5268.2,
|
|
"valid_targets_min": 3765
|
|
},
|
|
{
|
|
"epoch": 3.3701960784313725,
|
|
"grad_norm": 0.4185465223206931,
|
|
"learning_rate": 2.4761204874004818e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12540462613105774,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5548.9,
|
|
"valid_targets_min": 3340
|
|
},
|
|
{
|
|
"epoch": 3.3780392156862744,
|
|
"grad_norm": 0.4629433162956907,
|
|
"learning_rate": 2.4685247500200583e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1088143140077591,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4770.6,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.3858823529411763,
|
|
"grad_norm": 0.4469362553316339,
|
|
"learning_rate": 2.4609218555620275e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12578721344470978,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5227.4,
|
|
"valid_targets_min": 4045
|
|
},
|
|
{
|
|
"epoch": 3.3937254901960783,
|
|
"grad_norm": 0.4083540026794189,
|
|
"learning_rate": 2.4533119201664785e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10173282027244568,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4978.2,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 3.40156862745098,
|
|
"grad_norm": 0.45005039909609124,
|
|
"learning_rate": 2.4456950600810542e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15307322144508362,
|
|
"step": 2170,
|
|
"valid_targets_mean": 6141.0,
|
|
"valid_targets_min": 3708
|
|
},
|
|
{
|
|
"epoch": 3.409411764705882,
|
|
"grad_norm": 0.456971465581117,
|
|
"learning_rate": 2.4380713916591785e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12354742735624313,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4147.4,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.417254901960784,
|
|
"grad_norm": 0.4192464726745103,
|
|
"learning_rate": 2.4304410313582776e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12310852110385895,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5426.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.4250980392156865,
|
|
"grad_norm": 0.4623057342766921,
|
|
"learning_rate": 2.422804095738002e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08867044746875763,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3166.9,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 3.4329411764705884,
|
|
"grad_norm": 0.42272630015621676,
|
|
"learning_rate": 2.4151607014584437e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15862008929252625,
|
|
"step": 2190,
|
|
"valid_targets_mean": 7008.6,
|
|
"valid_targets_min": 4008
|
|
},
|
|
{
|
|
"epoch": 3.4407843137254903,
|
|
"grad_norm": 0.43328267483970184,
|
|
"learning_rate": 2.4075109652783573e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11292370408773422,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4449.6,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 3.4486274509803923,
|
|
"grad_norm": 0.4768501124653413,
|
|
"learning_rate": 2.3998550040533743e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13935428857803345,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4378.5,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 3.456470588235294,
|
|
"grad_norm": 0.48498689651676025,
|
|
"learning_rate": 2.392192934734219e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13610461354255676,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4721.4,
|
|
"valid_targets_min": 3900
|
|
},
|
|
{
|
|
"epoch": 3.464313725490196,
|
|
"grad_norm": 0.438002364845604,
|
|
"learning_rate": 2.3845248743649196e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13045144081115723,
|
|
"step": 2210,
|
|
"valid_targets_mean": 5198.1,
|
|
"valid_targets_min": 3612
|
|
},
|
|
{
|
|
"epoch": 3.472156862745098,
|
|
"grad_norm": 0.43856726750377506,
|
|
"learning_rate": 2.376850940081025e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14291036128997803,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5360.0,
|
|
"valid_targets_min": 3791
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"grad_norm": 0.42829004003882515,
|
|
"learning_rate": 2.3691712491078107e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09986069798469543,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4041.4,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 3.487843137254902,
|
|
"grad_norm": 0.42860874627618456,
|
|
"learning_rate": 2.3614859187584914e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390523761510849,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4862.5,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 3.495686274509804,
|
|
"grad_norm": 0.45758430157224284,
|
|
"learning_rate": 2.353795066432427e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293371021747589,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6039.2,
|
|
"valid_targets_min": 4011
|
|
},
|
|
{
|
|
"epoch": 3.503529411764706,
|
|
"grad_norm": 0.40690069816683694,
|
|
"learning_rate": 2.3460988096133284e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1241215318441391,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5175.9,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 3.5113725490196077,
|
|
"grad_norm": 0.42667398814165264,
|
|
"learning_rate": 2.338397265867468e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11234007775783539,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4937.1,
|
|
"valid_targets_min": 4136
|
|
},
|
|
{
|
|
"epoch": 3.5192156862745096,
|
|
"grad_norm": 0.45620590756787643,
|
|
"learning_rate": 2.3306905528418762e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14224466681480408,
|
|
"step": 2245,
|
|
"valid_targets_mean": 5523.1,
|
|
"valid_targets_min": 3314
|
|
},
|
|
{
|
|
"epoch": 3.527058823529412,
|
|
"grad_norm": 0.4355976647889371,
|
|
"learning_rate": 2.3229787882625496e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12049795687198639,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4769.6,
|
|
"valid_targets_min": 3255
|
|
},
|
|
{
|
|
"epoch": 3.534901960784314,
|
|
"grad_norm": 0.4249656649650876,
|
|
"learning_rate": 2.315262089932653e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12235796451568604,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5176.5,
|
|
"valid_targets_min": 4087
|
|
},
|
|
{
|
|
"epoch": 3.542745098039216,
|
|
"grad_norm": 0.39944541820812374,
|
|
"learning_rate": 2.3075405757307147e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12830835580825806,
|
|
"step": 2260,
|
|
"valid_targets_mean": 6016.6,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 3.550588235294118,
|
|
"grad_norm": 0.42800819746416663,
|
|
"learning_rate": 2.2998143636088323e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12007558345794678,
|
|
"step": 2265,
|
|
"valid_targets_mean": 5109.9,
|
|
"valid_targets_min": 3790
|
|
},
|
|
{
|
|
"epoch": 3.5584313725490198,
|
|
"grad_norm": 0.44878546033357863,
|
|
"learning_rate": 2.2920835715908654e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13189059495925903,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4443.2,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 3.5662745098039217,
|
|
"grad_norm": 0.4146723757546106,
|
|
"learning_rate": 2.2843483177706363e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1503450870513916,
|
|
"step": 2275,
|
|
"valid_targets_mean": 6065.6,
|
|
"valid_targets_min": 3785
|
|
},
|
|
{
|
|
"epoch": 3.5741176470588236,
|
|
"grad_norm": 0.4225547761609464,
|
|
"learning_rate": 2.2766087203101245e-05,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12521719932556152,
|
|
"step": 2280,
|
|
"valid_targets_mean": 5207.5,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 3.5819607843137256,
|
|
"grad_norm": 0.4398977225101529,
|
|
"learning_rate": 2.2688648974376622e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10479364544153214,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3719.0,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 3.5898039215686275,
|
|
"grad_norm": 0.4527946589071924,
|
|
"learning_rate": 2.261116967446127e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13563832640647888,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4599.1,
|
|
"valid_targets_min": 4026
|
|
},
|
|
{
|
|
"epoch": 3.5976470588235294,
|
|
"grad_norm": 0.47041472704564335,
|
|
"learning_rate": 2.2533650486911375e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13656792044639587,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4525.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.6054901960784314,
|
|
"grad_norm": 0.4086974075384432,
|
|
"learning_rate": 2.245609259589243e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10149809718132019,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5378.1,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 3.6133333333333333,
|
|
"grad_norm": 0.4056430572188208,
|
|
"learning_rate": 2.2378497186161146e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1506578028202057,
|
|
"step": 2305,
|
|
"valid_targets_mean": 6498.0,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 3.621176470588235,
|
|
"grad_norm": 0.4841163523591486,
|
|
"learning_rate": 2.230086544304737e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12092341482639313,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4269.1,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 3.629019607843137,
|
|
"grad_norm": 0.45333346896349414,
|
|
"learning_rate": 2.222319855243597e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11672091484069824,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4143.0,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 3.636862745098039,
|
|
"grad_norm": 0.4016947816643865,
|
|
"learning_rate": 2.2145497700748723e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11376665532588959,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5850.8,
|
|
"valid_targets_min": 3387
|
|
},
|
|
{
|
|
"epoch": 3.644705882352941,
|
|
"grad_norm": 0.4126136767330353,
|
|
"learning_rate": 2.2067764074926163e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13109418749809265,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5778.8,
|
|
"valid_targets_min": 3775
|
|
},
|
|
{
|
|
"epoch": 3.652549019607843,
|
|
"grad_norm": 0.41578934552380625,
|
|
"learning_rate": 2.198999886240951e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10275600850582123,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4524.2,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 3.660392156862745,
|
|
"grad_norm": 0.44452653520101426,
|
|
"learning_rate": 2.1912203251122475e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11369035392999649,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5199.6,
|
|
"valid_targets_min": 3988
|
|
},
|
|
{
|
|
"epoch": 3.668235294117647,
|
|
"grad_norm": 0.41591953670482334,
|
|
"learning_rate": 2.1834378429453133e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13211897015571594,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5778.9,
|
|
"valid_targets_min": 3808
|
|
},
|
|
{
|
|
"epoch": 3.6760784313725487,
|
|
"grad_norm": 0.4531417976566699,
|
|
"learning_rate": 2.175652558623577e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12305917590856552,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5130.2,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 3.683921568627451,
|
|
"grad_norm": 0.4251294965950789,
|
|
"learning_rate": 2.1678645910732734e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13801831007003784,
|
|
"step": 2350,
|
|
"valid_targets_mean": 6060.2,
|
|
"valid_targets_min": 4055
|
|
},
|
|
{
|
|
"epoch": 3.691764705882353,
|
|
"grad_norm": 0.39161104888530957,
|
|
"learning_rate": 2.1600740592616245e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09401237964630127,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4600.4,
|
|
"valid_targets_min": 3945
|
|
},
|
|
{
|
|
"epoch": 3.699607843137255,
|
|
"grad_norm": 0.4101883182219131,
|
|
"learning_rate": 2.152281082195024e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12118353694677353,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4882.1,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 3.707450980392157,
|
|
"grad_norm": 0.4512215902350842,
|
|
"learning_rate": 2.1444857789172185e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11571136862039566,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4519.5,
|
|
"valid_targets_min": 3414
|
|
},
|
|
{
|
|
"epoch": 3.715294117647059,
|
|
"grad_norm": 0.47626254423885345,
|
|
"learning_rate": 2.1366882685074892e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12955179810523987,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4775.5,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 3.723137254901961,
|
|
"grad_norm": 0.41785316223172714,
|
|
"learning_rate": 2.1288886700788335e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13508838415145874,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5391.8,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.7309803921568627,
|
|
"grad_norm": 0.4192229406367633,
|
|
"learning_rate": 2.1210871027761438e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13090825080871582,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5792.1,
|
|
"valid_targets_min": 3642
|
|
},
|
|
{
|
|
"epoch": 3.7388235294117647,
|
|
"grad_norm": 0.4176080198776354,
|
|
"learning_rate": 2.1132836857743903e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13488727807998657,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5541.0,
|
|
"valid_targets_min": 4262
|
|
},
|
|
{
|
|
"epoch": 3.7466666666666666,
|
|
"grad_norm": 0.44036817865626426,
|
|
"learning_rate": 2.105478538276797e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1133100837469101,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4884.6,
|
|
"valid_targets_min": 2117
|
|
},
|
|
{
|
|
"epoch": 3.7545098039215685,
|
|
"grad_norm": 0.41275929823495167,
|
|
"learning_rate": 2.0976717795130233e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12568649649620056,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5308.1,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 3.7623529411764705,
|
|
"grad_norm": 0.5082975043944233,
|
|
"learning_rate": 2.0898635287373423e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803896129131317,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5720.0,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.7701960784313724,
|
|
"grad_norm": 0.4541812584797464,
|
|
"learning_rate": 2.0820539052268186e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09599797427654266,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3557.0,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.7780392156862748,
|
|
"grad_norm": 0.4056475066376984,
|
|
"learning_rate": 2.0742430282794857e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09998275339603424,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4593.8,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.7858823529411767,
|
|
"grad_norm": 0.43025267393819644,
|
|
"learning_rate": 2.0664310172125242e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10418716073036194,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4152.6,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 3.7937254901960786,
|
|
"grad_norm": 0.4952415881512377,
|
|
"learning_rate": 2.0586179913604413e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11156953126192093,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4620.6,
|
|
"valid_targets_min": 3845
|
|
},
|
|
{
|
|
"epoch": 3.8015686274509806,
|
|
"grad_norm": 0.40593729890158115,
|
|
"learning_rate": 2.0508040700732438e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19600307941436768,
|
|
"step": 2425,
|
|
"valid_targets_mean": 7945.9,
|
|
"valid_targets_min": 3828
|
|
},
|
|
{
|
|
"epoch": 3.8094117647058825,
|
|
"grad_norm": 0.4425040889623802,
|
|
"learning_rate": 2.0429893727146167e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11140649020671844,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4031.5,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 3.8172549019607844,
|
|
"grad_norm": 0.43140264658950866,
|
|
"learning_rate": 2.0351740186601012e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13726525008678436,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5224.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 3.8250980392156864,
|
|
"grad_norm": 0.46830158593190896,
|
|
"learning_rate": 2.0273581272952708e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09190218150615692,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3842.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.8329411764705883,
|
|
"grad_norm": 0.4440042894022551,
|
|
"learning_rate": 2.0195418180139055e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11789971590042114,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4286.0,
|
|
"valid_targets_min": 3469
|
|
},
|
|
{
|
|
"epoch": 3.8407843137254902,
|
|
"grad_norm": 0.4658486778126832,
|
|
"learning_rate": 2.0117252102161687e-05,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12945345044136047,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3781.9,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 3.848627450980392,
|
|
"grad_norm": 0.43870048014904023,
|
|
"learning_rate": 2.0039084233067853e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09988433122634888,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4347.4,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 3.856470588235294,
|
|
"grad_norm": 0.42492966738094257,
|
|
"learning_rate": 1.9960915766932153e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14751821756362915,
|
|
"step": 2460,
|
|
"valid_targets_mean": 6676.2,
|
|
"valid_targets_min": 4367
|
|
},
|
|
{
|
|
"epoch": 3.864313725490196,
|
|
"grad_norm": 0.44542062652074005,
|
|
"learning_rate": 1.988274789783832e-05,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12891855835914612,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4793.4,
|
|
"valid_targets_min": 4049
|
|
},
|
|
{
|
|
"epoch": 3.872156862745098,
|
|
"grad_norm": 0.4233799855326381,
|
|
"learning_rate": 1.9804581819860952e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13386158645153046,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5416.6,
|
|
"valid_targets_min": 4194
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"grad_norm": 0.4412280011457465,
|
|
"learning_rate": 1.9726418727047295e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12778042256832123,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5579.5,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 3.887843137254902,
|
|
"grad_norm": 0.4023097325059467,
|
|
"learning_rate": 1.9648259813398987e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15061607956886292,
|
|
"step": 2480,
|
|
"valid_targets_mean": 6409.1,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 3.8956862745098038,
|
|
"grad_norm": 0.43138605272105507,
|
|
"learning_rate": 1.957010627285384e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13857755064964294,
|
|
"step": 2485,
|
|
"valid_targets_mean": 6154.2,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 3.9035294117647057,
|
|
"grad_norm": 0.4289359963511317,
|
|
"learning_rate": 1.9491959299267572e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12695324420928955,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4943.1,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 3.9113725490196076,
|
|
"grad_norm": 0.42620957261020587,
|
|
"learning_rate": 1.941382008639559e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10965865850448608,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4343.4,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 3.9192156862745096,
|
|
"grad_norm": 0.45057690502137776,
|
|
"learning_rate": 1.933568982787476e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14267298579216003,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5751.5,
|
|
"valid_targets_min": 4181
|
|
},
|
|
{
|
|
"epoch": 3.9270588235294115,
|
|
"grad_norm": 0.4297300138951614,
|
|
"learning_rate": 1.9257569717205153e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10492973774671555,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4229.2,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.934901960784314,
|
|
"grad_norm": 0.43391774893973384,
|
|
"learning_rate": 1.9179460947731824e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11742952466011047,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4764.4,
|
|
"valid_targets_min": 3967
|
|
},
|
|
{
|
|
"epoch": 3.942745098039216,
|
|
"grad_norm": 0.4089672901012728,
|
|
"learning_rate": 1.9101364712626577e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1054595410823822,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4593.4,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 3.9505882352941177,
|
|
"grad_norm": 0.40999184897082624,
|
|
"learning_rate": 1.9023282204869767e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1085086241364479,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5013.1,
|
|
"valid_targets_min": 3702
|
|
},
|
|
{
|
|
"epoch": 3.9584313725490197,
|
|
"grad_norm": 0.41882906394520675,
|
|
"learning_rate": 1.8945214617232036e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14806059002876282,
|
|
"step": 2525,
|
|
"valid_targets_mean": 5577.8,
|
|
"valid_targets_min": 4326
|
|
},
|
|
{
|
|
"epoch": 3.9662745098039216,
|
|
"grad_norm": 0.4304052346817819,
|
|
"learning_rate": 1.88671631422561e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09641310572624207,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4327.0,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 3.9741176470588235,
|
|
"grad_norm": 0.4317101934834094,
|
|
"learning_rate": 1.8789128972238565e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0965869277715683,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4908.0,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 3.9819607843137255,
|
|
"grad_norm": 0.4184915818337338,
|
|
"learning_rate": 1.8711113299211675e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12310964614152908,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5048.0,
|
|
"valid_targets_min": 3740
|
|
},
|
|
{
|
|
"epoch": 3.9898039215686274,
|
|
"grad_norm": 0.35774652150205566,
|
|
"learning_rate": 1.8633117314925118e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11670214682817459,
|
|
"step": 2545,
|
|
"valid_targets_mean": 7279.1,
|
|
"valid_targets_min": 4113
|
|
},
|
|
{
|
|
"epoch": 3.9976470588235293,
|
|
"grad_norm": 0.44492714388414273,
|
|
"learning_rate": 1.855514221082782e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12116006016731262,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5609.5,
|
|
"valid_targets_min": 3329
|
|
},
|
|
{
|
|
"epoch": 4.004705882352941,
|
|
"grad_norm": 0.3905173807417699,
|
|
"learning_rate": 1.8477189178049764e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10767467319965363,
|
|
"step": 2555,
|
|
"valid_targets_mean": 5506.8,
|
|
"valid_targets_min": 3669
|
|
},
|
|
{
|
|
"epoch": 4.012549019607843,
|
|
"grad_norm": 0.4200360475787566,
|
|
"learning_rate": 1.839925940738376e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11823758482933044,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5294.9,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 4.020392156862745,
|
|
"grad_norm": 0.44459223466418757,
|
|
"learning_rate": 1.8321354089267272e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10537134110927582,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4761.2,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 4.028235294117647,
|
|
"grad_norm": 0.44813095263169833,
|
|
"learning_rate": 1.8243474413764236e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1827888786792755,
|
|
"step": 2570,
|
|
"valid_targets_mean": 7468.4,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 4.036078431372549,
|
|
"grad_norm": 0.4065103840977924,
|
|
"learning_rate": 1.8165621570546874e-05,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11692411452531815,
|
|
"step": 2575,
|
|
"valid_targets_mean": 5616.6,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 4.043921568627451,
|
|
"grad_norm": 0.46152139221178673,
|
|
"learning_rate": 1.808779674887753e-05,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11232149600982666,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4916.9,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 4.051764705882353,
|
|
"grad_norm": 0.45609487602890614,
|
|
"learning_rate": 1.801000113759049e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1007688045501709,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4231.2,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 4.059607843137255,
|
|
"grad_norm": 0.4256474783072945,
|
|
"learning_rate": 1.7932235925073836e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08823169767856598,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4712.8,
|
|
"valid_targets_min": 3705
|
|
},
|
|
{
|
|
"epoch": 4.067450980392157,
|
|
"grad_norm": 0.4241762520558462,
|
|
"learning_rate": 1.7854502299251284e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10531572997570038,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4965.0,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 4.075294117647059,
|
|
"grad_norm": 0.48597629679850013,
|
|
"learning_rate": 1.7776801447564032e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12261404097080231,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4833.2,
|
|
"valid_targets_min": 3409
|
|
},
|
|
{
|
|
"epoch": 4.083137254901961,
|
|
"grad_norm": 0.43973803952933355,
|
|
"learning_rate": 1.7699134556952634e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10298376530408859,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4233.1,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 4.090980392156863,
|
|
"grad_norm": 0.42517819118994943,
|
|
"learning_rate": 1.7621502813838864e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11034772545099258,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4333.8,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 4.0988235294117645,
|
|
"grad_norm": 0.4451569708702985,
|
|
"learning_rate": 1.754390740410758e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08107735216617584,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3548.9,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 4.1066666666666665,
|
|
"grad_norm": 0.5227693065341409,
|
|
"learning_rate": 1.7466349513088636e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12296020984649658,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4051.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 4.114509803921568,
|
|
"grad_norm": 0.4112722338165756,
|
|
"learning_rate": 1.738883032553873e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11214260011911392,
|
|
"step": 2625,
|
|
"valid_targets_mean": 5300.0,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 4.12235294117647,
|
|
"grad_norm": 0.45372894539093084,
|
|
"learning_rate": 1.7311351025623385e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12222674489021301,
|
|
"step": 2630,
|
|
"valid_targets_mean": 5129.0,
|
|
"valid_targets_min": 4048
|
|
},
|
|
{
|
|
"epoch": 4.130196078431372,
|
|
"grad_norm": 0.4339950271739696,
|
|
"learning_rate": 1.723391279689876e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17926743626594543,
|
|
"step": 2635,
|
|
"valid_targets_mean": 7041.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 4.138039215686274,
|
|
"grad_norm": 0.44250483314179057,
|
|
"learning_rate": 1.7156516822293644e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10012166202068329,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4177.1,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 4.145882352941176,
|
|
"grad_norm": 0.45918959483703164,
|
|
"learning_rate": 1.7079164284091353e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10847637057304382,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4084.9,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.153725490196078,
|
|
"grad_norm": 0.42030460267083325,
|
|
"learning_rate": 1.7001856363911687e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10665237903594971,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5057.6,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 4.16156862745098,
|
|
"grad_norm": 0.5124514057765032,
|
|
"learning_rate": 1.692459424269286e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1121930330991745,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3674.1,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 4.169411764705882,
|
|
"grad_norm": 0.4655022868266258,
|
|
"learning_rate": 1.6847379100673474e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14640147984027863,
|
|
"step": 2660,
|
|
"valid_targets_mean": 5285.0,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 4.177254901960785,
|
|
"grad_norm": 0.4473665372096161,
|
|
"learning_rate": 1.6770212117374504e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10988258570432663,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4780.2,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.185098039215687,
|
|
"grad_norm": 0.47429600307448816,
|
|
"learning_rate": 1.6693094471581244e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10220754146575928,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4325.9,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 4.192941176470589,
|
|
"grad_norm": 0.43905684887483054,
|
|
"learning_rate": 1.6616027341325328e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10036975890398026,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4408.9,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 4.2007843137254905,
|
|
"grad_norm": 0.4507592242283545,
|
|
"learning_rate": 1.653901190386672e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11135761439800262,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4076.0,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 4.2086274509803925,
|
|
"grad_norm": 0.5042518199695983,
|
|
"learning_rate": 1.646204933567574e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11842361837625504,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4709.2,
|
|
"valid_targets_min": 3212
|
|
},
|
|
{
|
|
"epoch": 4.216470588235294,
|
|
"grad_norm": 0.46071191374599896,
|
|
"learning_rate": 1.638514081241509e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15044963359832764,
|
|
"step": 2690,
|
|
"valid_targets_mean": 5774.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 4.224313725490196,
|
|
"grad_norm": 0.44389765559581246,
|
|
"learning_rate": 1.6308287508921893e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09105284512042999,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4297.5,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 4.232156862745098,
|
|
"grad_norm": 0.43590212363737624,
|
|
"learning_rate": 1.6231490599189753e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1226283609867096,
|
|
"step": 2700,
|
|
"valid_targets_mean": 5617.2,
|
|
"valid_targets_min": 3678
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"grad_norm": 0.5042830560755973,
|
|
"learning_rate": 1.615475125635081e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09412622451782227,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4037.8,
|
|
"valid_targets_min": 3472
|
|
},
|
|
{
|
|
"epoch": 4.247843137254902,
|
|
"grad_norm": 0.5235601200803073,
|
|
"learning_rate": 1.607807065265782e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10899472236633301,
|
|
"step": 2710,
|
|
"valid_targets_mean": 5706.8,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 4.255686274509804,
|
|
"grad_norm": 0.5665857552679641,
|
|
"learning_rate": 1.600144995946626e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12623269855976105,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4821.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.263529411764706,
|
|
"grad_norm": 0.44745675544665836,
|
|
"learning_rate": 1.5924890347216433e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11597345769405365,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4474.1,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 4.271372549019608,
|
|
"grad_norm": 0.43576700508916605,
|
|
"learning_rate": 1.5848392985415573e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11160010099411011,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4962.0,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 4.27921568627451,
|
|
"grad_norm": 0.42040631139218665,
|
|
"learning_rate": 1.5771959042619983e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09204453229904175,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4499.6,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 4.287058823529412,
|
|
"grad_norm": 0.4185078430234579,
|
|
"learning_rate": 1.5695589686417224e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09613680839538574,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4420.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.294901960784314,
|
|
"grad_norm": 0.7464553074299054,
|
|
"learning_rate": 1.561928608340822e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09097284078598022,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3033.8,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 4.302745098039216,
|
|
"grad_norm": 0.47391683011563634,
|
|
"learning_rate": 1.554304939918946e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09764649718999863,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3555.8,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 4.310588235294118,
|
|
"grad_norm": 0.4376348635513123,
|
|
"learning_rate": 1.5466880798335222e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1270982027053833,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5968.5,
|
|
"valid_targets_min": 4153
|
|
},
|
|
{
|
|
"epoch": 4.3184313725490195,
|
|
"grad_norm": 0.40494854574033734,
|
|
"learning_rate": 1.539078144437973e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11913061887025833,
|
|
"step": 2755,
|
|
"valid_targets_mean": 6189.2,
|
|
"valid_targets_min": 3457
|
|
},
|
|
{
|
|
"epoch": 4.3262745098039215,
|
|
"grad_norm": 0.45313713668287336,
|
|
"learning_rate": 1.5314752499799427e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1106049120426178,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5059.8,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 4.334117647058823,
|
|
"grad_norm": 0.4702396390539599,
|
|
"learning_rate": 1.5238795125995189e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12513749301433563,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4199.2,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 4.341960784313725,
|
|
"grad_norm": 0.46378041277396537,
|
|
"learning_rate": 1.5162910483274593e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11600162088871002,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4496.1,
|
|
"valid_targets_min": 3342
|
|
},
|
|
{
|
|
"epoch": 4.349803921568627,
|
|
"grad_norm": 0.42640707797202987,
|
|
"learning_rate": 1.5087099730834207e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14321555197238922,
|
|
"step": 2775,
|
|
"valid_targets_mean": 6175.4,
|
|
"valid_targets_min": 3887
|
|
},
|
|
{
|
|
"epoch": 4.357647058823529,
|
|
"grad_norm": 0.4133009855709835,
|
|
"learning_rate": 1.5011364026741855e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14013375341892242,
|
|
"step": 2780,
|
|
"valid_targets_mean": 6673.4,
|
|
"valid_targets_min": 3618
|
|
},
|
|
{
|
|
"epoch": 4.365490196078431,
|
|
"grad_norm": 0.43137866627609533,
|
|
"learning_rate": 1.4935704527918937e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14634394645690918,
|
|
"step": 2785,
|
|
"valid_targets_mean": 6581.8,
|
|
"valid_targets_min": 4414
|
|
},
|
|
{
|
|
"epoch": 4.373333333333333,
|
|
"grad_norm": 0.44165769202726085,
|
|
"learning_rate": 1.4860122390122764e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14546926319599152,
|
|
"step": 2790,
|
|
"valid_targets_mean": 6116.5,
|
|
"valid_targets_min": 4051
|
|
},
|
|
{
|
|
"epoch": 4.381176470588235,
|
|
"grad_norm": 0.4689645233485951,
|
|
"learning_rate": 1.4784618767928898e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12605249881744385,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4507.4,
|
|
"valid_targets_min": 3297
|
|
},
|
|
{
|
|
"epoch": 4.389019607843137,
|
|
"grad_norm": 0.4604717250693386,
|
|
"learning_rate": 1.4709194814713507e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338595151901245,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4666.4,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 4.396862745098039,
|
|
"grad_norm": 0.4416536553126919,
|
|
"learning_rate": 1.4633851682635766e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11108751595020294,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4645.6,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.404705882352941,
|
|
"grad_norm": 0.44800681368895473,
|
|
"learning_rate": 1.4558590522620239e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1400332748889923,
|
|
"step": 2810,
|
|
"valid_targets_mean": 5793.5,
|
|
"valid_targets_min": 3699
|
|
},
|
|
{
|
|
"epoch": 4.412549019607843,
|
|
"grad_norm": 0.47104018143833,
|
|
"learning_rate": 1.4483412484339301e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1196877583861351,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4785.8,
|
|
"valid_targets_min": 3638
|
|
},
|
|
{
|
|
"epoch": 4.420392156862745,
|
|
"grad_norm": 0.4473945707470302,
|
|
"learning_rate": 1.4408318716195581e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13346664607524872,
|
|
"step": 2820,
|
|
"valid_targets_mean": 5117.9,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 4.428235294117647,
|
|
"grad_norm": 0.45604976355189436,
|
|
"learning_rate": 1.4333310365304413e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09396330267190933,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4225.5,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 4.436078431372549,
|
|
"grad_norm": 0.4105412639086073,
|
|
"learning_rate": 1.4258388577476322e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10879760980606079,
|
|
"step": 2830,
|
|
"valid_targets_mean": 5272.0,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 4.443921568627451,
|
|
"grad_norm": 0.43526705965860735,
|
|
"learning_rate": 1.4183554497199514e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11302154511213303,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5528.8,
|
|
"valid_targets_min": 2550
|
|
},
|
|
{
|
|
"epoch": 4.451764705882353,
|
|
"grad_norm": 0.4973859270038223,
|
|
"learning_rate": 1.4108809267622381e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10618742555379868,
|
|
"step": 2840,
|
|
"valid_targets_mean": 4212.6,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 4.459607843137255,
|
|
"grad_norm": 0.42940709964563273,
|
|
"learning_rate": 1.4034154030536083e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12761543691158295,
|
|
"step": 2845,
|
|
"valid_targets_mean": 5532.8,
|
|
"valid_targets_min": 4137
|
|
},
|
|
{
|
|
"epoch": 4.467450980392157,
|
|
"grad_norm": 0.4530085663866589,
|
|
"learning_rate": 1.3959589926357042e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309853196144104,
|
|
"step": 2850,
|
|
"valid_targets_mean": 5463.6,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 4.475294117647059,
|
|
"grad_norm": 0.41103496373928117,
|
|
"learning_rate": 1.3885118094109575e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08192500472068787,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4375.1,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 4.483137254901961,
|
|
"grad_norm": 0.48130708299153196,
|
|
"learning_rate": 1.3810739671408467e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09816533327102661,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3727.5,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 4.490980392156863,
|
|
"grad_norm": 0.46469907149492495,
|
|
"learning_rate": 1.3736455794441596e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11729377508163452,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4643.2,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 4.498823529411765,
|
|
"grad_norm": 0.4113549961505524,
|
|
"learning_rate": 1.3662267597952588e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12106738239526749,
|
|
"step": 2870,
|
|
"valid_targets_mean": 5856.4,
|
|
"valid_targets_min": 3370
|
|
},
|
|
{
|
|
"epoch": 4.506666666666667,
|
|
"grad_norm": 0.42611400758800033,
|
|
"learning_rate": 1.3588176215223463e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10529822111129761,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4626.0,
|
|
"valid_targets_min": 3890
|
|
},
|
|
{
|
|
"epoch": 4.514509803921569,
|
|
"grad_norm": 0.42086670763755624,
|
|
"learning_rate": 1.3514182778057365e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10337953269481659,
|
|
"step": 2880,
|
|
"valid_targets_mean": 5135.2,
|
|
"valid_targets_min": 3790
|
|
},
|
|
{
|
|
"epoch": 4.522352941176471,
|
|
"grad_norm": 0.4645237355462507,
|
|
"learning_rate": 1.3440288416761216e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12111011892557144,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4660.5,
|
|
"valid_targets_min": 3653
|
|
},
|
|
{
|
|
"epoch": 4.530196078431373,
|
|
"grad_norm": 0.47045742739732943,
|
|
"learning_rate": 1.3366494260128484e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10147681087255478,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4800.6,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 4.5380392156862746,
|
|
"grad_norm": 0.43344211525375426,
|
|
"learning_rate": 1.3292801435421935e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1245606392621994,
|
|
"step": 2895,
|
|
"valid_targets_mean": 5535.9,
|
|
"valid_targets_min": 3469
|
|
},
|
|
{
|
|
"epoch": 4.5458823529411765,
|
|
"grad_norm": 0.43479394266651805,
|
|
"learning_rate": 1.3219211068356418e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1493425965309143,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5873.8,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 4.553725490196078,
|
|
"grad_norm": 0.45168797366945695,
|
|
"learning_rate": 1.3145724283081651e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10814973711967468,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4748.0,
|
|
"valid_targets_min": 4218
|
|
},
|
|
{
|
|
"epoch": 4.56156862745098,
|
|
"grad_norm": 0.42324683620062264,
|
|
"learning_rate": 1.3072342202165069e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11011166125535965,
|
|
"step": 2910,
|
|
"valid_targets_mean": 5293.0,
|
|
"valid_targets_min": 3835
|
|
},
|
|
{
|
|
"epoch": 4.569411764705882,
|
|
"grad_norm": 0.5335817462643285,
|
|
"learning_rate": 1.2999065946574656e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13597135245800018,
|
|
"step": 2915,
|
|
"valid_targets_mean": 5534.4,
|
|
"valid_targets_min": 3576
|
|
},
|
|
{
|
|
"epoch": 4.577254901960784,
|
|
"grad_norm": 0.42773280056494023,
|
|
"learning_rate": 1.2925896635661852e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17269366979599,
|
|
"step": 2920,
|
|
"valid_targets_mean": 7218.0,
|
|
"valid_targets_min": 4212
|
|
},
|
|
{
|
|
"epoch": 4.585098039215686,
|
|
"grad_norm": 0.45307534232943186,
|
|
"learning_rate": 1.2852835387144414e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10228618234395981,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4194.5,
|
|
"valid_targets_min": 3159
|
|
},
|
|
{
|
|
"epoch": 4.592941176470588,
|
|
"grad_norm": 0.43811006064926683,
|
|
"learning_rate": 1.2779883317089374e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10418105125427246,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4234.1,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.60078431372549,
|
|
"grad_norm": 0.45656150173445575,
|
|
"learning_rate": 1.2707041539895974e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10194677114486694,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4624.2,
|
|
"valid_targets_min": 3290
|
|
},
|
|
{
|
|
"epoch": 4.608627450980392,
|
|
"grad_norm": 0.4573206998177506,
|
|
"learning_rate": 1.2634311168278652e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1165328174829483,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5227.2,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 4.616470588235294,
|
|
"grad_norm": 0.4024956320861985,
|
|
"learning_rate": 1.2561693313250034e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09726844727993011,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5251.4,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 4.624313725490196,
|
|
"grad_norm": 0.4110909601170384,
|
|
"learning_rate": 1.248918908410396e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10999324917793274,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4927.2,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 4.632156862745098,
|
|
"grad_norm": 0.42815540335642654,
|
|
"learning_rate": 1.2416799588398576e-05,
|
|
"loss": 0.2212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08446561545133591,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4145.6,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"grad_norm": 0.43478734858411267,
|
|
"learning_rate": 1.2344525931939359e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11090641468763351,
|
|
"step": 2960,
|
|
"valid_targets_mean": 5010.8,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 4.647843137254902,
|
|
"grad_norm": 0.42886856376800964,
|
|
"learning_rate": 1.2272369218762266e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292121261358261,
|
|
"step": 2965,
|
|
"valid_targets_mean": 5659.4,
|
|
"valid_targets_min": 3966
|
|
},
|
|
{
|
|
"epoch": 4.6556862745098035,
|
|
"grad_norm": 0.45723299636167947,
|
|
"learning_rate": 1.2200330551116838e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1147545650601387,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4256.5,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 4.663529411764706,
|
|
"grad_norm": 0.46658697317522074,
|
|
"learning_rate": 1.2128411029449403e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11752147972583771,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4721.1,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.671372549019608,
|
|
"grad_norm": 0.4507423455230143,
|
|
"learning_rate": 1.205661175238622e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12433111667633057,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5843.1,
|
|
"valid_targets_min": 3848
|
|
},
|
|
{
|
|
"epoch": 4.67921568627451,
|
|
"grad_norm": 0.4250247278252158,
|
|
"learning_rate": 1.1984933816716726e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12211208790540695,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5752.2,
|
|
"valid_targets_min": 3848
|
|
},
|
|
{
|
|
"epoch": 4.687058823529412,
|
|
"grad_norm": 0.4558654826378166,
|
|
"learning_rate": 1.1913378317376796e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1177147775888443,
|
|
"step": 2990,
|
|
"valid_targets_mean": 5041.1,
|
|
"valid_targets_min": 3733
|
|
},
|
|
{
|
|
"epoch": 4.694901960784314,
|
|
"grad_norm": 0.4642878240620124,
|
|
"learning_rate": 1.1841946347431961e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10511282086372375,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4918.2,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 4.702745098039216,
|
|
"grad_norm": 0.42923733538301684,
|
|
"learning_rate": 1.1770638998060764e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10701652616262436,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4699.8,
|
|
"valid_targets_min": 4095
|
|
},
|
|
{
|
|
"epoch": 4.710588235294118,
|
|
"grad_norm": 0.42308877788874244,
|
|
"learning_rate": 1.1699457358538072e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09624885767698288,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5704.4,
|
|
"valid_targets_min": 4123
|
|
},
|
|
{
|
|
"epoch": 4.71843137254902,
|
|
"grad_norm": 0.42462976456238305,
|
|
"learning_rate": 1.1628402516218432e-05,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10115936398506165,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4660.0,
|
|
"valid_targets_min": 3935
|
|
},
|
|
{
|
|
"epoch": 4.726274509803922,
|
|
"grad_norm": 0.5065232760367284,
|
|
"learning_rate": 1.1557475556519461e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11502327769994736,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3924.1,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 4.734117647058824,
|
|
"grad_norm": 0.47781825995136795,
|
|
"learning_rate": 1.1486677562905281e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13359524309635162,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5024.4,
|
|
"valid_targets_min": 3857
|
|
},
|
|
{
|
|
"epoch": 4.741960784313726,
|
|
"grad_norm": 0.374260772898809,
|
|
"learning_rate": 1.1416009616869959e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11183619499206543,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5801.9,
|
|
"valid_targets_min": 2997
|
|
},
|
|
{
|
|
"epoch": 4.749803921568628,
|
|
"grad_norm": 0.42212140935918013,
|
|
"learning_rate": 1.134547279792098e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1344376504421234,
|
|
"step": 3030,
|
|
"valid_targets_mean": 6771.4,
|
|
"valid_targets_min": 3721
|
|
},
|
|
{
|
|
"epoch": 4.75764705882353,
|
|
"grad_norm": 0.4533881421591952,
|
|
"learning_rate": 1.1275068183562747e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427106112241745,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5687.2,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 4.7654901960784315,
|
|
"grad_norm": 0.4582135419128748,
|
|
"learning_rate": 1.1204796849280167e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10185694694519043,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4214.5,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 4.773333333333333,
|
|
"grad_norm": 0.47286574596444375,
|
|
"learning_rate": 1.1134659868522158e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12713149189949036,
|
|
"step": 3045,
|
|
"valid_targets_mean": 5010.5,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.781176470588235,
|
|
"grad_norm": 0.44690109983254767,
|
|
"learning_rate": 1.106465831268531e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11765958368778229,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5423.0,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 4.789019607843137,
|
|
"grad_norm": 0.4579985208699572,
|
|
"learning_rate": 1.0994793251097468e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10597206652164459,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4305.5,
|
|
"valid_targets_min": 3645
|
|
},
|
|
{
|
|
"epoch": 4.796862745098039,
|
|
"grad_norm": 0.48030434923869464,
|
|
"learning_rate": 1.0925065751001445e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10135907679796219,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3710.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 4.804705882352941,
|
|
"grad_norm": 0.4333841295140674,
|
|
"learning_rate": 1.0855476877538687e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10850516706705093,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4619.1,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 4.812549019607843,
|
|
"grad_norm": 0.4412416417133921,
|
|
"learning_rate": 1.0786027693733015e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09839709103107452,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4376.0,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.820392156862745,
|
|
"grad_norm": 0.42008114505080735,
|
|
"learning_rate": 1.0716719260474365e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08091733604669571,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3504.0,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.828235294117647,
|
|
"grad_norm": 0.45431806331048724,
|
|
"learning_rate": 1.0647552636502629e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1207878589630127,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4412.9,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.836078431372549,
|
|
"grad_norm": 0.46237113906416066,
|
|
"learning_rate": 1.0578528878391419e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12674911320209503,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5019.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 4.843921568627451,
|
|
"grad_norm": 0.49049374695583614,
|
|
"learning_rate": 1.0509649040531994e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09615875035524368,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4504.4,
|
|
"valid_targets_min": 3380
|
|
},
|
|
{
|
|
"epoch": 4.851764705882353,
|
|
"grad_norm": 0.4268656876870293,
|
|
"learning_rate": 1.044091417511709e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11343152821063995,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5271.6,
|
|
"valid_targets_min": 4448
|
|
},
|
|
{
|
|
"epoch": 4.859607843137255,
|
|
"grad_norm": 0.4418073904243916,
|
|
"learning_rate": 1.0372325332124896e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1201629564166069,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5265.9,
|
|
"valid_targets_min": 3534
|
|
},
|
|
{
|
|
"epoch": 4.867450980392157,
|
|
"grad_norm": 0.43735738208173247,
|
|
"learning_rate": 1.0303883559302999e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11078697443008423,
|
|
"step": 3105,
|
|
"valid_targets_mean": 5221.1,
|
|
"valid_targets_min": 3672
|
|
},
|
|
{
|
|
"epoch": 4.875294117647059,
|
|
"grad_norm": 0.4421808676971361,
|
|
"learning_rate": 1.0235589902152351e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12137161940336227,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4906.5,
|
|
"valid_targets_min": 3891
|
|
},
|
|
{
|
|
"epoch": 4.8831372549019605,
|
|
"grad_norm": 0.4581736055281256,
|
|
"learning_rate": 1.0167445403911356e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11969655007123947,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4996.5,
|
|
"valid_targets_min": 3709
|
|
},
|
|
{
|
|
"epoch": 4.890980392156862,
|
|
"grad_norm": 0.46234205548706775,
|
|
"learning_rate": 1.0099451105539866e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11135657131671906,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4615.5,
|
|
"valid_targets_min": 2185
|
|
},
|
|
{
|
|
"epoch": 4.898823529411764,
|
|
"grad_norm": 0.41582646703925863,
|
|
"learning_rate": 1.0031608045703347e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11816100031137466,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4690.8,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 4.906666666666666,
|
|
"grad_norm": 0.5046311194334004,
|
|
"learning_rate": 9.963917260756937e-06,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1081245094537735,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3725.6,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 4.914509803921568,
|
|
"grad_norm": 0.44898161953642074,
|
|
"learning_rate": 9.89637978472972e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12114117294549942,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5526.5,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.92235294117647,
|
|
"grad_norm": 0.4616780181364111,
|
|
"learning_rate": 9.828996649308804e-06,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14261837303638458,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5210.5,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 4.930196078431372,
|
|
"grad_norm": 0.44632304561319325,
|
|
"learning_rate": 9.76176888382367e-06,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1033644899725914,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4451.0,
|
|
"valid_targets_min": 3309
|
|
},
|
|
{
|
|
"epoch": 4.938039215686274,
|
|
"grad_norm": 0.40315955223798433,
|
|
"learning_rate": 9.694697515230371e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12324383109807968,
|
|
"step": 3150,
|
|
"valid_targets_mean": 5916.8,
|
|
"valid_targets_min": 3639
|
|
},
|
|
{
|
|
"epoch": 4.945882352941177,
|
|
"grad_norm": 0.4347594206621477,
|
|
"learning_rate": 9.627783568095905e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376483291387558,
|
|
"step": 3155,
|
|
"valid_targets_mean": 6208.2,
|
|
"valid_targets_min": 3652
|
|
},
|
|
{
|
|
"epoch": 4.953725490196079,
|
|
"grad_norm": 0.4084729068773385,
|
|
"learning_rate": 9.561028064582507e-06,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10471275448799133,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5257.2,
|
|
"valid_targets_min": 4459
|
|
},
|
|
{
|
|
"epoch": 4.961568627450981,
|
|
"grad_norm": 0.42400310983230527,
|
|
"learning_rate": 9.494432024432087e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13439449667930603,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5783.6,
|
|
"valid_targets_min": 3438
|
|
},
|
|
{
|
|
"epoch": 4.969411764705883,
|
|
"grad_norm": 0.43246443329360357,
|
|
"learning_rate": 9.42799646495062e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10336959362030029,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4810.8,
|
|
"valid_targets_min": 3566
|
|
},
|
|
{
|
|
"epoch": 4.977254901960785,
|
|
"grad_norm": 0.4568959179301906,
|
|
"learning_rate": 9.361722400992618e-06,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13624556362628937,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5052.2,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.9850980392156865,
|
|
"grad_norm": 0.43603921496187753,
|
|
"learning_rate": 9.295610844945613e-06,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14079561829566956,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5369.8,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 4.992941176470588,
|
|
"grad_norm": 0.4059440081163407,
|
|
"learning_rate": 9.229662806714721e-06,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11058436334133148,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5307.1,
|
|
"valid_targets_min": 4409
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6353395355287816,
|
|
"learning_rate": 9.163879293707172e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786991000175476,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5547.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.007843137254902,
|
|
"grad_norm": 0.4434426444600232,
|
|
"learning_rate": 9.09826131081698e-06,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10770189017057419,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5887.5,
|
|
"valid_targets_min": 3970
|
|
},
|
|
{
|
|
"epoch": 5.015686274509804,
|
|
"grad_norm": 0.44956093077808323,
|
|
"learning_rate": 9.03280986040952e-06,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13977831602096558,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5593.4,
|
|
"valid_targets_min": 3313
|
|
},
|
|
{
|
|
"epoch": 5.023529411764706,
|
|
"grad_norm": 0.4869853630720128,
|
|
"learning_rate": 8.967525942306285e-06,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269673854112625,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4262.0,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 5.031372549019608,
|
|
"grad_norm": 0.4197921666192809,
|
|
"learning_rate": 8.902410553769575e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1173236221075058,
|
|
"step": 3210,
|
|
"valid_targets_mean": 6452.1,
|
|
"valid_targets_min": 4591
|
|
},
|
|
{
|
|
"epoch": 5.03921568627451,
|
|
"grad_norm": 0.4494438187333058,
|
|
"learning_rate": 8.837464689487261e-06,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14070060849189758,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5643.9,
|
|
"valid_targets_min": 4702
|
|
},
|
|
{
|
|
"epoch": 5.047058823529412,
|
|
"grad_norm": 0.4740984647047455,
|
|
"learning_rate": 8.772689341557611e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08877275884151459,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3722.0,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 5.0549019607843135,
|
|
"grad_norm": 0.41928661783092586,
|
|
"learning_rate": 8.708085499474112e-06,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15495586395263672,
|
|
"step": 3225,
|
|
"valid_targets_mean": 6443.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 5.0627450980392155,
|
|
"grad_norm": 0.4936630984225002,
|
|
"learning_rate": 8.643654150110387e-06,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09484417736530304,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3947.6,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 5.070588235294117,
|
|
"grad_norm": 0.45106735902899286,
|
|
"learning_rate": 8.579396277705071e-06,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11588169634342194,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5591.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 5.078431372549019,
|
|
"grad_norm": 0.414467523588827,
|
|
"learning_rate": 8.51531286384683e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12981513142585754,
|
|
"step": 3240,
|
|
"valid_targets_mean": 6583.4,
|
|
"valid_targets_min": 4301
|
|
},
|
|
{
|
|
"epoch": 5.086274509803921,
|
|
"grad_norm": 0.45801771415641485,
|
|
"learning_rate": 8.451404887459325e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10124361515045166,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4468.6,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 5.094117647058823,
|
|
"grad_norm": 0.41109767529688723,
|
|
"learning_rate": 8.387673324786292e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09609640389680862,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5498.2,
|
|
"valid_targets_min": 3578
|
|
},
|
|
{
|
|
"epoch": 5.101960784313725,
|
|
"grad_norm": 0.44737320898342614,
|
|
"learning_rate": 8.324119149376584e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08587310463190079,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3785.8,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 5.109803921568627,
|
|
"grad_norm": 0.46916902022475687,
|
|
"learning_rate": 8.260743332069355e-06,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08537392318248749,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3495.5,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 5.117647058823529,
|
|
"grad_norm": 0.48625587727833747,
|
|
"learning_rate": 8.197546840979172e-06,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296594887971878,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5611.8,
|
|
"valid_targets_min": 4490
|
|
},
|
|
{
|
|
"epoch": 5.125490196078431,
|
|
"grad_norm": 0.42426042442109285,
|
|
"learning_rate": 8.134530641481289e-06,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.100534588098526,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5074.5,
|
|
"valid_targets_min": 4181
|
|
},
|
|
{
|
|
"epoch": 5.133333333333334,
|
|
"grad_norm": 0.4738822432964966,
|
|
"learning_rate": 8.071695696196824e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12148615717887878,
|
|
"step": 3275,
|
|
"valid_targets_mean": 6402.9,
|
|
"valid_targets_min": 2974
|
|
},
|
|
{
|
|
"epoch": 5.141176470588236,
|
|
"grad_norm": 0.4605721146070712,
|
|
"learning_rate": 8.00904296497815e-06,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10829634964466095,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4721.6,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 5.149019607843138,
|
|
"grad_norm": 0.43834794430430113,
|
|
"learning_rate": 7.946573404894133e-06,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11548404395580292,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4849.8,
|
|
"valid_targets_min": 3997
|
|
},
|
|
{
|
|
"epoch": 5.1568627450980395,
|
|
"grad_norm": 0.4419936793380277,
|
|
"learning_rate": 7.88428797021559e-06,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1079975962638855,
|
|
"step": 3290,
|
|
"valid_targets_mean": 6218.1,
|
|
"valid_targets_min": 3660
|
|
},
|
|
{
|
|
"epoch": 5.1647058823529415,
|
|
"grad_norm": 0.49318666357923957,
|
|
"learning_rate": 7.82218761240065e-06,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13725179433822632,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4569.8,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 5.172549019607843,
|
|
"grad_norm": 0.5487376533142131,
|
|
"learning_rate": 7.760273280080282e-06,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10099410265684128,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2956.1,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 5.180392156862745,
|
|
"grad_norm": 0.4882888789982505,
|
|
"learning_rate": 7.69854591904374e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10672858357429504,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4359.8,
|
|
"valid_targets_min": 2734
|
|
},
|
|
{
|
|
"epoch": 5.188235294117647,
|
|
"grad_norm": 0.4976788184527717,
|
|
"learning_rate": 7.637006472224173e-06,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13328954577445984,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4612.8,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 5.196078431372549,
|
|
"grad_norm": 0.4820982572321674,
|
|
"learning_rate": 7.575655879684192e-06,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11873605847358704,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4777.1,
|
|
"valid_targets_min": 4313
|
|
},
|
|
{
|
|
"epoch": 5.203921568627451,
|
|
"grad_norm": 0.44038543583962614,
|
|
"learning_rate": 7.514495078601492e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08787596970796585,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4427.4,
|
|
"valid_targets_min": 3267
|
|
},
|
|
{
|
|
"epoch": 5.211764705882353,
|
|
"grad_norm": 0.4544569376688998,
|
|
"learning_rate": 7.453525003254585e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11807404458522797,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5929.4,
|
|
"valid_targets_min": 3780
|
|
},
|
|
{
|
|
"epoch": 5.219607843137255,
|
|
"grad_norm": 0.4277976984406487,
|
|
"learning_rate": 7.39274658500847e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1250154972076416,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5617.8,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 5.227450980392157,
|
|
"grad_norm": 0.47873062684918105,
|
|
"learning_rate": 7.33216075230047e-06,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199977993965149,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4802.5,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 5.235294117647059,
|
|
"grad_norm": 0.4604469615059107,
|
|
"learning_rate": 7.271768430625983e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11360803246498108,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4633.6,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 5.243137254901961,
|
|
"grad_norm": 0.4750762705157017,
|
|
"learning_rate": 7.2115705425243996e-06,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09895454347133636,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4430.6,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 5.250980392156863,
|
|
"grad_norm": 0.43770437150223573,
|
|
"learning_rate": 7.151568007564962e-06,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11433374136686325,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4682.2,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 5.258823529411765,
|
|
"grad_norm": 0.43083632332802724,
|
|
"learning_rate": 7.091761742332786e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12176376581192017,
|
|
"step": 3355,
|
|
"valid_targets_mean": 5216.8,
|
|
"valid_targets_min": 4379
|
|
},
|
|
{
|
|
"epoch": 5.266666666666667,
|
|
"grad_norm": 0.44657728131301777,
|
|
"learning_rate": 7.032152660414764e-06,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12760856747627258,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5033.1,
|
|
"valid_targets_min": 3806
|
|
},
|
|
{
|
|
"epoch": 5.2745098039215685,
|
|
"grad_norm": 0.4311298801885589,
|
|
"learning_rate": 6.972741672385699e-06,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10898947715759277,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4410.0,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 5.2823529411764705,
|
|
"grad_norm": 0.4498648501684603,
|
|
"learning_rate": 6.913529685794333e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09574690461158752,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4651.5,
|
|
"valid_targets_min": 4220
|
|
},
|
|
{
|
|
"epoch": 5.290196078431372,
|
|
"grad_norm": 0.44940116777056116,
|
|
"learning_rate": 6.854517605149526e-06,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10460729897022247,
|
|
"step": 3375,
|
|
"valid_targets_mean": 4497.6,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.298039215686274,
|
|
"grad_norm": 1.1413022811365738,
|
|
"learning_rate": 6.795706331906402e-06,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12758633494377136,
|
|
"step": 3380,
|
|
"valid_targets_mean": 5067.9,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 5.305882352941176,
|
|
"grad_norm": 0.46454795022553935,
|
|
"learning_rate": 6.737096764452609e-06,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14404542744159698,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5265.8,
|
|
"valid_targets_min": 3597
|
|
},
|
|
{
|
|
"epoch": 5.313725490196078,
|
|
"grad_norm": 0.46217048196647176,
|
|
"learning_rate": 6.678689798094582e-06,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15968047082424164,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5395.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 5.32156862745098,
|
|
"grad_norm": 0.45958861438426907,
|
|
"learning_rate": 6.620486325043871e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11720308661460876,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4572.5,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 5.329411764705882,
|
|
"grad_norm": 0.4957399411913295,
|
|
"learning_rate": 6.562487234403492e-06,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07668491452932358,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3854.9,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 5.337254901960784,
|
|
"grad_norm": 0.4875761006061657,
|
|
"learning_rate": 6.504693412154384e-06,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13727839291095734,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5214.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 5.345098039215686,
|
|
"grad_norm": 0.49759533738805584,
|
|
"learning_rate": 6.447105741141828e-06,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12163832783699036,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4840.9,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 5.352941176470588,
|
|
"grad_norm": 0.4349334170423276,
|
|
"learning_rate": 6.389725101062017e-06,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10749147087335587,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4416.5,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 5.36078431372549,
|
|
"grad_norm": 0.4421960081035466,
|
|
"learning_rate": 6.332552368448552e-06,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09613317251205444,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4727.2,
|
|
"valid_targets_min": 3663
|
|
},
|
|
{
|
|
"epoch": 5.368627450980393,
|
|
"grad_norm": 0.4358667812308664,
|
|
"learning_rate": 6.275588416659111e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11117342114448547,
|
|
"step": 3425,
|
|
"valid_targets_mean": 6231.6,
|
|
"valid_targets_min": 4385
|
|
},
|
|
{
|
|
"epoch": 5.376470588235295,
|
|
"grad_norm": 0.41219425712164226,
|
|
"learning_rate": 6.218834115862082e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13920395076274872,
|
|
"step": 3430,
|
|
"valid_targets_mean": 7189.1,
|
|
"valid_targets_min": 4094
|
|
},
|
|
{
|
|
"epoch": 5.3843137254901965,
|
|
"grad_norm": 0.4514069531314856,
|
|
"learning_rate": 6.162290333023255e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08958177268505096,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4465.5,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 5.392156862745098,
|
|
"grad_norm": 0.4530827675293336,
|
|
"learning_rate": 6.105957931892619e-06,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10022272169589996,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4265.6,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 5.4,
|
|
"grad_norm": 0.4855159356512361,
|
|
"learning_rate": 6.049837772991119e-06,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10491837561130524,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4224.2,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 5.407843137254902,
|
|
"grad_norm": 0.4617453629113387,
|
|
"learning_rate": 5.99393071359756e-06,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10774972289800644,
|
|
"step": 3450,
|
|
"valid_targets_mean": 4938.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.415686274509804,
|
|
"grad_norm": 0.4552969706980452,
|
|
"learning_rate": 5.93823760773546e-06,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12159918248653412,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4968.8,
|
|
"valid_targets_min": 3542
|
|
},
|
|
{
|
|
"epoch": 5.423529411764706,
|
|
"grad_norm": 0.45149853825685593,
|
|
"learning_rate": 5.882759306160071e-06,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12657040357589722,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5412.6,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 5.431372549019608,
|
|
"grad_norm": 0.46721949213155406,
|
|
"learning_rate": 5.827496656345299e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13758181035518646,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5169.2,
|
|
"valid_targets_min": 3526
|
|
},
|
|
{
|
|
"epoch": 5.43921568627451,
|
|
"grad_norm": 0.5036346273394595,
|
|
"learning_rate": 5.772450502470841e-06,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11827219277620316,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4207.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 5.447058823529412,
|
|
"grad_norm": 0.475968320196414,
|
|
"learning_rate": 5.71762168540922e-06,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11889398843050003,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4802.8,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 5.454901960784314,
|
|
"grad_norm": 0.44599854272970313,
|
|
"learning_rate": 5.663011042712996e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08588573336601257,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3836.0,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 5.462745098039216,
|
|
"grad_norm": 0.45285098433177345,
|
|
"learning_rate": 5.608619408601925e-06,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12081587314605713,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5519.2,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 5.470588235294118,
|
|
"grad_norm": 0.47332504195281055,
|
|
"learning_rate": 5.554447613950262e-06,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0917874276638031,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4277.1,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 5.47843137254902,
|
|
"grad_norm": 0.48931017360833473,
|
|
"learning_rate": 5.500496486274014e-06,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11668972671031952,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4985.4,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 5.486274509803922,
|
|
"grad_norm": 0.4705093783132721,
|
|
"learning_rate": 5.446766849718368e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12192535400390625,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4844.0,
|
|
"valid_targets_min": 3442
|
|
},
|
|
{
|
|
"epoch": 5.4941176470588236,
|
|
"grad_norm": 0.47597725377797906,
|
|
"learning_rate": 5.393259525045025e-06,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12520119547843933,
|
|
"step": 3505,
|
|
"valid_targets_mean": 5064.5,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 5.5019607843137255,
|
|
"grad_norm": 0.5038215817205175,
|
|
"learning_rate": 5.3399753296197286e-06,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12013528496026993,
|
|
"step": 3510,
|
|
"valid_targets_mean": 6281.8,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 5.509803921568627,
|
|
"grad_norm": 0.45078764380258896,
|
|
"learning_rate": 5.286915077399728e-06,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1223902776837349,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4933.1,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 5.517647058823529,
|
|
"grad_norm": 0.44669914984891373,
|
|
"learning_rate": 5.2340795789213896e-06,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11959988623857498,
|
|
"step": 3520,
|
|
"valid_targets_mean": 5021.1,
|
|
"valid_targets_min": 4010
|
|
},
|
|
{
|
|
"epoch": 5.525490196078431,
|
|
"grad_norm": 0.47572915971838303,
|
|
"learning_rate": 5.1814696412877665e-06,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09711023420095444,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4000.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.533333333333333,
|
|
"grad_norm": 0.41059665549581065,
|
|
"learning_rate": 5.1290860681563195e-06,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11966368556022644,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5863.4,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 5.541176470588235,
|
|
"grad_norm": 0.47801376036726195,
|
|
"learning_rate": 5.0769296597266126e-06,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09640462696552277,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4194.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 5.549019607843137,
|
|
"grad_norm": 0.47193724093712663,
|
|
"learning_rate": 5.0250012127280755e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14034606516361237,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5527.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 5.556862745098039,
|
|
"grad_norm": 0.4187684550749266,
|
|
"learning_rate": 4.973301520407874e-06,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11018697917461395,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4931.5,
|
|
"valid_targets_min": 3822
|
|
},
|
|
{
|
|
"epoch": 5.564705882352941,
|
|
"grad_norm": 0.43807197134211445,
|
|
"learning_rate": 4.921831372518751e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10117003321647644,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4589.4,
|
|
"valid_targets_min": 3745
|
|
},
|
|
{
|
|
"epoch": 5.572549019607843,
|
|
"grad_norm": 0.4295405081891386,
|
|
"learning_rate": 4.870591555306996e-06,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1253001093864441,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5302.1,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 5.580392156862745,
|
|
"grad_norm": 0.40909809421081306,
|
|
"learning_rate": 4.819582851500406e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1117587685585022,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5665.6,
|
|
"valid_targets_min": 3842
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 0.4126217968143279,
|
|
"learning_rate": 4.768806040296348e-06,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10420885682106018,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5147.0,
|
|
"valid_targets_min": 4120
|
|
},
|
|
{
|
|
"epoch": 5.596078431372549,
|
|
"grad_norm": 0.5182915809260275,
|
|
"learning_rate": 4.718261897349856e-06,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1196608915925026,
|
|
"step": 3570,
|
|
"valid_targets_mean": 5651.6,
|
|
"valid_targets_min": 3699
|
|
},
|
|
{
|
|
"epoch": 5.603921568627451,
|
|
"grad_norm": 0.43527217699842075,
|
|
"learning_rate": 4.667951194761773e-06,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1106809750199318,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5147.1,
|
|
"valid_targets_min": 3362
|
|
},
|
|
{
|
|
"epoch": 5.6117647058823525,
|
|
"grad_norm": 0.48279027412203945,
|
|
"learning_rate": 4.6178747010669485e-06,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0991939902305603,
|
|
"step": 3580,
|
|
"valid_targets_mean": 5145.6,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 5.6196078431372545,
|
|
"grad_norm": 0.4410615828340391,
|
|
"learning_rate": 4.5680331812225335e-06,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11643695831298828,
|
|
"step": 3585,
|
|
"valid_targets_mean": 6146.0,
|
|
"valid_targets_min": 4295
|
|
},
|
|
{
|
|
"epoch": 5.627450980392156,
|
|
"grad_norm": 0.43364067999896105,
|
|
"learning_rate": 4.518427396596246e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09558488428592682,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4636.6,
|
|
"valid_targets_min": 3449
|
|
},
|
|
{
|
|
"epoch": 5.635294117647058,
|
|
"grad_norm": 0.45945254451484613,
|
|
"learning_rate": 4.469058104954786e-06,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10179445147514343,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4072.9,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 5.643137254901961,
|
|
"grad_norm": 0.43883900146162236,
|
|
"learning_rate": 4.4199260604522266e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09043440222740173,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4496.6,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 5.650980392156863,
|
|
"grad_norm": 0.45306683758184596,
|
|
"learning_rate": 4.371032013618519e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12673869729042053,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5324.5,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 5.658823529411765,
|
|
"grad_norm": 0.43266837582367107,
|
|
"learning_rate": 4.322376711348009e-06,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11335854977369308,
|
|
"step": 3610,
|
|
"valid_targets_mean": 5719.5,
|
|
"valid_targets_min": 4228
|
|
},
|
|
{
|
|
"epoch": 5.666666666666667,
|
|
"grad_norm": 0.4613499468410268,
|
|
"learning_rate": 4.273960896888045e-06,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1092272475361824,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4711.0,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 5.674509803921569,
|
|
"grad_norm": 0.4439816008991479,
|
|
"learning_rate": 4.225785309827595e-06,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1643529236316681,
|
|
"step": 3620,
|
|
"valid_targets_mean": 7468.0,
|
|
"valid_targets_min": 4598
|
|
},
|
|
{
|
|
"epoch": 5.682352941176471,
|
|
"grad_norm": 0.435670122408513,
|
|
"learning_rate": 4.177850686085987e-06,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0981040745973587,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5326.0,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 5.690196078431373,
|
|
"grad_norm": 0.42982650741832645,
|
|
"learning_rate": 4.130157757901632e-06,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.088326595723629,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4358.0,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.698039215686275,
|
|
"grad_norm": 0.4274034250053087,
|
|
"learning_rate": 4.0827072538208705e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09751100093126297,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4744.9,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 5.705882352941177,
|
|
"grad_norm": 0.44871305535715067,
|
|
"learning_rate": 4.03549989868681e-06,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1253272145986557,
|
|
"step": 3640,
|
|
"valid_targets_mean": 5274.1,
|
|
"valid_targets_min": 4356
|
|
},
|
|
{
|
|
"epoch": 5.713725490196079,
|
|
"grad_norm": 0.43395756570056015,
|
|
"learning_rate": 3.988536413628286e-06,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11936426162719727,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4859.6,
|
|
"valid_targets_min": 4330
|
|
},
|
|
{
|
|
"epoch": 5.7215686274509805,
|
|
"grad_norm": 0.4446433101279566,
|
|
"learning_rate": 3.941817516048827e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13166441023349762,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5126.4,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 5.729411764705882,
|
|
"grad_norm": 0.4382133559962375,
|
|
"learning_rate": 3.895343919615686e-06,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09739711880683899,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 3603
|
|
},
|
|
{
|
|
"epoch": 5.737254901960784,
|
|
"grad_norm": 0.43768199216496817,
|
|
"learning_rate": 3.849116334248972e-06,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12876223027706146,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5489.1,
|
|
"valid_targets_min": 4068
|
|
},
|
|
{
|
|
"epoch": 5.745098039215686,
|
|
"grad_norm": 0.6386090470107243,
|
|
"learning_rate": 3.803135466110761e-06,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14873090386390686,
|
|
"step": 3665,
|
|
"valid_targets_mean": 6416.6,
|
|
"valid_targets_min": 3531
|
|
},
|
|
{
|
|
"epoch": 5.752941176470588,
|
|
"grad_norm": 0.46699010202113966,
|
|
"learning_rate": 3.7574020175943514e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352880597114563,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4905.2,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 5.76078431372549,
|
|
"grad_norm": 0.44840009115628654,
|
|
"learning_rate": 3.7119166873135102e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11027860641479492,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4478.0,
|
|
"valid_targets_min": 3613
|
|
},
|
|
{
|
|
"epoch": 5.768627450980392,
|
|
"grad_norm": 0.42512751711371577,
|
|
"learning_rate": 3.666680170091812e-06,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09653453528881073,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4997.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.776470588235294,
|
|
"grad_norm": 0.4484558435127659,
|
|
"learning_rate": 3.6216931569519995e-06,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12785814702510834,
|
|
"step": 3685,
|
|
"valid_targets_mean": 5875.6,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 5.784313725490196,
|
|
"grad_norm": 0.4540984960968203,
|
|
"learning_rate": 3.576956335105477e-06,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252613067626953,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4812.2,
|
|
"valid_targets_min": 4032
|
|
},
|
|
{
|
|
"epoch": 5.792156862745098,
|
|
"grad_norm": 0.4870289857866998,
|
|
"learning_rate": 3.5324703879417553e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12756261229515076,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4419.8,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 5.8,
|
|
"grad_norm": 0.43812883554424525,
|
|
"learning_rate": 3.488235995018063e-06,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10658499598503113,
|
|
"step": 3700,
|
|
"valid_targets_mean": 5321.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 5.807843137254902,
|
|
"grad_norm": 0.44563915612184524,
|
|
"learning_rate": 3.444253832048925e-06,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12261335551738739,
|
|
"step": 3705,
|
|
"valid_targets_mean": 5663.4,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.815686274509804,
|
|
"grad_norm": 0.4370119672813637,
|
|
"learning_rate": 3.4005245708958757e-06,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10031146556138992,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5337.1,
|
|
"valid_targets_min": 3646
|
|
},
|
|
{
|
|
"epoch": 5.823529411764706,
|
|
"grad_norm": 0.4677568722641574,
|
|
"learning_rate": 3.3570488795571653e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10272514075040817,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4458.5,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.8313725490196076,
|
|
"grad_norm": 0.42901369163627306,
|
|
"learning_rate": 3.313827422157583e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11962015926837921,
|
|
"step": 3720,
|
|
"valid_targets_mean": 5243.4,
|
|
"valid_targets_min": 4273
|
|
},
|
|
{
|
|
"epoch": 5.8392156862745095,
|
|
"grad_norm": 0.4508229218395516,
|
|
"learning_rate": 3.2708608589382783e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1208086609840393,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5224.9,
|
|
"valid_targets_min": 3941
|
|
},
|
|
{
|
|
"epoch": 5.847058823529411,
|
|
"grad_norm": 0.5022818108642572,
|
|
"learning_rate": 3.2281498462467177e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12419945746660233,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4348.6,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 5.854901960784313,
|
|
"grad_norm": 0.4245003597578431,
|
|
"learning_rate": 3.185695036526615e-06,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09119582921266556,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4289.4,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 5.862745098039216,
|
|
"grad_norm": 0.44941318555493776,
|
|
"learning_rate": 3.1434970783080043e-06,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11358950287103653,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5198.1,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 5.870588235294118,
|
|
"grad_norm": 0.4461123820613788,
|
|
"learning_rate": 3.1015566161972967e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.125828817486763,
|
|
"step": 3745,
|
|
"valid_targets_mean": 5063.1,
|
|
"valid_targets_min": 3888
|
|
},
|
|
{
|
|
"epoch": 5.87843137254902,
|
|
"grad_norm": 0.4383491042478554,
|
|
"learning_rate": 3.059874290867464e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12822286784648895,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5993.8,
|
|
"valid_targets_min": 3910
|
|
},
|
|
{
|
|
"epoch": 5.886274509803922,
|
|
"grad_norm": 0.48015360297300713,
|
|
"learning_rate": 3.01845073904824e-06,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10203864425420761,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3913.8,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 5.894117647058824,
|
|
"grad_norm": 0.45844901007133243,
|
|
"learning_rate": 2.977286593516382e-06,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11280177533626556,
|
|
"step": 3760,
|
|
"valid_targets_mean": 5098.1,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 5.901960784313726,
|
|
"grad_norm": 0.4286501111208385,
|
|
"learning_rate": 2.936382483086022e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09490187466144562,
|
|
"step": 3765,
|
|
"valid_targets_mean": 5090.5,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 5.909803921568628,
|
|
"grad_norm": 0.42920138358916626,
|
|
"learning_rate": 2.895739032599065e-06,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10762906074523926,
|
|
"step": 3770,
|
|
"valid_targets_mean": 5082.1,
|
|
"valid_targets_min": 3741
|
|
},
|
|
{
|
|
"epoch": 5.91764705882353,
|
|
"grad_norm": 0.4398887995970251,
|
|
"learning_rate": 2.8553568629156124e-06,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11214582622051239,
|
|
"step": 3775,
|
|
"valid_targets_mean": 5284.2,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 5.925490196078432,
|
|
"grad_norm": 0.45162955393842535,
|
|
"learning_rate": 2.815236590904522e-06,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13448527455329895,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5756.5,
|
|
"valid_targets_min": 3362
|
|
},
|
|
{
|
|
"epoch": 5.933333333333334,
|
|
"grad_norm": 0.4622181433491908,
|
|
"learning_rate": 2.7753788294339412e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10792221128940582,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4253.6,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 5.9411764705882355,
|
|
"grad_norm": 0.4478237372592683,
|
|
"learning_rate": 2.7357841873619804e-06,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10102271288633347,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4683.2,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.949019607843137,
|
|
"grad_norm": 0.43853988764646357,
|
|
"learning_rate": 2.6964532695273993e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10243266820907593,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4895.5,
|
|
"valid_targets_min": 3691
|
|
},
|
|
{
|
|
"epoch": 5.956862745098039,
|
|
"grad_norm": 0.4726658375108929,
|
|
"learning_rate": 2.65738667674035e-06,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1146373450756073,
|
|
"step": 3800,
|
|
"valid_targets_mean": 5202.2,
|
|
"valid_targets_min": 4291
|
|
},
|
|
{
|
|
"epoch": 5.964705882352941,
|
|
"grad_norm": 0.46539138231794835,
|
|
"learning_rate": 2.6185850057732377e-06,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1239728182554245,
|
|
"step": 3805,
|
|
"valid_targets_mean": 5232.4,
|
|
"valid_targets_min": 3835
|
|
},
|
|
{
|
|
"epoch": 5.972549019607843,
|
|
"grad_norm": 0.4306305835407593,
|
|
"learning_rate": 2.5800488493515576e-06,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12047439068555832,
|
|
"step": 3810,
|
|
"valid_targets_mean": 5481.5,
|
|
"valid_targets_min": 3927
|
|
},
|
|
{
|
|
"epoch": 5.980392156862745,
|
|
"grad_norm": 0.47445010531505516,
|
|
"learning_rate": 2.5417787961448893e-06,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11555356532335281,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5475.6,
|
|
"valid_targets_min": 4163
|
|
},
|
|
{
|
|
"epoch": 5.988235294117647,
|
|
"grad_norm": 0.4687655850135902,
|
|
"learning_rate": 2.5037754307578553e-06,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12887445092201233,
|
|
"step": 3820,
|
|
"valid_targets_mean": 5129.9,
|
|
"valid_targets_min": 3500
|
|
},
|
|
{
|
|
"epoch": 5.996078431372549,
|
|
"grad_norm": 0.4549710469894779,
|
|
"learning_rate": 2.466039333721246e-06,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14167582988739014,
|
|
"step": 3825,
|
|
"valid_targets_mean": 5541.9,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 6.003137254901961,
|
|
"grad_norm": 0.4778427742654853,
|
|
"learning_rate": 2.4285710814830908e-06,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11886697262525558,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5217.9,
|
|
"valid_targets_min": 3748
|
|
},
|
|
{
|
|
"epoch": 6.0109803921568625,
|
|
"grad_norm": 0.395580018605638,
|
|
"learning_rate": 2.3913712463999026e-06,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12913060188293457,
|
|
"step": 3835,
|
|
"valid_targets_mean": 7408.9,
|
|
"valid_targets_min": 4043
|
|
},
|
|
{
|
|
"epoch": 6.0188235294117645,
|
|
"grad_norm": 0.3955623474862445,
|
|
"learning_rate": 2.354440396727902e-06,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199808120727539,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5850.1,
|
|
"valid_targets_min": 3770
|
|
},
|
|
{
|
|
"epoch": 6.026666666666666,
|
|
"grad_norm": 0.45188900717094593,
|
|
"learning_rate": 2.3177790966143653e-06,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11194856464862823,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4627.0,
|
|
"valid_targets_min": 3205
|
|
},
|
|
{
|
|
"epoch": 6.034509803921568,
|
|
"grad_norm": 0.4498151749329847,
|
|
"learning_rate": 2.28138790608897e-06,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09413829445838928,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4603.8,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 6.04235294117647,
|
|
"grad_norm": 0.3976619572773509,
|
|
"learning_rate": 2.2452673810552783e-06,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11031914502382278,
|
|
"step": 3855,
|
|
"valid_targets_mean": 5604.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.050196078431372,
|
|
"grad_norm": 0.4524586272105886,
|
|
"learning_rate": 2.2094180732822235e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11842180788516998,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5268.5,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 6.058039215686274,
|
|
"grad_norm": 0.44553350617251575,
|
|
"learning_rate": 2.1738405303956877e-06,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11821208894252777,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4652.0,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 6.065882352941176,
|
|
"grad_norm": 0.43295677319483183,
|
|
"learning_rate": 2.138535295870121e-06,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10608823597431183,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4835.5,
|
|
"valid_targets_min": 3781
|
|
},
|
|
{
|
|
"epoch": 6.073725490196079,
|
|
"grad_norm": 0.45438458730551395,
|
|
"learning_rate": 2.1035029090202717e-06,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13026057183742523,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4839.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 6.081568627450981,
|
|
"grad_norm": 0.4345947842274597,
|
|
"learning_rate": 2.0687439049929113e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11360672116279602,
|
|
"step": 3880,
|
|
"valid_targets_mean": 5265.2,
|
|
"valid_targets_min": 2579
|
|
},
|
|
{
|
|
"epoch": 6.089411764705883,
|
|
"grad_norm": 0.45271684865929085,
|
|
"learning_rate": 2.034258814758696e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15223872661590576,
|
|
"step": 3885,
|
|
"valid_targets_mean": 7243.0,
|
|
"valid_targets_min": 3673
|
|
},
|
|
{
|
|
"epoch": 6.097254901960785,
|
|
"grad_norm": 0.4447514877525832,
|
|
"learning_rate": 2.0000481651040182e-06,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11474558711051941,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4506.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.105098039215687,
|
|
"grad_norm": 0.4543298228190471,
|
|
"learning_rate": 1.96611247862299e-06,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11225864291191101,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4808.0,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 6.1129411764705885,
|
|
"grad_norm": 0.39971827457975123,
|
|
"learning_rate": 1.932452273709453e-06,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10475362092256546,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5980.1,
|
|
"valid_targets_min": 4308
|
|
},
|
|
{
|
|
"epoch": 6.1207843137254905,
|
|
"grad_norm": 0.4739608943853064,
|
|
"learning_rate": 1.8990680645490389e-06,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11026961356401443,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4273.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 6.128627450980392,
|
|
"grad_norm": 0.4397719072306824,
|
|
"learning_rate": 1.8659603611113475e-06,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09811584651470184,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4635.4,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 6.136470588235294,
|
|
"grad_norm": 0.41433117956865595,
|
|
"learning_rate": 1.833129669142124e-06,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11510950326919556,
|
|
"step": 3915,
|
|
"valid_targets_mean": 6398.9,
|
|
"valid_targets_min": 4249
|
|
},
|
|
{
|
|
"epoch": 6.144313725490196,
|
|
"grad_norm": 0.4477196069615066,
|
|
"learning_rate": 1.8005764901555678e-06,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10999945551156998,
|
|
"step": 3920,
|
|
"valid_targets_mean": 5338.1,
|
|
"valid_targets_min": 3817
|
|
},
|
|
{
|
|
"epoch": 6.152156862745098,
|
|
"grad_norm": 0.4659311481730028,
|
|
"learning_rate": 1.7683013214266354e-06,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519470512866974,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5332.6,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 6.16,
|
|
"grad_norm": 0.4087147886719595,
|
|
"learning_rate": 1.7363046559834718e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11720842123031616,
|
|
"step": 3930,
|
|
"valid_targets_mean": 6784.1,
|
|
"valid_targets_min": 4317
|
|
},
|
|
{
|
|
"epoch": 6.167843137254902,
|
|
"grad_norm": 0.4639434413306962,
|
|
"learning_rate": 1.7045869825998674e-06,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09481732547283173,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3903.6,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 6.175686274509804,
|
|
"grad_norm": 0.5157394755392122,
|
|
"learning_rate": 1.6731487857877971e-06,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15734745562076569,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5625.9,
|
|
"valid_targets_min": 3955
|
|
},
|
|
{
|
|
"epoch": 6.183529411764706,
|
|
"grad_norm": 0.42881466610280083,
|
|
"learning_rate": 1.641990545790002e-06,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1048150509595871,
|
|
"step": 3945,
|
|
"valid_targets_mean": 5696.6,
|
|
"valid_targets_min": 3260
|
|
},
|
|
{
|
|
"epoch": 6.191372549019608,
|
|
"grad_norm": 0.4278747106246206,
|
|
"learning_rate": 1.6111127385726844e-06,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11325961351394653,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5047.4,
|
|
"valid_targets_min": 3681
|
|
},
|
|
{
|
|
"epoch": 6.19921568627451,
|
|
"grad_norm": 0.4386402336816314,
|
|
"learning_rate": 1.5805158358182039e-06,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09711767733097076,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4529.9,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 6.207058823529412,
|
|
"grad_norm": 0.46199348988460937,
|
|
"learning_rate": 1.550200304917897e-06,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10279853641986847,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4843.8,
|
|
"valid_targets_min": 3220
|
|
},
|
|
{
|
|
"epoch": 6.214901960784314,
|
|
"grad_norm": 0.4764356306837442,
|
|
"learning_rate": 1.5201666089649193e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09680809825658798,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4384.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 6.222745098039216,
|
|
"grad_norm": 0.4816928951210748,
|
|
"learning_rate": 1.4904152067471932e-06,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10596531629562378,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4047.4,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 6.2305882352941175,
|
|
"grad_norm": 0.48260539647111306,
|
|
"learning_rate": 1.4609465527403722e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09708277136087418,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4819.9,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 6.2384313725490195,
|
|
"grad_norm": 0.5138669142577384,
|
|
"learning_rate": 1.431761097100921e-06,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11067366600036621,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3684.1,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 6.246274509803921,
|
|
"grad_norm": 0.5321119233927291,
|
|
"learning_rate": 1.4028592856592239e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11594337224960327,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4429.8,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 6.254117647058823,
|
|
"grad_norm": 0.413032114332087,
|
|
"learning_rate": 1.374241559912788e-06,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11865642666816711,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5541.2,
|
|
"valid_targets_min": 4575
|
|
},
|
|
{
|
|
"epoch": 6.261960784313725,
|
|
"grad_norm": 0.46459597693737414,
|
|
"learning_rate": 1.345908357019483e-06,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09667669236660004,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4013.4,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 6.269803921568627,
|
|
"grad_norm": 0.48191161876468613,
|
|
"learning_rate": 1.3178601097908827e-06,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09473130851984024,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4327.1,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 6.277647058823529,
|
|
"grad_norm": 0.4582088988911848,
|
|
"learning_rate": 1.2900972466856377e-06,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10878792405128479,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4861.2,
|
|
"valid_targets_min": 3449
|
|
},
|
|
{
|
|
"epoch": 6.285490196078431,
|
|
"grad_norm": 0.46547760493544904,
|
|
"learning_rate": 1.2626201918029301e-06,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11429126560688019,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4991.2,
|
|
"valid_targets_min": 3913
|
|
},
|
|
{
|
|
"epoch": 6.293333333333333,
|
|
"grad_norm": 0.4760840143081298,
|
|
"learning_rate": 1.235429364876015e-06,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11159655451774597,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4995.0,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 6.301176470588235,
|
|
"grad_norm": 0.4255504180851961,
|
|
"learning_rate": 1.2085251812657762e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10898862034082413,
|
|
"step": 4020,
|
|
"valid_targets_mean": 5228.6,
|
|
"valid_targets_min": 3682
|
|
},
|
|
{
|
|
"epoch": 6.309019607843137,
|
|
"grad_norm": 0.4599513705632692,
|
|
"learning_rate": 1.1819080519544123e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11290080845355988,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4730.2,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 6.316862745098039,
|
|
"grad_norm": 0.4465738842979211,
|
|
"learning_rate": 1.155578383539142e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10520611703395844,
|
|
"step": 4030,
|
|
"valid_targets_mean": 5691.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 6.324705882352941,
|
|
"grad_norm": 0.500638438402799,
|
|
"learning_rate": 1.1295365782259982e-06,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13809996843338013,
|
|
"step": 4035,
|
|
"valid_targets_mean": 6115.6,
|
|
"valid_targets_min": 3513
|
|
},
|
|
{
|
|
"epoch": 6.332549019607843,
|
|
"grad_norm": 0.4695168533709815,
|
|
"learning_rate": 1.103783033823671e-06,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10395525395870209,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4979.2,
|
|
"valid_targets_min": 3179
|
|
},
|
|
{
|
|
"epoch": 6.3403921568627455,
|
|
"grad_norm": 0.4352116236902397,
|
|
"learning_rate": 1.0783181437374669e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10586559772491455,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 6.348235294117647,
|
|
"grad_norm": 0.40989056986201866,
|
|
"learning_rate": 1.05314229696325e-06,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12470489740371704,
|
|
"step": 4050,
|
|
"valid_targets_mean": 7550.8,
|
|
"valid_targets_min": 4536
|
|
},
|
|
{
|
|
"epoch": 6.356078431372549,
|
|
"grad_norm": 0.46710136624423204,
|
|
"learning_rate": 1.0282558780815411e-06,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10271404683589935,
|
|
"step": 4055,
|
|
"valid_targets_mean": 4367.8,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 6.363921568627451,
|
|
"grad_norm": 0.4619806527314137,
|
|
"learning_rate": 1.0036592672516088e-06,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12356600165367126,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5394.8,
|
|
"valid_targets_min": 3766
|
|
},
|
|
{
|
|
"epoch": 6.371764705882353,
|
|
"grad_norm": 0.473282214165717,
|
|
"learning_rate": 9.79352840205703e-07,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10323870182037354,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3941.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.379607843137255,
|
|
"grad_norm": 0.408597109898331,
|
|
"learning_rate": 9.55336968243268e-07,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12019779533147812,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5733.5,
|
|
"valid_targets_min": 3492
|
|
},
|
|
{
|
|
"epoch": 6.387450980392157,
|
|
"grad_norm": 0.5042720573788865,
|
|
"learning_rate": 9.316120182253141e-07,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10488654673099518,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4488.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.395294117647059,
|
|
"grad_norm": 0.42170533895226114,
|
|
"learning_rate": 9.081783525687826e-07,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12465270608663559,
|
|
"step": 4080,
|
|
"valid_targets_mean": 6553.2,
|
|
"valid_targets_min": 4229
|
|
},
|
|
{
|
|
"epoch": 6.403137254901961,
|
|
"grad_norm": 0.4594310353925496,
|
|
"learning_rate": 8.850363292410269e-07,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11481781303882599,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5709.0,
|
|
"valid_targets_min": 3848
|
|
},
|
|
{
|
|
"epoch": 6.410980392156863,
|
|
"grad_norm": 0.4565424799809639,
|
|
"learning_rate": 8.621863017543353e-07,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10198415815830231,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5144.8,
|
|
"valid_targets_min": 4120
|
|
},
|
|
{
|
|
"epoch": 6.418823529411765,
|
|
"grad_norm": 0.48683970303393115,
|
|
"learning_rate": 8.396286191605352e-07,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1126849502325058,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4646.0,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 6.426666666666667,
|
|
"grad_norm": 0.4382148779021092,
|
|
"learning_rate": 8.173636260456575e-07,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09468823671340942,
|
|
"step": 4100,
|
|
"valid_targets_mean": 4185.4,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 6.434509803921569,
|
|
"grad_norm": 0.4539076864044056,
|
|
"learning_rate": 7.953916625246804e-07,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0947730541229248,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4015.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 6.442352941176471,
|
|
"grad_norm": 0.4652069738933959,
|
|
"learning_rate": 7.737130642363211e-07,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09318499267101288,
|
|
"step": 4110,
|
|
"valid_targets_mean": 3760.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 6.4501960784313725,
|
|
"grad_norm": 0.45696057518024663,
|
|
"learning_rate": 7.52328162337923e-07,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09854144603013992,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4312.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.4580392156862745,
|
|
"grad_norm": 0.44296054339690394,
|
|
"learning_rate": 7.3123728350039e-07,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10991635918617249,
|
|
"step": 4120,
|
|
"valid_targets_mean": 5088.5,
|
|
"valid_targets_min": 4233
|
|
},
|
|
{
|
|
"epoch": 6.465882352941176,
|
|
"grad_norm": 0.4545107692595997,
|
|
"learning_rate": 7.104407499031896e-07,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18510067462921143,
|
|
"step": 4125,
|
|
"valid_targets_mean": 6381.0,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 6.473725490196078,
|
|
"grad_norm": 0.43613441644761725,
|
|
"learning_rate": 6.899388792294482e-07,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10570946335792542,
|
|
"step": 4130,
|
|
"valid_targets_mean": 4890.8,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 6.48156862745098,
|
|
"grad_norm": 0.4895168217779212,
|
|
"learning_rate": 6.697319846610772e-07,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12968577444553375,
|
|
"step": 4135,
|
|
"valid_targets_mean": 5128.5,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 6.489411764705882,
|
|
"grad_norm": 0.44488315854666716,
|
|
"learning_rate": 6.498203748740106e-07,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12031705677509308,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5625.0,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 6.497254901960784,
|
|
"grad_norm": 0.4855206673682731,
|
|
"learning_rate": 6.302043540334701e-07,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11037050187587738,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4964.1,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 6.505098039215686,
|
|
"grad_norm": 0.5610636357808569,
|
|
"learning_rate": 6.108842217893384e-07,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14256209135055542,
|
|
"step": 4150,
|
|
"valid_targets_mean": 5879.5,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 6.512941176470588,
|
|
"grad_norm": 0.5084141453900793,
|
|
"learning_rate": 5.918602732715583e-07,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09828584641218185,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4444.2,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 6.52078431372549,
|
|
"grad_norm": 0.46072558569216976,
|
|
"learning_rate": 5.731327990856517e-07,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12016499042510986,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5461.9,
|
|
"valid_targets_min": 3945
|
|
},
|
|
{
|
|
"epoch": 6.528627450980392,
|
|
"grad_norm": 0.4284249921405838,
|
|
"learning_rate": 5.547020853082497e-07,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11326861381530762,
|
|
"step": 4165,
|
|
"valid_targets_mean": 5245.2,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 6.536470588235294,
|
|
"grad_norm": 0.4944332344549911,
|
|
"learning_rate": 5.365684134827542e-07,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12869860231876373,
|
|
"step": 4170,
|
|
"valid_targets_mean": 5085.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 6.544313725490196,
|
|
"grad_norm": 0.5017200025013507,
|
|
"learning_rate": 5.187320606150103e-07,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18561658263206482,
|
|
"step": 4175,
|
|
"valid_targets_mean": 6446.4,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 6.552156862745098,
|
|
"grad_norm": 0.42883611085045314,
|
|
"learning_rate": 5.011932991690959e-07,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269712746143341,
|
|
"step": 4180,
|
|
"valid_targets_mean": 5828.9,
|
|
"valid_targets_min": 4165
|
|
},
|
|
{
|
|
"epoch": 6.5600000000000005,
|
|
"grad_norm": 0.4478364879457884,
|
|
"learning_rate": 4.839523970631387e-07,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12050098180770874,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5428.6,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 6.567843137254902,
|
|
"grad_norm": 0.4713390822402125,
|
|
"learning_rate": 4.6700961766524833e-07,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1017150729894638,
|
|
"step": 4190,
|
|
"valid_targets_mean": 4122.8,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 6.575686274509804,
|
|
"grad_norm": 0.4687166113705068,
|
|
"learning_rate": 4.503652197894681e-07,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09618793427944183,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4185.8,
|
|
"valid_targets_min": 3418
|
|
},
|
|
{
|
|
"epoch": 6.583529411764706,
|
|
"grad_norm": 0.46394809739148357,
|
|
"learning_rate": 4.3401945769183884e-07,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09462013840675354,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3753.0,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 6.591372549019608,
|
|
"grad_norm": 0.40901320396790997,
|
|
"learning_rate": 4.1797258106650804e-07,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10463082790374756,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5433.4,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 6.59921568627451,
|
|
"grad_norm": 0.4820081120677433,
|
|
"learning_rate": 4.0222483504191556e-07,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08848341554403305,
|
|
"step": 4210,
|
|
"valid_targets_mean": 4271.1,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 6.607058823529412,
|
|
"grad_norm": 0.6091087606774781,
|
|
"learning_rate": 3.8677646017704963e-07,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10160544514656067,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3950.5,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 6.614901960784314,
|
|
"grad_norm": 0.3987476887516562,
|
|
"learning_rate": 3.716276924577744e-07,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.086185023188591,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5230.9,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 6.622745098039216,
|
|
"grad_norm": 0.4201747092176709,
|
|
"learning_rate": 3.5677876329322624e-07,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13234418630599976,
|
|
"step": 4225,
|
|
"valid_targets_mean": 6772.1,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 6.630588235294118,
|
|
"grad_norm": 0.49514217900654944,
|
|
"learning_rate": 3.422298995122675e-07,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14086517691612244,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4969.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 6.63843137254902,
|
|
"grad_norm": 0.4416953146981601,
|
|
"learning_rate": 3.279813233600404e-07,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398651450872421,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5638.6,
|
|
"valid_targets_min": 3436
|
|
},
|
|
{
|
|
"epoch": 6.646274509803922,
|
|
"grad_norm": 0.43040627292630385,
|
|
"learning_rate": 3.140332524945522e-07,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10975523293018341,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4396.2,
|
|
"valid_targets_min": 3472
|
|
},
|
|
{
|
|
"epoch": 6.654117647058824,
|
|
"grad_norm": 0.46808890102327305,
|
|
"learning_rate": 3.0038589998336423e-07,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12664811313152313,
|
|
"step": 4245,
|
|
"valid_targets_mean": 5429.0,
|
|
"valid_targets_min": 3906
|
|
},
|
|
{
|
|
"epoch": 6.661960784313726,
|
|
"grad_norm": 0.4430529588449379,
|
|
"learning_rate": 2.8703947430033465e-07,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09306753426790237,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4497.1,
|
|
"valid_targets_min": 3315
|
|
},
|
|
{
|
|
"epoch": 6.669803921568628,
|
|
"grad_norm": 0.4811322640009443,
|
|
"learning_rate": 2.7399417932242325e-07,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1111401841044426,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4216.9,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.6776470588235295,
|
|
"grad_norm": 0.4262010221953986,
|
|
"learning_rate": 2.6125021432659824e-07,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10089631378650665,
|
|
"step": 4260,
|
|
"valid_targets_mean": 5505.8,
|
|
"valid_targets_min": 2862
|
|
},
|
|
{
|
|
"epoch": 6.685490196078431,
|
|
"grad_norm": 0.4430501362974238,
|
|
"learning_rate": 2.4880777398677e-07,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13782116770744324,
|
|
"step": 4265,
|
|
"valid_targets_mean": 5625.0,
|
|
"valid_targets_min": 4029
|
|
},
|
|
{
|
|
"epoch": 6.693333333333333,
|
|
"grad_norm": 0.4554702090858196,
|
|
"learning_rate": 2.3666704837082888e-07,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12508338689804077,
|
|
"step": 4270,
|
|
"valid_targets_mean": 4775.0,
|
|
"valid_targets_min": 3986
|
|
},
|
|
{
|
|
"epoch": 6.701176470588235,
|
|
"grad_norm": 0.4449879282070006,
|
|
"learning_rate": 2.2482822293774297e-07,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12404239177703857,
|
|
"step": 4275,
|
|
"valid_targets_mean": 6255.5,
|
|
"valid_targets_min": 4588
|
|
},
|
|
{
|
|
"epoch": 6.709019607843137,
|
|
"grad_norm": 0.4445030743333133,
|
|
"learning_rate": 2.13291478534714e-07,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14368386566638947,
|
|
"step": 4280,
|
|
"valid_targets_mean": 6303.6,
|
|
"valid_targets_min": 3679
|
|
},
|
|
{
|
|
"epoch": 6.716862745098039,
|
|
"grad_norm": 0.5118470526146196,
|
|
"learning_rate": 2.020569913944348e-07,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.110391765832901,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3751.0,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 6.724705882352941,
|
|
"grad_norm": 0.4356191475545199,
|
|
"learning_rate": 1.9112493313236947e-07,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13480740785598755,
|
|
"step": 4290,
|
|
"valid_targets_mean": 6193.1,
|
|
"valid_targets_min": 3947
|
|
},
|
|
{
|
|
"epoch": 6.732549019607843,
|
|
"grad_norm": 0.46764269786091694,
|
|
"learning_rate": 1.8049547074416195e-07,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276300549507141,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4674.4,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 6.740392156862745,
|
|
"grad_norm": 0.47631618188058367,
|
|
"learning_rate": 1.701687666030627e-07,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11678611487150192,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4788.5,
|
|
"valid_targets_min": 3863
|
|
},
|
|
{
|
|
"epoch": 6.748235294117647,
|
|
"grad_norm": 0.48207970098367864,
|
|
"learning_rate": 1.6014497845745715e-07,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12263034284114838,
|
|
"step": 4305,
|
|
"valid_targets_mean": 4376.2,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 6.756078431372549,
|
|
"grad_norm": 0.4796323347873038,
|
|
"learning_rate": 1.504242594284544e-07,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13712115585803986,
|
|
"step": 4310,
|
|
"valid_targets_mean": 5298.9,
|
|
"valid_targets_min": 4224
|
|
},
|
|
{
|
|
"epoch": 6.763921568627451,
|
|
"grad_norm": 0.4615879695403553,
|
|
"learning_rate": 1.4100675800754914e-07,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12211369723081589,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4523.1,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 6.771764705882353,
|
|
"grad_norm": 0.4814894091241974,
|
|
"learning_rate": 1.318926180543545e-07,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10186205804347992,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4021.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 6.779607843137255,
|
|
"grad_norm": 0.48253773649910064,
|
|
"learning_rate": 1.2308197879439932e-07,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13970795273780823,
|
|
"step": 4325,
|
|
"valid_targets_mean": 5597.2,
|
|
"valid_targets_min": 4134
|
|
},
|
|
{
|
|
"epoch": 6.7874509803921566,
|
|
"grad_norm": 0.4388344176611287,
|
|
"learning_rate": 1.1457497481700775e-07,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1335308849811554,
|
|
"step": 4330,
|
|
"valid_targets_mean": 5858.1,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 6.7952941176470585,
|
|
"grad_norm": 0.5190560756784384,
|
|
"learning_rate": 1.0637173607323859e-07,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08725880831480026,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3410.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 6.80313725490196,
|
|
"grad_norm": 0.4456080177024077,
|
|
"learning_rate": 9.847238787390246e-08,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10467448830604553,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5285.2,
|
|
"valid_targets_min": 3760
|
|
},
|
|
{
|
|
"epoch": 6.810980392156862,
|
|
"grad_norm": 0.44821049189338225,
|
|
"learning_rate": 9.087705088764997e-08,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09593216329813004,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4779.4,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 6.818823529411764,
|
|
"grad_norm": 0.5242377131360809,
|
|
"learning_rate": 8.358584113911994e-08,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10853705555200577,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4434.6,
|
|
"valid_targets_min": 3719
|
|
},
|
|
{
|
|
"epoch": 6.826666666666666,
|
|
"grad_norm": 0.4403671390580566,
|
|
"learning_rate": 7.659887000717403e-08,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10588899254798889,
|
|
"step": 4355,
|
|
"valid_targets_mean": 4671.9,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 6.834509803921568,
|
|
"grad_norm": 0.4176873293928865,
|
|
"learning_rate": 6.991624422320042e-08,
|
|
"loss": 0.2212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10170692950487137,
|
|
"step": 4360,
|
|
"valid_targets_mean": 4710.1,
|
|
"valid_targets_min": 3220
|
|
},
|
|
{
|
|
"epoch": 6.842352941176471,
|
|
"grad_norm": 0.45332879139232546,
|
|
"learning_rate": 6.353806586946842e-08,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10372980684041977,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4713.1,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 6.850196078431373,
|
|
"grad_norm": 0.4389638053567823,
|
|
"learning_rate": 5.7464432377580814e-08,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10711215436458588,
|
|
"step": 4370,
|
|
"valid_targets_mean": 4678.0,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 6.858039215686275,
|
|
"grad_norm": 0.46371795730090787,
|
|
"learning_rate": 5.169543652698617e-08,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10585801303386688,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4192.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 6.865882352941177,
|
|
"grad_norm": 0.45336551811517417,
|
|
"learning_rate": 4.623116644354886e-08,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12293557822704315,
|
|
"step": 4380,
|
|
"valid_targets_mean": 5835.8,
|
|
"valid_targets_min": 3830
|
|
},
|
|
{
|
|
"epoch": 6.873725490196079,
|
|
"grad_norm": 0.4516088275443724,
|
|
"learning_rate": 4.107170559822127e-08,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09389039874076843,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4147.2,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.881568627450981,
|
|
"grad_norm": 0.48450787877788476,
|
|
"learning_rate": 3.621713280575367e-08,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11004867404699326,
|
|
"step": 4390,
|
|
"valid_targets_mean": 5069.1,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 6.889411764705883,
|
|
"grad_norm": 0.4390787127123611,
|
|
"learning_rate": 3.166752222349523e-08,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09037572145462036,
|
|
"step": 4395,
|
|
"valid_targets_mean": 4896.4,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 6.8972549019607845,
|
|
"grad_norm": 0.46856100540775486,
|
|
"learning_rate": 2.7422943350263743e-08,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11701315641403198,
|
|
"step": 4400,
|
|
"valid_targets_mean": 4600.9,
|
|
"valid_targets_min": 3802
|
|
},
|
|
{
|
|
"epoch": 6.905098039215686,
|
|
"grad_norm": 0.4511941684003278,
|
|
"learning_rate": 2.3483461025282094e-08,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08290404081344604,
|
|
"step": 4405,
|
|
"valid_targets_mean": 3947.2,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 6.912941176470588,
|
|
"grad_norm": 0.47725026449168123,
|
|
"learning_rate": 1.9849135427187916e-08,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16268667578697205,
|
|
"step": 4410,
|
|
"valid_targets_mean": 5750.8,
|
|
"valid_targets_min": 3535
|
|
},
|
|
{
|
|
"epoch": 6.92078431372549,
|
|
"grad_norm": 0.43317704843794524,
|
|
"learning_rate": 1.6520022073112098e-08,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08880816400051117,
|
|
"step": 4415,
|
|
"valid_targets_mean": 4106.0,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 6.928627450980392,
|
|
"grad_norm": 0.45593617498953504,
|
|
"learning_rate": 1.3496171817832804e-08,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12651695311069489,
|
|
"step": 4420,
|
|
"valid_targets_mean": 6219.2,
|
|
"valid_targets_min": 4586
|
|
},
|
|
{
|
|
"epoch": 6.936470588235294,
|
|
"grad_norm": 0.5786636423481467,
|
|
"learning_rate": 1.0777630853000543e-08,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09566706418991089,
|
|
"step": 4425,
|
|
"valid_targets_mean": 5062.0,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 6.944313725490196,
|
|
"grad_norm": 0.48609466865000867,
|
|
"learning_rate": 8.364440706429833e-09,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11808843910694122,
|
|
"step": 4430,
|
|
"valid_targets_mean": 4558.0,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 6.952156862745098,
|
|
"grad_norm": 0.4607867458472101,
|
|
"learning_rate": 6.2566382414641635e-09,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263408064842224,
|
|
"step": 4435,
|
|
"valid_targets_mean": 5793.4,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 6.96,
|
|
"grad_norm": 0.47554011312145067,
|
|
"learning_rate": 4.454255656414219e-09,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11474943161010742,
|
|
"step": 4440,
|
|
"valid_targets_mean": 5632.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 6.967843137254902,
|
|
"grad_norm": 0.4555499881336514,
|
|
"learning_rate": 2.957320484067161e-09,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10707740485668182,
|
|
"step": 4445,
|
|
"valid_targets_mean": 4728.5,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 6.975686274509804,
|
|
"grad_norm": 0.44759152584461565,
|
|
"learning_rate": 1.7658555912647423e-09,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13053828477859497,
|
|
"step": 4450,
|
|
"valid_targets_mean": 6211.1,
|
|
"valid_targets_min": 4483
|
|
},
|
|
{
|
|
"epoch": 6.983529411764706,
|
|
"grad_norm": 0.476165174658354,
|
|
"learning_rate": 8.79879178554699e-10,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11098936200141907,
|
|
"step": 4455,
|
|
"valid_targets_mean": 4677.2,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 6.991372549019608,
|
|
"grad_norm": 0.4427022605119287,
|
|
"learning_rate": 2.9940477991097227e-10,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10073821246623993,
|
|
"step": 4460,
|
|
"valid_targets_mean": 4503.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 6.99921568627451,
|
|
"grad_norm": 0.8836534606685401,
|
|
"learning_rate": 2.4441262527208355e-11,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10273762047290802,
|
|
"step": 4465,
|
|
"valid_targets_mean": 4522.4,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21145598590373993,
|
|
"step": 4466,
|
|
"total_flos": 2.8947179058078024e+18,
|
|
"train_loss": 0.26257057309707904,
|
|
"train_runtime": 68118.4527,
|
|
"train_samples_per_second": 1.048,
|
|
"train_steps_per_second": 0.066,
|
|
"valid_targets_mean": 4636.9,
|
|
"valid_targets_min": 581
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4466,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.8947179058078024e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|