9867 lines
274 KiB
JSON
9867 lines
274 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4466,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00784313725490196,
|
|
"grad_norm": 23.298347338390826,
|
|
"learning_rate": 3.579418344519016e-07,
|
|
"loss": 0.8602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43839943408966064,
|
|
"step": 5,
|
|
"valid_targets_mean": 6052.9,
|
|
"valid_targets_min": 4026
|
|
},
|
|
{
|
|
"epoch": 0.01568627450980392,
|
|
"grad_norm": 22.291211448466854,
|
|
"learning_rate": 8.053691275167786e-07,
|
|
"loss": 0.8507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3826303482055664,
|
|
"step": 10,
|
|
"valid_targets_mean": 4323.8,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 0.023529411764705882,
|
|
"grad_norm": 19.59349405942615,
|
|
"learning_rate": 1.2527964205816557e-06,
|
|
"loss": 0.8156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39852985739707947,
|
|
"step": 15,
|
|
"valid_targets_mean": 5580.4,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 0.03137254901960784,
|
|
"grad_norm": 15.074355357417764,
|
|
"learning_rate": 1.7002237136465326e-06,
|
|
"loss": 0.7847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3531936705112457,
|
|
"step": 20,
|
|
"valid_targets_mean": 4555.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 0.0392156862745098,
|
|
"grad_norm": 8.503476256842038,
|
|
"learning_rate": 2.1476510067114096e-06,
|
|
"loss": 0.7347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3691006302833557,
|
|
"step": 25,
|
|
"valid_targets_mean": 5543.6,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 0.047058823529411764,
|
|
"grad_norm": 4.791441092709092,
|
|
"learning_rate": 2.5950782997762863e-06,
|
|
"loss": 0.6663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.313259482383728,
|
|
"step": 30,
|
|
"valid_targets_mean": 4514.2,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 0.054901960784313725,
|
|
"grad_norm": 2.5079670106002365,
|
|
"learning_rate": 3.0425055928411635e-06,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3332233428955078,
|
|
"step": 35,
|
|
"valid_targets_mean": 5949.8,
|
|
"valid_targets_min": 3442
|
|
},
|
|
{
|
|
"epoch": 0.06274509803921569,
|
|
"grad_norm": 1.773502389142227,
|
|
"learning_rate": 3.4899328859060407e-06,
|
|
"loss": 0.6072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549877166748047,
|
|
"step": 40,
|
|
"valid_targets_mean": 4407.0,
|
|
"valid_targets_min": 3418
|
|
},
|
|
{
|
|
"epoch": 0.07058823529411765,
|
|
"grad_norm": 1.6073737125855354,
|
|
"learning_rate": 3.937360178970917e-06,
|
|
"loss": 0.5814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27978047728538513,
|
|
"step": 45,
|
|
"valid_targets_mean": 4382.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.0784313725490196,
|
|
"grad_norm": 1.2925467378333404,
|
|
"learning_rate": 4.384787472035795e-06,
|
|
"loss": 0.5545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26093819737434387,
|
|
"step": 50,
|
|
"valid_targets_mean": 5089.8,
|
|
"valid_targets_min": 3683
|
|
},
|
|
{
|
|
"epoch": 0.08627450980392157,
|
|
"grad_norm": 1.0921764137166408,
|
|
"learning_rate": 4.832214765100672e-06,
|
|
"loss": 0.5497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828206419944763,
|
|
"step": 55,
|
|
"valid_targets_mean": 4860.9,
|
|
"valid_targets_min": 3327
|
|
},
|
|
{
|
|
"epoch": 0.09411764705882353,
|
|
"grad_norm": 0.9105078282562821,
|
|
"learning_rate": 5.2796420581655485e-06,
|
|
"loss": 0.5404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650872468948364,
|
|
"step": 60,
|
|
"valid_targets_mean": 4219.0,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 0.10196078431372549,
|
|
"grad_norm": 0.8176291712197993,
|
|
"learning_rate": 5.727069351230425e-06,
|
|
"loss": 0.5087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2280701994895935,
|
|
"step": 65,
|
|
"valid_targets_mean": 4217.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.10980392156862745,
|
|
"grad_norm": 0.7305215048778408,
|
|
"learning_rate": 6.174496644295303e-06,
|
|
"loss": 0.5017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25523003935813904,
|
|
"step": 70,
|
|
"valid_targets_mean": 5386.5,
|
|
"valid_targets_min": 3532
|
|
},
|
|
{
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 0.6691013259217359,
|
|
"learning_rate": 6.6219239373601796e-06,
|
|
"loss": 0.4841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24713614583015442,
|
|
"step": 75,
|
|
"valid_targets_mean": 4911.4,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 0.12549019607843137,
|
|
"grad_norm": 0.609679353765235,
|
|
"learning_rate": 7.069351230425056e-06,
|
|
"loss": 0.4621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23147262632846832,
|
|
"step": 80,
|
|
"valid_targets_mean": 4827.0,
|
|
"valid_targets_min": 4254
|
|
},
|
|
{
|
|
"epoch": 0.13333333333333333,
|
|
"grad_norm": 0.6087171040956587,
|
|
"learning_rate": 7.516778523489934e-06,
|
|
"loss": 0.4707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24862024188041687,
|
|
"step": 85,
|
|
"valid_targets_mean": 6231.5,
|
|
"valid_targets_min": 4574
|
|
},
|
|
{
|
|
"epoch": 0.1411764705882353,
|
|
"grad_norm": 0.6145120711271943,
|
|
"learning_rate": 7.96420581655481e-06,
|
|
"loss": 0.435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22450268268585205,
|
|
"step": 90,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 3877
|
|
},
|
|
{
|
|
"epoch": 0.14901960784313725,
|
|
"grad_norm": 0.5603079041349106,
|
|
"learning_rate": 8.411633109619688e-06,
|
|
"loss": 0.4323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18175888061523438,
|
|
"step": 95,
|
|
"valid_targets_mean": 4763.6,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 0.1568627450980392,
|
|
"grad_norm": 0.6890920617581278,
|
|
"learning_rate": 8.859060402684566e-06,
|
|
"loss": 0.4226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21500495076179504,
|
|
"step": 100,
|
|
"valid_targets_mean": 5103.5,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.16470588235294117,
|
|
"grad_norm": 0.7055026012162536,
|
|
"learning_rate": 9.306487695749442e-06,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.216325581073761,
|
|
"step": 105,
|
|
"valid_targets_mean": 4637.9,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 0.17254901960784313,
|
|
"grad_norm": 0.5643953971819364,
|
|
"learning_rate": 9.753914988814318e-06,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19290407001972198,
|
|
"step": 110,
|
|
"valid_targets_mean": 4680.8,
|
|
"valid_targets_min": 3445
|
|
},
|
|
{
|
|
"epoch": 0.1803921568627451,
|
|
"grad_norm": 0.7330275863785135,
|
|
"learning_rate": 1.0201342281879197e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16619446873664856,
|
|
"step": 115,
|
|
"valid_targets_mean": 3789.8,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.18823529411764706,
|
|
"grad_norm": 0.618524578115423,
|
|
"learning_rate": 1.0648769574944073e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21853846311569214,
|
|
"step": 120,
|
|
"valid_targets_mean": 5809.2,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 0.19607843137254902,
|
|
"grad_norm": 0.5780165322055293,
|
|
"learning_rate": 1.109619686800895e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1631530225276947,
|
|
"step": 125,
|
|
"valid_targets_mean": 4298.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.20392156862745098,
|
|
"grad_norm": 0.5547050817776145,
|
|
"learning_rate": 1.1543624161073828e-05,
|
|
"loss": 0.3932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1529003232717514,
|
|
"step": 130,
|
|
"valid_targets_mean": 4122.0,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 0.21176470588235294,
|
|
"grad_norm": 0.5810478350386387,
|
|
"learning_rate": 1.1991051454138702e-05,
|
|
"loss": 0.3848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.216994971036911,
|
|
"step": 135,
|
|
"valid_targets_mean": 5638.5,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 0.2196078431372549,
|
|
"grad_norm": 0.5922588085332883,
|
|
"learning_rate": 1.243847874720358e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18506886065006256,
|
|
"step": 140,
|
|
"valid_targets_mean": 4467.4,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 0.22745098039215686,
|
|
"grad_norm": 0.6014684802309681,
|
|
"learning_rate": 1.2885906040268457e-05,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14172306656837463,
|
|
"step": 145,
|
|
"valid_targets_mean": 4013.1,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 0.6097819527541619,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1689232587814331,
|
|
"step": 150,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 4012
|
|
},
|
|
{
|
|
"epoch": 0.24313725490196078,
|
|
"grad_norm": 0.554747015726934,
|
|
"learning_rate": 1.3780760626398211e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27534186840057373,
|
|
"step": 155,
|
|
"valid_targets_mean": 6673.6,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 0.25098039215686274,
|
|
"grad_norm": 0.8259973561073648,
|
|
"learning_rate": 1.4228187919463088e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1627364158630371,
|
|
"step": 160,
|
|
"valid_targets_mean": 4557.5,
|
|
"valid_targets_min": 2179
|
|
},
|
|
{
|
|
"epoch": 0.25882352941176473,
|
|
"grad_norm": 0.5575267126622169,
|
|
"learning_rate": 1.4675615212527964e-05,
|
|
"loss": 0.369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1631404161453247,
|
|
"step": 165,
|
|
"valid_targets_mean": 4479.1,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 0.26666666666666666,
|
|
"grad_norm": 0.5975170110533272,
|
|
"learning_rate": 1.5123042505592842e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17251205444335938,
|
|
"step": 170,
|
|
"valid_targets_mean": 4417.2,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 0.27450980392156865,
|
|
"grad_norm": 0.5366892107887073,
|
|
"learning_rate": 1.5570469798657718e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19492444396018982,
|
|
"step": 175,
|
|
"valid_targets_mean": 5861.2,
|
|
"valid_targets_min": 3404
|
|
},
|
|
{
|
|
"epoch": 0.2823529411764706,
|
|
"grad_norm": 0.6754136526395924,
|
|
"learning_rate": 1.6017897091722595e-05,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17513027787208557,
|
|
"step": 180,
|
|
"valid_targets_mean": 3518.5,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.2901960784313726,
|
|
"grad_norm": 0.9443788799610068,
|
|
"learning_rate": 1.6465324384787473e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18656358122825623,
|
|
"step": 185,
|
|
"valid_targets_mean": 5753.5,
|
|
"valid_targets_min": 4620
|
|
},
|
|
{
|
|
"epoch": 0.2980392156862745,
|
|
"grad_norm": 0.5451311543234643,
|
|
"learning_rate": 1.691275167785235e-05,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21605977416038513,
|
|
"step": 190,
|
|
"valid_targets_mean": 6037.1,
|
|
"valid_targets_min": 3270
|
|
},
|
|
{
|
|
"epoch": 0.3058823529411765,
|
|
"grad_norm": 0.5096399052054448,
|
|
"learning_rate": 1.7360178970917228e-05,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20450440049171448,
|
|
"step": 195,
|
|
"valid_targets_mean": 6507.9,
|
|
"valid_targets_min": 3275
|
|
},
|
|
{
|
|
"epoch": 0.3137254901960784,
|
|
"grad_norm": 0.5993350633001354,
|
|
"learning_rate": 1.7807606263982106e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15169182419776917,
|
|
"step": 200,
|
|
"valid_targets_mean": 4578.1,
|
|
"valid_targets_min": 4111
|
|
},
|
|
{
|
|
"epoch": 0.3215686274509804,
|
|
"grad_norm": 0.6205306419609562,
|
|
"learning_rate": 1.825503355704698e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17773106694221497,
|
|
"step": 205,
|
|
"valid_targets_mean": 4392.1,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 0.32941176470588235,
|
|
"grad_norm": 0.62482187351153,
|
|
"learning_rate": 1.8702460850111858e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1388782262802124,
|
|
"step": 210,
|
|
"valid_targets_mean": 4299.2,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 0.33725490196078434,
|
|
"grad_norm": 0.6208166759487009,
|
|
"learning_rate": 1.9149888143176735e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20655155181884766,
|
|
"step": 215,
|
|
"valid_targets_mean": 5151.0,
|
|
"valid_targets_min": 3970
|
|
},
|
|
{
|
|
"epoch": 0.34509803921568627,
|
|
"grad_norm": 0.6173058180465927,
|
|
"learning_rate": 1.9597315436241613e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23730814456939697,
|
|
"step": 220,
|
|
"valid_targets_mean": 5458.9,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 0.8313308037805428,
|
|
"learning_rate": 2.004474272930649e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20326246321201324,
|
|
"step": 225,
|
|
"valid_targets_mean": 5639.2,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 0.3607843137254902,
|
|
"grad_norm": 0.5802044604297666,
|
|
"learning_rate": 2.0492170022371365e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1663266271352768,
|
|
"step": 230,
|
|
"valid_targets_mean": 5272.0,
|
|
"valid_targets_min": 4070
|
|
},
|
|
{
|
|
"epoch": 0.3686274509803922,
|
|
"grad_norm": 0.5500195062926332,
|
|
"learning_rate": 2.0939597315436246e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033405900001526,
|
|
"step": 235,
|
|
"valid_targets_mean": 6574.4,
|
|
"valid_targets_min": 4332
|
|
},
|
|
{
|
|
"epoch": 0.3764705882352941,
|
|
"grad_norm": 0.6230935555119714,
|
|
"learning_rate": 2.138702460850112e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13374356925487518,
|
|
"step": 240,
|
|
"valid_targets_mean": 3771.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 0.3843137254901961,
|
|
"grad_norm": 0.5902118918894713,
|
|
"learning_rate": 2.1834451901565997e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15112121403217316,
|
|
"step": 245,
|
|
"valid_targets_mean": 3938.1,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.39215686274509803,
|
|
"grad_norm": 0.5856848973492705,
|
|
"learning_rate": 2.228187919463087e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17914924025535583,
|
|
"step": 250,
|
|
"valid_targets_mean": 5226.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.5808064932029918,
|
|
"learning_rate": 2.2729306487695753e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.168888121843338,
|
|
"step": 255,
|
|
"valid_targets_mean": 5211.1,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 0.40784313725490196,
|
|
"grad_norm": 0.608614659511003,
|
|
"learning_rate": 2.3176733780760627e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16316288709640503,
|
|
"step": 260,
|
|
"valid_targets_mean": 4988.6,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.41568627450980394,
|
|
"grad_norm": 0.6482303863343926,
|
|
"learning_rate": 2.3624161073825508e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17435654997825623,
|
|
"step": 265,
|
|
"valid_targets_mean": 4630.8,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 0.4235294117647059,
|
|
"grad_norm": 0.7427445144818828,
|
|
"learning_rate": 2.4071588366890382e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14953133463859558,
|
|
"step": 270,
|
|
"valid_targets_mean": 4994.5,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 0.43137254901960786,
|
|
"grad_norm": 0.5724650392644556,
|
|
"learning_rate": 2.451901565995526e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17354685068130493,
|
|
"step": 275,
|
|
"valid_targets_mean": 5621.2,
|
|
"valid_targets_min": 3628
|
|
},
|
|
{
|
|
"epoch": 0.4392156862745098,
|
|
"grad_norm": 0.5950700951789527,
|
|
"learning_rate": 2.4966442953020137e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1778419315814972,
|
|
"step": 280,
|
|
"valid_targets_mean": 5702.2,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 0.4470588235294118,
|
|
"grad_norm": 0.6345193957231828,
|
|
"learning_rate": 2.5413870246085015e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20228183269500732,
|
|
"step": 285,
|
|
"valid_targets_mean": 4570.2,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 0.4549019607843137,
|
|
"grad_norm": 0.6094068416813229,
|
|
"learning_rate": 2.586129753914989e-05,
|
|
"loss": 0.3408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13829955458641052,
|
|
"step": 290,
|
|
"valid_targets_mean": 4302.2,
|
|
"valid_targets_min": 2734
|
|
},
|
|
{
|
|
"epoch": 0.4627450980392157,
|
|
"grad_norm": 0.5825911888294858,
|
|
"learning_rate": 2.630872483221477e-05,
|
|
"loss": 0.3429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13955016434192657,
|
|
"step": 295,
|
|
"valid_targets_mean": 4808.9,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 0.6437990917169216,
|
|
"learning_rate": 2.6756152125279644e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18308787047863007,
|
|
"step": 300,
|
|
"valid_targets_mean": 4868.2,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.47843137254901963,
|
|
"grad_norm": 0.6714443312074112,
|
|
"learning_rate": 2.7203579418344522e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16416051983833313,
|
|
"step": 305,
|
|
"valid_targets_mean": 4578.1,
|
|
"valid_targets_min": 3890
|
|
},
|
|
{
|
|
"epoch": 0.48627450980392156,
|
|
"grad_norm": 0.5771009814027651,
|
|
"learning_rate": 2.76510067114094e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1598922163248062,
|
|
"step": 310,
|
|
"valid_targets_mean": 5190.5,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 0.49411764705882355,
|
|
"grad_norm": 0.5690495376174819,
|
|
"learning_rate": 2.8098434004474274e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19397258758544922,
|
|
"step": 315,
|
|
"valid_targets_mean": 5327.6,
|
|
"valid_targets_min": 3876
|
|
},
|
|
{
|
|
"epoch": 0.5019607843137255,
|
|
"grad_norm": 0.550480153370413,
|
|
"learning_rate": 2.854586129753915e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15099109709262848,
|
|
"step": 320,
|
|
"valid_targets_mean": 5396.5,
|
|
"valid_targets_min": 4033
|
|
},
|
|
{
|
|
"epoch": 0.5098039215686274,
|
|
"grad_norm": 0.583247357916581,
|
|
"learning_rate": 2.899328859060403e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11765319854021072,
|
|
"step": 325,
|
|
"valid_targets_mean": 4748.0,
|
|
"valid_targets_min": 3230
|
|
},
|
|
{
|
|
"epoch": 0.5176470588235295,
|
|
"grad_norm": 0.64154390330704,
|
|
"learning_rate": 2.9440715883668906e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15749859809875488,
|
|
"step": 330,
|
|
"valid_targets_mean": 4616.2,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.5254901960784314,
|
|
"grad_norm": 0.615191724266002,
|
|
"learning_rate": 2.988814317673378e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16712582111358643,
|
|
"step": 335,
|
|
"valid_targets_mean": 4441.6,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 0.5333333333333333,
|
|
"grad_norm": 0.5891783226631264,
|
|
"learning_rate": 3.033557046979866e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17673718929290771,
|
|
"step": 340,
|
|
"valid_targets_mean": 4994.9,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.5411764705882353,
|
|
"grad_norm": 0.5602456880109379,
|
|
"learning_rate": 3.078299776286353e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12272541970014572,
|
|
"step": 345,
|
|
"valid_targets_mean": 4047.5,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 0.5490196078431373,
|
|
"grad_norm": 0.5463989570288924,
|
|
"learning_rate": 3.123042505592841e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16012008488178253,
|
|
"step": 350,
|
|
"valid_targets_mean": 5549.2,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 0.5568627450980392,
|
|
"grad_norm": 0.5817982029983223,
|
|
"learning_rate": 3.167785234899329e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17968128621578217,
|
|
"step": 355,
|
|
"valid_targets_mean": 5364.1,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.5647058823529412,
|
|
"grad_norm": 0.5606408079082489,
|
|
"learning_rate": 3.212527964205817e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18859368562698364,
|
|
"step": 360,
|
|
"valid_targets_mean": 5831.8,
|
|
"valid_targets_min": 3932
|
|
},
|
|
{
|
|
"epoch": 0.5725490196078431,
|
|
"grad_norm": 0.6040131290277008,
|
|
"learning_rate": 3.257270693512304e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17811569571495056,
|
|
"step": 365,
|
|
"valid_targets_mean": 5092.4,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 0.5803921568627451,
|
|
"grad_norm": 0.5323257843878618,
|
|
"learning_rate": 3.3020134228187924e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1304280161857605,
|
|
"step": 370,
|
|
"valid_targets_mean": 4231.9,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.5501225667538716,
|
|
"learning_rate": 3.34675615212528e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15985777974128723,
|
|
"step": 375,
|
|
"valid_targets_mean": 4744.2,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.596078431372549,
|
|
"grad_norm": 0.6070915816256318,
|
|
"learning_rate": 3.391498881431768e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14768286049365997,
|
|
"step": 380,
|
|
"valid_targets_mean": 4678.5,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 0.6039215686274509,
|
|
"grad_norm": 0.5676478206539363,
|
|
"learning_rate": 3.436241610738255e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18851745128631592,
|
|
"step": 385,
|
|
"valid_targets_mean": 5673.2,
|
|
"valid_targets_min": 4270
|
|
},
|
|
{
|
|
"epoch": 0.611764705882353,
|
|
"grad_norm": 0.6294438971771676,
|
|
"learning_rate": 3.4809843400447434e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16219162940979004,
|
|
"step": 390,
|
|
"valid_targets_mean": 4658.5,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 0.6196078431372549,
|
|
"grad_norm": 0.5517350373337631,
|
|
"learning_rate": 3.525727069351231e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15507352352142334,
|
|
"step": 395,
|
|
"valid_targets_mean": 5542.0,
|
|
"valid_targets_min": 3454
|
|
},
|
|
{
|
|
"epoch": 0.6274509803921569,
|
|
"grad_norm": 0.5402705972646643,
|
|
"learning_rate": 3.570469798657719e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2057492733001709,
|
|
"step": 400,
|
|
"valid_targets_mean": 6888.8,
|
|
"valid_targets_min": 3784
|
|
},
|
|
{
|
|
"epoch": 0.6352941176470588,
|
|
"grad_norm": 0.5745876216369237,
|
|
"learning_rate": 3.6152125279642063e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13152286410331726,
|
|
"step": 405,
|
|
"valid_targets_mean": 4793.1,
|
|
"valid_targets_min": 4053
|
|
},
|
|
{
|
|
"epoch": 0.6431372549019608,
|
|
"grad_norm": 0.6031541334068352,
|
|
"learning_rate": 3.659955257270694e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18968364596366882,
|
|
"step": 410,
|
|
"valid_targets_mean": 5629.0,
|
|
"valid_targets_min": 4213
|
|
},
|
|
{
|
|
"epoch": 0.6509803921568628,
|
|
"grad_norm": 0.6164309927080983,
|
|
"learning_rate": 3.704697986577181e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16448290646076202,
|
|
"step": 415,
|
|
"valid_targets_mean": 4735.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.6588235294117647,
|
|
"grad_norm": 0.5359492226720084,
|
|
"learning_rate": 3.749440715883669e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19872182607650757,
|
|
"step": 420,
|
|
"valid_targets_mean": 5724.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 0.5819044959604062,
|
|
"learning_rate": 3.794183445190157e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14885927736759186,
|
|
"step": 425,
|
|
"valid_targets_mean": 5043.6,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.6745098039215687,
|
|
"grad_norm": 0.5752622619650046,
|
|
"learning_rate": 3.838926174496644e-05,
|
|
"loss": 0.3206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14092881977558136,
|
|
"step": 430,
|
|
"valid_targets_mean": 4815.8,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.6823529411764706,
|
|
"grad_norm": 0.49132427520732735,
|
|
"learning_rate": 3.883668903803132e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14789029955863953,
|
|
"step": 435,
|
|
"valid_targets_mean": 5252.1,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 0.6901960784313725,
|
|
"grad_norm": 0.48093782694535336,
|
|
"learning_rate": 3.9284116331096196e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13488665223121643,
|
|
"step": 440,
|
|
"valid_targets_mean": 5491.2,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 0.6980392156862745,
|
|
"grad_norm": 0.5571196071755746,
|
|
"learning_rate": 3.973154362416108e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19611987471580505,
|
|
"step": 445,
|
|
"valid_targets_mean": 5789.0,
|
|
"valid_targets_min": 3587
|
|
},
|
|
{
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 0.6046907322781063,
|
|
"learning_rate": 3.999997555873748e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14690479636192322,
|
|
"step": 450,
|
|
"valid_targets_mean": 5491.9,
|
|
"valid_targets_min": 3758
|
|
},
|
|
{
|
|
"epoch": 0.7137254901960784,
|
|
"grad_norm": 0.5492823367274015,
|
|
"learning_rate": 3.999970059522009e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14941585063934326,
|
|
"step": 455,
|
|
"valid_targets_mean": 4707.6,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 0.7215686274509804,
|
|
"grad_norm": 0.5380082427998023,
|
|
"learning_rate": 3.9999120120821446e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24840714037418365,
|
|
"step": 460,
|
|
"valid_targets_mean": 6441.1,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.7294117647058823,
|
|
"grad_norm": 0.5596379062224002,
|
|
"learning_rate": 3.999823414440874e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13548262417316437,
|
|
"step": 465,
|
|
"valid_targets_mean": 4467.9,
|
|
"valid_targets_min": 2927
|
|
},
|
|
{
|
|
"epoch": 0.7372549019607844,
|
|
"grad_norm": 0.5356703634829085,
|
|
"learning_rate": 3.999704267951594e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15319597721099854,
|
|
"step": 470,
|
|
"valid_targets_mean": 4402.6,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 0.7450980392156863,
|
|
"grad_norm": 0.600066524630049,
|
|
"learning_rate": 3.999554574434359e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11718227714300156,
|
|
"step": 475,
|
|
"valid_targets_mean": 3519.2,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.7529411764705882,
|
|
"grad_norm": 0.6098828990516277,
|
|
"learning_rate": 3.999374336175854e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1625547856092453,
|
|
"step": 480,
|
|
"valid_targets_mean": 4139.1,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.7607843137254902,
|
|
"grad_norm": 0.5371084402168345,
|
|
"learning_rate": 3.999163555929357e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14883488416671753,
|
|
"step": 485,
|
|
"valid_targets_mean": 4719.6,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 0.7686274509803922,
|
|
"grad_norm": 0.5528904483847205,
|
|
"learning_rate": 3.9989222369147e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149824321269989,
|
|
"step": 490,
|
|
"valid_targets_mean": 5001.2,
|
|
"valid_targets_min": 3946
|
|
},
|
|
{
|
|
"epoch": 0.7764705882352941,
|
|
"grad_norm": 0.6173104751559979,
|
|
"learning_rate": 3.998650382818217e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23142695426940918,
|
|
"step": 495,
|
|
"valid_targets_mean": 5869.8,
|
|
"valid_targets_min": 3481
|
|
},
|
|
{
|
|
"epoch": 0.7843137254901961,
|
|
"grad_norm": 0.5310347193247758,
|
|
"learning_rate": 3.998347997792689e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1767919659614563,
|
|
"step": 500,
|
|
"valid_targets_mean": 5104.8,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 0.792156862745098,
|
|
"grad_norm": 0.5225823121767637,
|
|
"learning_rate": 3.9980150864572815e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1493397206068039,
|
|
"step": 505,
|
|
"valid_targets_mean": 5159.4,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.5473700973149909,
|
|
"learning_rate": 3.997651653897472e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430482566356659,
|
|
"step": 510,
|
|
"valid_targets_mean": 4216.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.807843137254902,
|
|
"grad_norm": 0.5527704785980678,
|
|
"learning_rate": 3.997257705664974e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1335963010787964,
|
|
"step": 515,
|
|
"valid_targets_mean": 4604.4,
|
|
"valid_targets_min": 3499
|
|
},
|
|
{
|
|
"epoch": 0.8156862745098039,
|
|
"grad_norm": 0.5146849440416694,
|
|
"learning_rate": 3.9968332477776505e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11378350108861923,
|
|
"step": 520,
|
|
"valid_targets_mean": 4262.4,
|
|
"valid_targets_min": 3479
|
|
},
|
|
{
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 0.5786673735607119,
|
|
"learning_rate": 3.996378286719425e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1584271490573883,
|
|
"step": 525,
|
|
"valid_targets_mean": 4425.1,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 0.8313725490196079,
|
|
"grad_norm": 0.48691241061943,
|
|
"learning_rate": 3.995892829440178e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14986231923103333,
|
|
"step": 530,
|
|
"valid_targets_mean": 5350.5,
|
|
"valid_targets_min": 3222
|
|
},
|
|
{
|
|
"epoch": 0.8392156862745098,
|
|
"grad_norm": 0.6882772998345945,
|
|
"learning_rate": 3.995376883355645e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16301119327545166,
|
|
"step": 535,
|
|
"valid_targets_mean": 5388.0,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 0.8470588235294118,
|
|
"grad_norm": 0.5504142868498775,
|
|
"learning_rate": 3.994830456347302e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13605545461177826,
|
|
"step": 540,
|
|
"valid_targets_mean": 4475.0,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.8549019607843137,
|
|
"grad_norm": 0.552541153109021,
|
|
"learning_rate": 3.994253556762243e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425020694732666,
|
|
"step": 545,
|
|
"valid_targets_mean": 4783.4,
|
|
"valid_targets_min": 4019
|
|
},
|
|
{
|
|
"epoch": 0.8627450980392157,
|
|
"grad_norm": 0.516174073782907,
|
|
"learning_rate": 3.993646193413053e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12146864831447601,
|
|
"step": 550,
|
|
"valid_targets_mean": 4630.2,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 0.8705882352941177,
|
|
"grad_norm": 0.5114622919919676,
|
|
"learning_rate": 3.99300837557768e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129593163728714,
|
|
"step": 555,
|
|
"valid_targets_mean": 4601.2,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 0.8784313725490196,
|
|
"grad_norm": 0.6180845077447936,
|
|
"learning_rate": 3.9923401129992826e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22494003176689148,
|
|
"step": 560,
|
|
"valid_targets_mean": 4843.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.8862745098039215,
|
|
"grad_norm": 0.5938338606255534,
|
|
"learning_rate": 3.991641415886089e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13986267149448395,
|
|
"step": 565,
|
|
"valid_targets_mean": 4873.4,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 0.8941176470588236,
|
|
"grad_norm": 0.5131012574265683,
|
|
"learning_rate": 3.990912294911236e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141075998544693,
|
|
"step": 570,
|
|
"valid_targets_mean": 5001.8,
|
|
"valid_targets_min": 3638
|
|
},
|
|
{
|
|
"epoch": 0.9019607843137255,
|
|
"grad_norm": 0.5218539338715148,
|
|
"learning_rate": 3.99015276121261e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18449991941452026,
|
|
"step": 575,
|
|
"valid_targets_mean": 7555.2,
|
|
"valid_targets_min": 4641
|
|
},
|
|
{
|
|
"epoch": 0.9098039215686274,
|
|
"grad_norm": 0.5584481822366371,
|
|
"learning_rate": 3.989362826392676e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15493915975093842,
|
|
"step": 580,
|
|
"valid_targets_mean": 5178.5,
|
|
"valid_targets_min": 3262
|
|
},
|
|
{
|
|
"epoch": 0.9176470588235294,
|
|
"grad_norm": 0.5330193946594204,
|
|
"learning_rate": 3.9885425025183e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12677499651908875,
|
|
"step": 585,
|
|
"valid_targets_mean": 4999.5,
|
|
"valid_targets_min": 3634
|
|
},
|
|
{
|
|
"epoch": 0.9254901960784314,
|
|
"grad_norm": 0.555409429804303,
|
|
"learning_rate": 3.9876918021205606e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16259485483169556,
|
|
"step": 590,
|
|
"valid_targets_mean": 4657.9,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 0.9333333333333333,
|
|
"grad_norm": 0.5413694736991173,
|
|
"learning_rate": 3.9868107381945646e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14090581238269806,
|
|
"step": 595,
|
|
"valid_targets_mean": 4137.2,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 0.5097233576500176,
|
|
"learning_rate": 3.9858993241992454e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16496001183986664,
|
|
"step": 600,
|
|
"valid_targets_mean": 5535.8,
|
|
"valid_targets_min": 3537
|
|
},
|
|
{
|
|
"epoch": 0.9490196078431372,
|
|
"grad_norm": 0.565606495791218,
|
|
"learning_rate": 3.984957574057155e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1719355583190918,
|
|
"step": 605,
|
|
"valid_targets_mean": 5035.8,
|
|
"valid_targets_min": 3531
|
|
},
|
|
{
|
|
"epoch": 0.9568627450980393,
|
|
"grad_norm": 0.5662622685590348,
|
|
"learning_rate": 3.983985502154254e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330733746290207,
|
|
"step": 610,
|
|
"valid_targets_mean": 4509.8,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.9647058823529412,
|
|
"grad_norm": 0.650042780936349,
|
|
"learning_rate": 3.982983123339694e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16050265729427338,
|
|
"step": 615,
|
|
"valid_targets_mean": 4618.6,
|
|
"valid_targets_min": 3628
|
|
},
|
|
{
|
|
"epoch": 0.9725490196078431,
|
|
"grad_norm": 0.4789311071288452,
|
|
"learning_rate": 3.9819504529255836e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1295998990535736,
|
|
"step": 620,
|
|
"valid_targets_mean": 5019.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 0.9803921568627451,
|
|
"grad_norm": 0.5185497844397633,
|
|
"learning_rate": 3.980887506686763e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11702439188957214,
|
|
"step": 625,
|
|
"valid_targets_mean": 4191.5,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 0.9882352941176471,
|
|
"grad_norm": 0.5290223238660421,
|
|
"learning_rate": 3.9797943008605575e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14580155909061432,
|
|
"step": 630,
|
|
"valid_targets_mean": 4653.8,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 0.996078431372549,
|
|
"grad_norm": 0.5266373986466892,
|
|
"learning_rate": 3.978670852146529e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13359291851520538,
|
|
"step": 635,
|
|
"valid_targets_mean": 4390.6,
|
|
"valid_targets_min": 3725
|
|
},
|
|
{
|
|
"epoch": 1.0031372549019608,
|
|
"grad_norm": 0.47547522888568966,
|
|
"learning_rate": 3.977517177706226e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12808755040168762,
|
|
"step": 640,
|
|
"valid_targets_mean": 5032.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.0109803921568628,
|
|
"grad_norm": 0.5103283050856352,
|
|
"learning_rate": 3.976333295162918e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15815523266792297,
|
|
"step": 645,
|
|
"valid_targets_mean": 4782.9,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 1.0188235294117647,
|
|
"grad_norm": 0.5437229569859977,
|
|
"learning_rate": 3.975119222601323e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376689076423645,
|
|
"step": 650,
|
|
"valid_targets_mean": 4856.1,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 1.0266666666666666,
|
|
"grad_norm": 0.47980311335397074,
|
|
"learning_rate": 3.9738749785673406e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15655337274074554,
|
|
"step": 655,
|
|
"valid_targets_mean": 4985.4,
|
|
"valid_targets_min": 3526
|
|
},
|
|
{
|
|
"epoch": 1.0345098039215685,
|
|
"grad_norm": 0.6311205327783157,
|
|
"learning_rate": 3.972600582067758e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1531863808631897,
|
|
"step": 660,
|
|
"valid_targets_mean": 4498.0,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.0423529411764705,
|
|
"grad_norm": 0.5716799603731424,
|
|
"learning_rate": 3.971296052569967e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1194576546549797,
|
|
"step": 665,
|
|
"valid_targets_mean": 4117.6,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 1.0501960784313726,
|
|
"grad_norm": 0.4684618466869646,
|
|
"learning_rate": 3.9699614100016634e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16516944766044617,
|
|
"step": 670,
|
|
"valid_targets_mean": 5447.8,
|
|
"valid_targets_min": 4101
|
|
},
|
|
{
|
|
"epoch": 1.0580392156862746,
|
|
"grad_norm": 0.49367470728777396,
|
|
"learning_rate": 3.968596674750545e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14375250041484833,
|
|
"step": 675,
|
|
"valid_targets_mean": 5414.5,
|
|
"valid_targets_min": 3837
|
|
},
|
|
{
|
|
"epoch": 1.0658823529411765,
|
|
"grad_norm": 0.46044503507781187,
|
|
"learning_rate": 3.967201867663996e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16253119707107544,
|
|
"step": 680,
|
|
"valid_targets_mean": 6025.1,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 1.0737254901960784,
|
|
"grad_norm": 0.6024045182349768,
|
|
"learning_rate": 3.9657770100487736e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13948144018650055,
|
|
"step": 685,
|
|
"valid_targets_mean": 5004.2,
|
|
"valid_targets_min": 3757
|
|
},
|
|
{
|
|
"epoch": 1.0815686274509804,
|
|
"grad_norm": 0.5157512027527342,
|
|
"learning_rate": 3.964322123670678e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1478308141231537,
|
|
"step": 690,
|
|
"valid_targets_mean": 4180.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.0894117647058823,
|
|
"grad_norm": 0.5002569272875327,
|
|
"learning_rate": 3.9628372307542225e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1788061261177063,
|
|
"step": 695,
|
|
"valid_targets_mean": 5319.0,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 1.0972549019607842,
|
|
"grad_norm": 0.5100097718835788,
|
|
"learning_rate": 3.961322353982295e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1128082424402237,
|
|
"step": 700,
|
|
"valid_targets_mean": 4365.6,
|
|
"valid_targets_min": 3235
|
|
},
|
|
{
|
|
"epoch": 1.1050980392156862,
|
|
"grad_norm": 0.5109172456711149,
|
|
"learning_rate": 3.959777516495809e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1364116221666336,
|
|
"step": 705,
|
|
"valid_targets_mean": 3845.6,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 1.1129411764705883,
|
|
"grad_norm": 0.49047092157775113,
|
|
"learning_rate": 3.95820274189335e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17177531123161316,
|
|
"step": 710,
|
|
"valid_targets_mean": 5469.4,
|
|
"valid_targets_min": 4127
|
|
},
|
|
{
|
|
"epoch": 1.1207843137254903,
|
|
"grad_norm": 0.4843667352820801,
|
|
"learning_rate": 3.956598054230816e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13446100056171417,
|
|
"step": 715,
|
|
"valid_targets_mean": 4805.4,
|
|
"valid_targets_min": 3414
|
|
},
|
|
{
|
|
"epoch": 1.1286274509803922,
|
|
"grad_norm": 0.5326626591489633,
|
|
"learning_rate": 3.9549634780210536e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15380431711673737,
|
|
"step": 720,
|
|
"valid_targets_mean": 5294.5,
|
|
"valid_targets_min": 3954
|
|
},
|
|
{
|
|
"epoch": 1.1364705882352941,
|
|
"grad_norm": 0.506684867506722,
|
|
"learning_rate": 3.953299038233476e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16172771155834198,
|
|
"step": 725,
|
|
"valid_targets_mean": 6202.2,
|
|
"valid_targets_min": 4193
|
|
},
|
|
{
|
|
"epoch": 1.144313725490196,
|
|
"grad_norm": 0.49900322782201706,
|
|
"learning_rate": 3.9516047602936864e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386362463235855,
|
|
"step": 730,
|
|
"valid_targets_mean": 5224.0,
|
|
"valid_targets_min": 3954
|
|
},
|
|
{
|
|
"epoch": 1.152156862745098,
|
|
"grad_norm": 0.5485013506223598,
|
|
"learning_rate": 3.949880670083091e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1538248062133789,
|
|
"step": 735,
|
|
"valid_targets_mean": 4433.6,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"grad_norm": 0.5114165877370134,
|
|
"learning_rate": 3.9481267939385e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17208515107631683,
|
|
"step": 740,
|
|
"valid_targets_mean": 4607.8,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 1.167843137254902,
|
|
"grad_norm": 0.5140897776212602,
|
|
"learning_rate": 3.946343158651725e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19184085726737976,
|
|
"step": 745,
|
|
"valid_targets_mean": 6692.2,
|
|
"valid_targets_min": 4235
|
|
},
|
|
{
|
|
"epoch": 1.175686274509804,
|
|
"grad_norm": 0.510349829135783,
|
|
"learning_rate": 3.944529791469175e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15706050395965576,
|
|
"step": 750,
|
|
"valid_targets_mean": 5801.4,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 1.183529411764706,
|
|
"grad_norm": 0.5139210232913193,
|
|
"learning_rate": 3.9426867200914355e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11696702986955643,
|
|
"step": 755,
|
|
"valid_targets_mean": 4200.5,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.1913725490196079,
|
|
"grad_norm": 0.6138467891844176,
|
|
"learning_rate": 3.9408139726728444e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13788250088691711,
|
|
"step": 760,
|
|
"valid_targets_mean": 4887.9,
|
|
"valid_targets_min": 3979
|
|
},
|
|
{
|
|
"epoch": 1.1992156862745098,
|
|
"grad_norm": 0.5274585474600504,
|
|
"learning_rate": 3.9389115778210666e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15761765837669373,
|
|
"step": 765,
|
|
"valid_targets_mean": 5229.5,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.2070588235294117,
|
|
"grad_norm": 0.5045239215906021,
|
|
"learning_rate": 3.936979564596653e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14591068029403687,
|
|
"step": 770,
|
|
"valid_targets_mean": 4233.5,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 1.2149019607843137,
|
|
"grad_norm": 0.5278256503203437,
|
|
"learning_rate": 3.935017962512599e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11342287063598633,
|
|
"step": 775,
|
|
"valid_targets_mean": 4776.5,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 1.2227450980392156,
|
|
"grad_norm": 0.47582043807758717,
|
|
"learning_rate": 3.933026801533893e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15707777440547943,
|
|
"step": 780,
|
|
"valid_targets_mean": 5618.9,
|
|
"valid_targets_min": 4369
|
|
},
|
|
{
|
|
"epoch": 1.2305882352941175,
|
|
"grad_norm": 0.45934059705023184,
|
|
"learning_rate": 3.9310061120770556e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13776640594005585,
|
|
"step": 785,
|
|
"valid_targets_mean": 5107.0,
|
|
"valid_targets_min": 4087
|
|
},
|
|
{
|
|
"epoch": 1.2384313725490197,
|
|
"grad_norm": 0.46414624555908107,
|
|
"learning_rate": 3.928955925009682e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1537836790084839,
|
|
"step": 790,
|
|
"valid_targets_mean": 4994.2,
|
|
"valid_targets_min": 3250
|
|
},
|
|
{
|
|
"epoch": 1.2462745098039216,
|
|
"grad_norm": 0.4408619533536079,
|
|
"learning_rate": 3.9268762716499615e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13994896411895752,
|
|
"step": 795,
|
|
"valid_targets_mean": 6135.1,
|
|
"valid_targets_min": 3933
|
|
},
|
|
{
|
|
"epoch": 1.2541176470588236,
|
|
"grad_norm": 0.4736658560562933,
|
|
"learning_rate": 3.924767183766208e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12094531208276749,
|
|
"step": 800,
|
|
"valid_targets_mean": 4518.0,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 1.2619607843137255,
|
|
"grad_norm": 0.5874137322174587,
|
|
"learning_rate": 3.922628693576369e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14845438301563263,
|
|
"step": 805,
|
|
"valid_targets_mean": 4616.0,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 1.2698039215686274,
|
|
"grad_norm": 0.47373845837495127,
|
|
"learning_rate": 3.9204608337475323e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442110240459442,
|
|
"step": 810,
|
|
"valid_targets_mean": 5869.6,
|
|
"valid_targets_min": 4108
|
|
},
|
|
{
|
|
"epoch": 1.2776470588235294,
|
|
"grad_norm": 0.5594446464578944,
|
|
"learning_rate": 3.9182636373954345e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18353107571601868,
|
|
"step": 815,
|
|
"valid_targets_mean": 5140.8,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 1.2854901960784313,
|
|
"grad_norm": 0.4574990179860623,
|
|
"learning_rate": 3.916037138083947e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12598153948783875,
|
|
"step": 820,
|
|
"valid_targets_mean": 5185.1,
|
|
"valid_targets_min": 3440
|
|
},
|
|
{
|
|
"epoch": 1.2933333333333334,
|
|
"grad_norm": 0.40740564941785307,
|
|
"learning_rate": 3.913781369824567e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14568153023719788,
|
|
"step": 825,
|
|
"valid_targets_mean": 6277.1,
|
|
"valid_targets_min": 3898
|
|
},
|
|
{
|
|
"epoch": 1.3011764705882354,
|
|
"grad_norm": 0.44929897069661345,
|
|
"learning_rate": 3.911496367075897e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08832447975873947,
|
|
"step": 830,
|
|
"valid_targets_mean": 4550.4,
|
|
"valid_targets_min": 3266
|
|
},
|
|
{
|
|
"epoch": 1.3090196078431373,
|
|
"grad_norm": 0.4838672180760848,
|
|
"learning_rate": 3.909182164743122e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1226353794336319,
|
|
"step": 835,
|
|
"valid_targets_mean": 5036.8,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 1.3168627450980392,
|
|
"grad_norm": 0.5483780903684279,
|
|
"learning_rate": 3.906838798177469e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13118325173854828,
|
|
"step": 840,
|
|
"valid_targets_mean": 4114.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 1.3247058823529412,
|
|
"grad_norm": 0.46795419074341116,
|
|
"learning_rate": 3.904466303175674e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13329827785491943,
|
|
"step": 845,
|
|
"valid_targets_mean": 5061.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.332549019607843,
|
|
"grad_norm": 0.5731984387593678,
|
|
"learning_rate": 3.90206471597943e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332540065050125,
|
|
"step": 850,
|
|
"valid_targets_mean": 4545.1,
|
|
"valid_targets_min": 3721
|
|
},
|
|
{
|
|
"epoch": 1.340392156862745,
|
|
"grad_norm": 0.493596072249093,
|
|
"learning_rate": 3.8996340732748396e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15036369860172272,
|
|
"step": 855,
|
|
"valid_targets_mean": 5181.5,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 1.348235294117647,
|
|
"grad_norm": 0.5153894528113263,
|
|
"learning_rate": 3.8971744121918465e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13004739582538605,
|
|
"step": 860,
|
|
"valid_targets_mean": 5174.0,
|
|
"valid_targets_min": 4119
|
|
},
|
|
{
|
|
"epoch": 1.356078431372549,
|
|
"grad_norm": 0.531726998826914,
|
|
"learning_rate": 3.894685770303675e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16023707389831543,
|
|
"step": 865,
|
|
"valid_targets_mean": 4943.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 1.3639215686274508,
|
|
"grad_norm": 0.4984274476423372,
|
|
"learning_rate": 3.8921681856262535e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15315961837768555,
|
|
"step": 870,
|
|
"valid_targets_mean": 5614.4,
|
|
"valid_targets_min": 3721
|
|
},
|
|
{
|
|
"epoch": 1.371764705882353,
|
|
"grad_norm": 0.4524127448562263,
|
|
"learning_rate": 3.889621696617633e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313284933567047,
|
|
"step": 875,
|
|
"valid_targets_mean": 4431.6,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.379607843137255,
|
|
"grad_norm": 0.578828012700717,
|
|
"learning_rate": 3.887046342177401e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15103895962238312,
|
|
"step": 880,
|
|
"valid_targets_mean": 3760.1,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 1.3874509803921569,
|
|
"grad_norm": 0.47108002593851134,
|
|
"learning_rate": 3.884442161646086e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282312572002411,
|
|
"step": 885,
|
|
"valid_targets_mean": 4732.4,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 1.3952941176470588,
|
|
"grad_norm": 0.46403908903025914,
|
|
"learning_rate": 3.881809194804559e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14507359266281128,
|
|
"step": 890,
|
|
"valid_targets_mean": 5339.4,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 1.4031372549019607,
|
|
"grad_norm": 0.5164491186153499,
|
|
"learning_rate": 3.879147481873423e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14479577541351318,
|
|
"step": 895,
|
|
"valid_targets_mean": 4245.0,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 1.4109803921568629,
|
|
"grad_norm": 0.47262134352541796,
|
|
"learning_rate": 3.876457063512399e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14927907288074493,
|
|
"step": 900,
|
|
"valid_targets_mean": 5170.5,
|
|
"valid_targets_min": 3806
|
|
},
|
|
{
|
|
"epoch": 1.4188235294117648,
|
|
"grad_norm": 0.48030885275086166,
|
|
"learning_rate": 3.873737980819707e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1346939206123352,
|
|
"step": 905,
|
|
"valid_targets_mean": 4743.6,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 1.4266666666666667,
|
|
"grad_norm": 0.5198341770217074,
|
|
"learning_rate": 3.870990275331437e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17289170622825623,
|
|
"step": 910,
|
|
"valid_targets_mean": 4804.0,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.4345098039215687,
|
|
"grad_norm": 0.45328710388368715,
|
|
"learning_rate": 3.8682139890209124e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17157664895057678,
|
|
"step": 915,
|
|
"valid_targets_mean": 6150.1,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 1.4423529411764706,
|
|
"grad_norm": 0.5085841911130472,
|
|
"learning_rate": 3.865409164298052e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13336902856826782,
|
|
"step": 920,
|
|
"valid_targets_mean": 4057.1,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.4501960784313725,
|
|
"grad_norm": 0.4994287763598936,
|
|
"learning_rate": 3.8625758440087213e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16101914644241333,
|
|
"step": 925,
|
|
"valid_targets_mean": 4822.4,
|
|
"valid_targets_min": 3827
|
|
},
|
|
{
|
|
"epoch": 1.4580392156862745,
|
|
"grad_norm": 0.4941923929879018,
|
|
"learning_rate": 3.859714071434078e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16710661351680756,
|
|
"step": 930,
|
|
"valid_targets_mean": 5267.9,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.4658823529411764,
|
|
"grad_norm": 0.48327572840974614,
|
|
"learning_rate": 3.8568238902899085e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417180448770523,
|
|
"step": 935,
|
|
"valid_targets_mean": 4978.8,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 1.4737254901960783,
|
|
"grad_norm": 0.44695203493729635,
|
|
"learning_rate": 3.853905344725963e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13325612246990204,
|
|
"step": 940,
|
|
"valid_targets_mean": 5665.6,
|
|
"valid_targets_min": 3905
|
|
},
|
|
{
|
|
"epoch": 1.4815686274509803,
|
|
"grad_norm": 0.5099534725059365,
|
|
"learning_rate": 3.850958479325281e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12173628807067871,
|
|
"step": 945,
|
|
"valid_targets_mean": 4427.0,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 1.4894117647058824,
|
|
"grad_norm": 0.5134884915273966,
|
|
"learning_rate": 3.8479833391035085e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11635063588619232,
|
|
"step": 950,
|
|
"valid_targets_mean": 4344.9,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.4972549019607844,
|
|
"grad_norm": 0.480513750698937,
|
|
"learning_rate": 3.844979969508211e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17787393927574158,
|
|
"step": 955,
|
|
"valid_targets_mean": 5552.0,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.5050980392156863,
|
|
"grad_norm": 0.4766337557657441,
|
|
"learning_rate": 3.84194841641818e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378069669008255,
|
|
"step": 960,
|
|
"valid_targets_mean": 4584.0,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 1.5129411764705882,
|
|
"grad_norm": 0.44495383748209494,
|
|
"learning_rate": 3.838888726142732e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1239204853773117,
|
|
"step": 965,
|
|
"valid_targets_mean": 4935.8,
|
|
"valid_targets_min": 3706
|
|
},
|
|
{
|
|
"epoch": 1.5207843137254902,
|
|
"grad_norm": 0.4861707300267207,
|
|
"learning_rate": 3.8358009454210006e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1610291451215744,
|
|
"step": 970,
|
|
"valid_targets_mean": 5325.9,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 1.5286274509803923,
|
|
"grad_norm": 0.46843425756768353,
|
|
"learning_rate": 3.8326851214212206e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10426193475723267,
|
|
"step": 975,
|
|
"valid_targets_mean": 3031.6,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.5364705882352943,
|
|
"grad_norm": 0.4787661485970747,
|
|
"learning_rate": 3.829541301740014e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11571415513753891,
|
|
"step": 980,
|
|
"valid_targets_mean": 4581.1,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 1.5443137254901962,
|
|
"grad_norm": 0.4798542433376364,
|
|
"learning_rate": 3.826369534401653e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12648630142211914,
|
|
"step": 985,
|
|
"valid_targets_mean": 4808.4,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 1.5521568627450981,
|
|
"grad_norm": 0.4524354796896961,
|
|
"learning_rate": 3.823169867857337e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299093067646027,
|
|
"step": 990,
|
|
"valid_targets_mean": 4355.0,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"grad_norm": 0.48021380766661625,
|
|
"learning_rate": 3.819942350984444e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11084786802530289,
|
|
"step": 995,
|
|
"valid_targets_mean": 4540.6,
|
|
"valid_targets_min": 4192
|
|
},
|
|
{
|
|
"epoch": 1.567843137254902,
|
|
"grad_norm": 0.45077386317800505,
|
|
"learning_rate": 3.816687033085788e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14281418919563293,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4483.5,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 1.575686274509804,
|
|
"grad_norm": 0.49805538601357824,
|
|
"learning_rate": 3.813403963888866e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10800270736217499,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4369.6,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 1.5835294117647059,
|
|
"grad_norm": 0.4505476380153165,
|
|
"learning_rate": 3.810093193545097e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17713075876235962,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5999.9,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 1.5913725490196078,
|
|
"grad_norm": 0.4575194505901327,
|
|
"learning_rate": 3.806754772629055e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14325906336307526,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4706.1,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.5992156862745097,
|
|
"grad_norm": 0.45014150592540364,
|
|
"learning_rate": 3.8033887521377015e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14992156624794006,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5189.2,
|
|
"valid_targets_min": 3654
|
|
},
|
|
{
|
|
"epoch": 1.6070588235294117,
|
|
"grad_norm": 0.4491623171932943,
|
|
"learning_rate": 3.799995183489599e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17521008849143982,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6152.9,
|
|
"valid_targets_min": 4129
|
|
},
|
|
{
|
|
"epoch": 1.6149019607843136,
|
|
"grad_norm": 0.4964276596194231,
|
|
"learning_rate": 3.796574118524131e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12860007584095,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4406.8,
|
|
"valid_targets_min": 3784
|
|
},
|
|
{
|
|
"epoch": 1.6227450980392157,
|
|
"grad_norm": 0.45560423944040035,
|
|
"learning_rate": 3.793125609500709e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1591438204050064,
|
|
"step": 1035,
|
|
"valid_targets_mean": 6584.6,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 1.6305882352941177,
|
|
"grad_norm": 0.4408094419647726,
|
|
"learning_rate": 3.789649709097973e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1180456355214119,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4745.6,
|
|
"valid_targets_min": 2823
|
|
},
|
|
{
|
|
"epoch": 1.6384313725490196,
|
|
"grad_norm": 0.48261746494460517,
|
|
"learning_rate": 3.786146470412988e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13682833313941956,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4307.1,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 1.6462745098039215,
|
|
"grad_norm": 0.5081155036574729,
|
|
"learning_rate": 3.782615946960432e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12664803862571716,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5285.6,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 1.6541176470588237,
|
|
"grad_norm": 0.4195569376421684,
|
|
"learning_rate": 3.779058192671777e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1745804250240326,
|
|
"step": 1055,
|
|
"valid_targets_mean": 6423.2,
|
|
"valid_targets_min": 3217
|
|
},
|
|
{
|
|
"epoch": 1.6619607843137256,
|
|
"grad_norm": 0.4698726821958353,
|
|
"learning_rate": 3.775473261894472e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16999664902687073,
|
|
"step": 1060,
|
|
"valid_targets_mean": 6138.6,
|
|
"valid_targets_min": 3941
|
|
},
|
|
{
|
|
"epoch": 1.6698039215686276,
|
|
"grad_norm": 0.41817128576043927,
|
|
"learning_rate": 3.771861209391103e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1361289620399475,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5478.5,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 1.6776470588235295,
|
|
"grad_norm": 0.5313533412781013,
|
|
"learning_rate": 3.768222090338564e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13523992896080017,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3810.9,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.6854901960784314,
|
|
"grad_norm": 0.44784161225326163,
|
|
"learning_rate": 3.7645559603272104e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14014381170272827,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5192.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.6933333333333334,
|
|
"grad_norm": 0.4745633540456,
|
|
"learning_rate": 3.76086287536001e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13647784292697906,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4379.9,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 1.7011764705882353,
|
|
"grad_norm": 0.4406930109228543,
|
|
"learning_rate": 3.757142891851691e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11379043757915497,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4284.6,
|
|
"valid_targets_min": 2690
|
|
},
|
|
{
|
|
"epoch": 1.7090196078431372,
|
|
"grad_norm": 0.4381588247078304,
|
|
"learning_rate": 3.753396066627876e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13208818435668945,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4889.5,
|
|
"valid_targets_min": 3500
|
|
},
|
|
{
|
|
"epoch": 1.7168627450980392,
|
|
"grad_norm": 0.4494074671045861,
|
|
"learning_rate": 3.749622456924215e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12918317317962646,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5156.4,
|
|
"valid_targets_min": 3659
|
|
},
|
|
{
|
|
"epoch": 1.724705882352941,
|
|
"grad_norm": 0.459960762817952,
|
|
"learning_rate": 3.745822120385512e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11129502952098846,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4385.8,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.732549019607843,
|
|
"grad_norm": 0.47940383452188756,
|
|
"learning_rate": 3.7419951150648445e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10941147804260254,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3149.5,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.740392156862745,
|
|
"grad_norm": 0.4821955655374367,
|
|
"learning_rate": 3.738141499422677e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13078473508358002,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4533.4,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 1.748235294117647,
|
|
"grad_norm": 0.5819716977898828,
|
|
"learning_rate": 3.7342613323259654e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11359615623950958,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4173.2,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 1.756078431372549,
|
|
"grad_norm": 0.4021276805738122,
|
|
"learning_rate": 3.7303546730472607e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14241759479045868,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5989.4,
|
|
"valid_targets_min": 4148
|
|
},
|
|
{
|
|
"epoch": 1.763921568627451,
|
|
"grad_norm": 0.4882268950005422,
|
|
"learning_rate": 3.726421581263802e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1279492974281311,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4249.0,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.771764705882353,
|
|
"grad_norm": 0.4391268851806516,
|
|
"learning_rate": 3.722462117056607e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17066633701324463,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5343.8,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 1.779607843137255,
|
|
"grad_norm": 0.41715519290088876,
|
|
"learning_rate": 3.718476340909548e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12390994280576706,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5063.4,
|
|
"valid_targets_min": 3835
|
|
},
|
|
{
|
|
"epoch": 1.787450980392157,
|
|
"grad_norm": 0.43055363176019373,
|
|
"learning_rate": 3.714464313708439e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12827664613723755,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5404.1,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 1.795294117647059,
|
|
"grad_norm": 0.4467983247147168,
|
|
"learning_rate": 3.710426096740094e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11275273561477661,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5128.1,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 1.8031372549019609,
|
|
"grad_norm": 0.4532080165298866,
|
|
"learning_rate": 3.7063617516913974e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10102865099906921,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3786.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.8109803921568628,
|
|
"grad_norm": 0.45477210640708865,
|
|
"learning_rate": 3.7022713406483626e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12230783700942993,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4558.8,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 1.8188235294117647,
|
|
"grad_norm": 0.4331370541967343,
|
|
"learning_rate": 3.698154926095177e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12535810470581055,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5213.6,
|
|
"valid_targets_min": 3787
|
|
},
|
|
{
|
|
"epoch": 1.8266666666666667,
|
|
"grad_norm": 0.4121000726174262,
|
|
"learning_rate": 3.694012570913254e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1354580819606781,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5838.6,
|
|
"valid_targets_min": 4390
|
|
},
|
|
{
|
|
"epoch": 1.8345098039215686,
|
|
"grad_norm": 0.41764883679327386,
|
|
"learning_rate": 3.689844338380271e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16744661331176758,
|
|
"step": 1170,
|
|
"valid_targets_mean": 6286.6,
|
|
"valid_targets_min": 3685
|
|
},
|
|
{
|
|
"epoch": 1.8423529411764705,
|
|
"grad_norm": 0.4777235188577736,
|
|
"learning_rate": 3.6856502921692004e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14872866868972778,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4034.6,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.8501960784313725,
|
|
"grad_norm": 0.4537327724299662,
|
|
"learning_rate": 3.681430496347339e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12654449045658112,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4453.2,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 1.8580392156862744,
|
|
"grad_norm": 0.4725361478945017,
|
|
"learning_rate": 3.677185015375329e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11899863183498383,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3511.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.8658823529411763,
|
|
"grad_norm": 0.43676745421678775,
|
|
"learning_rate": 3.672913914106173e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11573658138513565,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4290.5,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 1.8737254901960785,
|
|
"grad_norm": 0.4929717954552796,
|
|
"learning_rate": 3.6686172577842425e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13691729307174683,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4568.4,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 1.8815686274509804,
|
|
"grad_norm": 0.4405003687632648,
|
|
"learning_rate": 3.6642951120442834e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263415813446045,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5306.5,
|
|
"valid_targets_min": 4122
|
|
},
|
|
{
|
|
"epoch": 1.8894117647058823,
|
|
"grad_norm": 0.44460183123100777,
|
|
"learning_rate": 3.6599475429104125e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20764252543449402,
|
|
"step": 1205,
|
|
"valid_targets_mean": 7194.6,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 1.8972549019607843,
|
|
"grad_norm": 0.44478346536222496,
|
|
"learning_rate": 3.655574616795108e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14065058529376984,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5352.9,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 1.9050980392156864,
|
|
"grad_norm": 0.4419280017991199,
|
|
"learning_rate": 3.651176400498194e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16233976185321808,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5459.9,
|
|
"valid_targets_min": 4407
|
|
},
|
|
{
|
|
"epoch": 1.9129411764705884,
|
|
"grad_norm": 0.46390239048982107,
|
|
"learning_rate": 3.646752961205825e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17894771695137024,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5047.5,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 1.9207843137254903,
|
|
"grad_norm": 0.4438782966995154,
|
|
"learning_rate": 3.642304366489453e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1729862093925476,
|
|
"step": 1225,
|
|
"valid_targets_mean": 6348.2,
|
|
"valid_targets_min": 4423
|
|
},
|
|
{
|
|
"epoch": 1.9286274509803922,
|
|
"grad_norm": 0.47402141722773866,
|
|
"learning_rate": 3.6378306843047996e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10692477226257324,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4371.1,
|
|
"valid_targets_min": 3719
|
|
},
|
|
{
|
|
"epoch": 1.9364705882352942,
|
|
"grad_norm": 0.43266903488313885,
|
|
"learning_rate": 3.6333319829908196e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1654496192932129,
|
|
"step": 1235,
|
|
"valid_targets_mean": 6174.9,
|
|
"valid_targets_min": 3664
|
|
},
|
|
{
|
|
"epoch": 1.944313725490196,
|
|
"grad_norm": 0.49592255861334467,
|
|
"learning_rate": 3.628808331268649e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314307451248169,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4144.4,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 1.952156862745098,
|
|
"grad_norm": 0.435538874680241,
|
|
"learning_rate": 3.624259798240565e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15344339609146118,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5449.9,
|
|
"valid_targets_min": 3396
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"grad_norm": 0.5094650203239252,
|
|
"learning_rate": 3.6196864533889245e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12815450131893158,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4447.8,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.967843137254902,
|
|
"grad_norm": 0.45177257568112905,
|
|
"learning_rate": 3.615088366575104e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13006862998008728,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4545.5,
|
|
"valid_targets_min": 3821
|
|
},
|
|
{
|
|
"epoch": 1.9756862745098038,
|
|
"grad_norm": 0.4682992295938859,
|
|
"learning_rate": 3.610465608038432e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1345278024673462,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4651.2,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 1.9835294117647058,
|
|
"grad_norm": 0.46672262150503485,
|
|
"learning_rate": 3.605818248395118e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12447239458560944,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3983.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.9913725490196077,
|
|
"grad_norm": 0.4299468455762349,
|
|
"learning_rate": 3.6011463586371715e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11944080144166946,
|
|
"step": 1270,
|
|
"valid_targets_mean": 5400.0,
|
|
"valid_targets_min": 3763
|
|
},
|
|
{
|
|
"epoch": 1.9992156862745099,
|
|
"grad_norm": 0.43099766066618245,
|
|
"learning_rate": 3.596450010131319e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10978048294782639,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4126.8,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 2.0062745098039216,
|
|
"grad_norm": 0.4576887974176139,
|
|
"learning_rate": 3.5917292746179134e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14465484023094177,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5874.0,
|
|
"valid_targets_min": 4065
|
|
},
|
|
{
|
|
"epoch": 2.0141176470588236,
|
|
"grad_norm": 0.45200663585781176,
|
|
"learning_rate": 3.586984224209837e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11500234156847,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4390.4,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 2.0219607843137255,
|
|
"grad_norm": 0.45078823865058354,
|
|
"learning_rate": 3.582214931391402e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13320954144001007,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4999.0,
|
|
"valid_targets_min": 3822
|
|
},
|
|
{
|
|
"epoch": 2.0298039215686274,
|
|
"grad_norm": 0.48915435190703266,
|
|
"learning_rate": 3.5774214690172405e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15321773290634155,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5010.4,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 2.0376470588235294,
|
|
"grad_norm": 0.43119415423933033,
|
|
"learning_rate": 3.572603910311196e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14951878786087036,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5940.9,
|
|
"valid_targets_min": 4218
|
|
},
|
|
{
|
|
"epoch": 2.0454901960784313,
|
|
"grad_norm": 0.453774158368299,
|
|
"learning_rate": 3.5677623288652e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12337134033441544,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4268.1,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 2.0533333333333332,
|
|
"grad_norm": 0.43873462102134814,
|
|
"learning_rate": 3.5628967986381485e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12993843853473663,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5040.6,
|
|
"valid_targets_min": 3270
|
|
},
|
|
{
|
|
"epoch": 2.061176470588235,
|
|
"grad_norm": 0.48445761598277753,
|
|
"learning_rate": 3.558007393954778e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1226993054151535,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4753.5,
|
|
"valid_targets_min": 3988
|
|
},
|
|
{
|
|
"epoch": 2.069019607843137,
|
|
"grad_norm": 0.4431437458469214,
|
|
"learning_rate": 3.553094189504522e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12933582067489624,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5278.0,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 2.076862745098039,
|
|
"grad_norm": 0.4925639699972867,
|
|
"learning_rate": 3.548157260340376e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12065766751766205,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4327.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 2.084705882352941,
|
|
"grad_norm": 0.4748173687132383,
|
|
"learning_rate": 3.5431966818777476e-05,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14784438908100128,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4556.0,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 2.0925490196078433,
|
|
"grad_norm": 0.46446365306399356,
|
|
"learning_rate": 3.5382125298933055e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13218021392822266,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4969.9,
|
|
"valid_targets_min": 3886
|
|
},
|
|
{
|
|
"epoch": 2.1003921568627453,
|
|
"grad_norm": 0.4024430729199942,
|
|
"learning_rate": 3.533204880523823e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12806639075279236,
|
|
"step": 1340,
|
|
"valid_targets_mean": 6134.9,
|
|
"valid_targets_min": 3973
|
|
},
|
|
{
|
|
"epoch": 2.108235294117647,
|
|
"grad_norm": 0.49039396114872347,
|
|
"learning_rate": 3.528173810265015e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13579592108726501,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4923.4,
|
|
"valid_targets_min": 3575
|
|
},
|
|
{
|
|
"epoch": 2.116078431372549,
|
|
"grad_norm": 0.4547559535366543,
|
|
"learning_rate": 3.5231193959703654e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17610886693000793,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5835.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 2.123921568627451,
|
|
"grad_norm": 0.4335357581518014,
|
|
"learning_rate": 3.51804171484996e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15231119096279144,
|
|
"step": 1355,
|
|
"valid_targets_mean": 6393.4,
|
|
"valid_targets_min": 4455
|
|
},
|
|
{
|
|
"epoch": 2.131764705882353,
|
|
"grad_norm": 0.4262901053655905,
|
|
"learning_rate": 3.5129408444693014e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15080666542053223,
|
|
"step": 1360,
|
|
"valid_targets_mean": 5871.1,
|
|
"valid_targets_min": 3873
|
|
},
|
|
{
|
|
"epoch": 2.139607843137255,
|
|
"grad_norm": 0.41157978357484015,
|
|
"learning_rate": 3.507816862748126e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11613994836807251,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4608.5,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 2.147450980392157,
|
|
"grad_norm": 0.4393084572770216,
|
|
"learning_rate": 3.502669847959213e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12773963809013367,
|
|
"step": 1370,
|
|
"valid_targets_mean": 5043.8,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 2.155294117647059,
|
|
"grad_norm": 0.43224899587658944,
|
|
"learning_rate": 3.497499878727193e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14609867334365845,
|
|
"step": 1375,
|
|
"valid_targets_mean": 7183.4,
|
|
"valid_targets_min": 3707
|
|
},
|
|
{
|
|
"epoch": 2.1631372549019607,
|
|
"grad_norm": 0.44993852844779975,
|
|
"learning_rate": 3.49230703402734e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15001554787158966,
|
|
"step": 1380,
|
|
"valid_targets_mean": 5360.2,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 2.1709803921568627,
|
|
"grad_norm": 0.45145932766055635,
|
|
"learning_rate": 3.487091393184369e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10081503540277481,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3932.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.1788235294117646,
|
|
"grad_norm": 0.4630111233196212,
|
|
"learning_rate": 3.481853035871224e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15715688467025757,
|
|
"step": 1390,
|
|
"valid_targets_mean": 5147.0,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 2.1866666666666665,
|
|
"grad_norm": 0.42476980988232227,
|
|
"learning_rate": 3.476592042107862e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1355331540107727,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5580.8,
|
|
"valid_targets_min": 3203
|
|
},
|
|
{
|
|
"epoch": 2.1945098039215685,
|
|
"grad_norm": 0.46012280923366145,
|
|
"learning_rate": 3.4713084922600274e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09466949850320816,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3824.4,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 2.2023529411764704,
|
|
"grad_norm": 0.45613351517254874,
|
|
"learning_rate": 3.466002467038028e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14354060590267181,
|
|
"step": 1405,
|
|
"valid_targets_mean": 5275.2,
|
|
"valid_targets_min": 3251
|
|
},
|
|
{
|
|
"epoch": 2.2101960784313723,
|
|
"grad_norm": 0.45134079789119325,
|
|
"learning_rate": 3.460674047495497e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09967637807130814,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3815.8,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.2180392156862747,
|
|
"grad_norm": 0.4423793190213438,
|
|
"learning_rate": 3.455323315028164e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12513357400894165,
|
|
"step": 1415,
|
|
"valid_targets_mean": 5017.8,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 2.2258823529411766,
|
|
"grad_norm": 0.48405737331229126,
|
|
"learning_rate": 3.449950351372599e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12709875404834747,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3627.4,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 2.2337254901960786,
|
|
"grad_norm": 0.44207806562612784,
|
|
"learning_rate": 3.444555238604974e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11383885145187378,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4074.1,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 2.2415686274509805,
|
|
"grad_norm": 0.45288162146105243,
|
|
"learning_rate": 3.439138059139808e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13188298046588898,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5178.9,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 2.2494117647058824,
|
|
"grad_norm": 0.41893172335481055,
|
|
"learning_rate": 3.433698895728701e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11395653337240219,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5147.2,
|
|
"valid_targets_min": 3650
|
|
},
|
|
{
|
|
"epoch": 2.2572549019607844,
|
|
"grad_norm": 0.4463833706314381,
|
|
"learning_rate": 3.428237831459078e-05,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425066888332367,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5160.4,
|
|
"valid_targets_min": 3250
|
|
},
|
|
{
|
|
"epoch": 2.2650980392156863,
|
|
"grad_norm": 0.4628208133465244,
|
|
"learning_rate": 3.422754949752917e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350829303264618,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5317.6,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 2.2729411764705882,
|
|
"grad_norm": 0.41564615419434575,
|
|
"learning_rate": 3.41725033436547e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126197949051857,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4831.5,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.28078431372549,
|
|
"grad_norm": 0.4340917177276431,
|
|
"learning_rate": 3.411724069383993e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303197741508484,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5435.0,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 2.288627450980392,
|
|
"grad_norm": 0.47085411749146966,
|
|
"learning_rate": 3.4061762392264545e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12932007014751434,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4807.9,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 2.296470588235294,
|
|
"grad_norm": 0.4517811739999653,
|
|
"learning_rate": 3.400606928640245e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14722979068756104,
|
|
"step": 1465,
|
|
"valid_targets_mean": 6255.8,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 2.304313725490196,
|
|
"grad_norm": 0.4287736838496608,
|
|
"learning_rate": 3.3950162227008884e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11872623860836029,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4925.6,
|
|
"valid_targets_min": 3386
|
|
},
|
|
{
|
|
"epoch": 2.312156862745098,
|
|
"grad_norm": 0.4087067393977361,
|
|
"learning_rate": 3.389404206810739e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12659403681755066,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5264.6,
|
|
"valid_targets_min": 3238
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"grad_norm": 0.42539754197954616,
|
|
"learning_rate": 3.383770966697675e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11696159094572067,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5793.8,
|
|
"valid_targets_min": 3558
|
|
},
|
|
{
|
|
"epoch": 2.3278431372549018,
|
|
"grad_norm": 0.45919977036236226,
|
|
"learning_rate": 3.378116588413792e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2455115169286728,
|
|
"step": 1485,
|
|
"valid_targets_mean": 7003.0,
|
|
"valid_targets_min": 3702
|
|
},
|
|
{
|
|
"epoch": 2.335686274509804,
|
|
"grad_norm": 0.47012118225391586,
|
|
"learning_rate": 3.372441158334089e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12278923392295837,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4004.5,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 2.343529411764706,
|
|
"grad_norm": 0.45097229914655057,
|
|
"learning_rate": 3.3667447631551456e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11797159165143967,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4820.8,
|
|
"valid_targets_min": 3175
|
|
},
|
|
{
|
|
"epoch": 2.351372549019608,
|
|
"grad_norm": 0.44937463916849063,
|
|
"learning_rate": 3.361027489893799e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273888796567917,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4960.4,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 2.35921568627451,
|
|
"grad_norm": 0.4297668418247861,
|
|
"learning_rate": 3.3552894258858173e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481958031654358,
|
|
"step": 1505,
|
|
"valid_targets_mean": 5842.0,
|
|
"valid_targets_min": 3544
|
|
},
|
|
{
|
|
"epoch": 2.367058823529412,
|
|
"grad_norm": 0.4960961983896037,
|
|
"learning_rate": 3.3495306587845616e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13040918111801147,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3905.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.374901960784314,
|
|
"grad_norm": 0.4282759611457031,
|
|
"learning_rate": 3.343751276559651e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11184579879045486,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4647.2,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 2.3827450980392157,
|
|
"grad_norm": 0.45579365401838495,
|
|
"learning_rate": 3.3379513674956134e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12589934468269348,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5245.9,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 2.3905882352941177,
|
|
"grad_norm": 0.45709101945433256,
|
|
"learning_rate": 3.332131020190542e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11196628212928772,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4238.2,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 2.3984313725490196,
|
|
"grad_norm": 0.4591799991741506,
|
|
"learning_rate": 3.326290323554739e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280558705329895,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3959.6,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 2.4062745098039215,
|
|
"grad_norm": 0.4615942164142822,
|
|
"learning_rate": 3.320429366809361e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11446023732423782,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4679.1,
|
|
"valid_targets_min": 3561
|
|
},
|
|
{
|
|
"epoch": 2.4141176470588235,
|
|
"grad_norm": 0.42097481544627646,
|
|
"learning_rate": 3.314548239485048e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12864826619625092,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5744.0,
|
|
"valid_targets_min": 4080
|
|
},
|
|
{
|
|
"epoch": 2.4219607843137254,
|
|
"grad_norm": 0.4698739773554904,
|
|
"learning_rate": 3.308647031420567e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10772387683391571,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3619.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 2.4298039215686273,
|
|
"grad_norm": 0.46570283043841393,
|
|
"learning_rate": 3.3027258327614305e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13037823140621185,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4860.4,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 2.4376470588235293,
|
|
"grad_norm": 0.42593711692675135,
|
|
"learning_rate": 3.296784733958524e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11170825362205505,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5222.6,
|
|
"valid_targets_min": 3930
|
|
},
|
|
{
|
|
"epoch": 2.445490196078431,
|
|
"grad_norm": 0.4503559079527785,
|
|
"learning_rate": 3.2908238257667214e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11901139467954636,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4362.2,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 2.453333333333333,
|
|
"grad_norm": 0.4536297330372539,
|
|
"learning_rate": 3.2848431992435037e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1577979326248169,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5607.9,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.461176470588235,
|
|
"grad_norm": 0.4578552551635253,
|
|
"learning_rate": 3.278842945747561e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13507062196731567,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4755.2,
|
|
"valid_targets_min": 4177
|
|
},
|
|
{
|
|
"epoch": 2.469019607843137,
|
|
"grad_norm": 0.405225958138305,
|
|
"learning_rate": 3.272823156937403e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12951521575450897,
|
|
"step": 1575,
|
|
"valid_targets_mean": 5787.6,
|
|
"valid_targets_min": 3621
|
|
},
|
|
{
|
|
"epoch": 2.4768627450980394,
|
|
"grad_norm": 0.47001834465815556,
|
|
"learning_rate": 3.266783924769954e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15250499546527863,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4942.0,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.4847058823529413,
|
|
"grad_norm": 0.4443067921781512,
|
|
"learning_rate": 3.2607253414991534e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11628343164920807,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4674.5,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 2.4925490196078433,
|
|
"grad_norm": 0.434437602942818,
|
|
"learning_rate": 3.2546474996745424e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13555780053138733,
|
|
"step": 1590,
|
|
"valid_targets_mean": 5046.5,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 2.500392156862745,
|
|
"grad_norm": 0.4208723935905886,
|
|
"learning_rate": 3.248550492139851e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13717062771320343,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5657.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.508235294117647,
|
|
"grad_norm": 0.4156063186715286,
|
|
"learning_rate": 3.242434412031581e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12002235651016235,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4616.9,
|
|
"valid_targets_min": 3414
|
|
},
|
|
{
|
|
"epoch": 2.516078431372549,
|
|
"grad_norm": 0.4532735568961372,
|
|
"learning_rate": 3.236299352777583e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11414999514818192,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4527.4,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 2.523921568627451,
|
|
"grad_norm": 0.45229568900269806,
|
|
"learning_rate": 3.230145408095626e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16222326457500458,
|
|
"step": 1610,
|
|
"valid_targets_mean": 6180.2,
|
|
"valid_targets_min": 4221
|
|
},
|
|
{
|
|
"epoch": 2.531764705882353,
|
|
"grad_norm": 0.9265934257142856,
|
|
"learning_rate": 3.223972671991972e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11284160614013672,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4565.6,
|
|
"valid_targets_min": 2368
|
|
},
|
|
{
|
|
"epoch": 2.539607843137255,
|
|
"grad_norm": 0.4676815700857632,
|
|
"learning_rate": 3.217781238759935e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13243766129016876,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4217.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.547450980392157,
|
|
"grad_norm": 0.46955739866786866,
|
|
"learning_rate": 3.211571202978442e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14136609435081482,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3918.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 2.5552941176470587,
|
|
"grad_norm": 0.4096516881386851,
|
|
"learning_rate": 3.2053426595105865e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313982754945755,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5967.1,
|
|
"valid_targets_min": 3776
|
|
},
|
|
{
|
|
"epoch": 2.5631372549019606,
|
|
"grad_norm": 0.40846410459043553,
|
|
"learning_rate": 3.199095703502185e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12226898968219757,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5342.9,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 2.5709803921568626,
|
|
"grad_norm": 0.4673050459428165,
|
|
"learning_rate": 3.1928304303803174e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15106196701526642,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5145.8,
|
|
"valid_targets_min": 3736
|
|
},
|
|
{
|
|
"epoch": 2.578823529411765,
|
|
"grad_norm": 0.4221808109207067,
|
|
"learning_rate": 3.1865469358518726e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12939462065696716,
|
|
"step": 1645,
|
|
"valid_targets_mean": 5564.0,
|
|
"valid_targets_min": 3317
|
|
},
|
|
{
|
|
"epoch": 2.586666666666667,
|
|
"grad_norm": 0.4770970150351554,
|
|
"learning_rate": 3.180245315902084e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13009631633758545,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4264.9,
|
|
"valid_targets_min": 3863
|
|
},
|
|
{
|
|
"epoch": 2.594509803921569,
|
|
"grad_norm": 0.4658297511693963,
|
|
"learning_rate": 3.173925666793065e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11663345992565155,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4386.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 2.6023529411764708,
|
|
"grad_norm": 0.42932723171267134,
|
|
"learning_rate": 3.1675880850623416e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1144811138510704,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4374.2,
|
|
"valid_targets_min": 3576
|
|
},
|
|
{
|
|
"epoch": 2.6101960784313727,
|
|
"grad_norm": 0.44524167780109175,
|
|
"learning_rate": 3.1612326675213717e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13575562834739685,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5264.5,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 2.6180392156862746,
|
|
"grad_norm": 0.40667869411716223,
|
|
"learning_rate": 3.154859511254067e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14803528785705566,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5660.6,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 2.6258823529411766,
|
|
"grad_norm": 0.46647343522628637,
|
|
"learning_rate": 3.148468713615318e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17942845821380615,
|
|
"step": 1675,
|
|
"valid_targets_mean": 5058.1,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 2.6337254901960785,
|
|
"grad_norm": 0.42492816146492085,
|
|
"learning_rate": 3.1420603722294935e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0995701402425766,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4293.0,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 2.6415686274509804,
|
|
"grad_norm": 0.4643952663623767,
|
|
"learning_rate": 3.135634584988962e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15429197251796722,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5230.4,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 2.6494117647058824,
|
|
"grad_norm": 0.4307747516160081,
|
|
"learning_rate": 3.1291914500525886e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12061157077550888,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4799.1,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 2.6572549019607843,
|
|
"grad_norm": 0.45228484652654455,
|
|
"learning_rate": 3.1227310658442395e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1393534541130066,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5052.2,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 2.665098039215686,
|
|
"grad_norm": 0.4168826045733845,
|
|
"learning_rate": 3.1162535310512745e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18650421500205994,
|
|
"step": 1700,
|
|
"valid_targets_mean": 6966.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 2.672941176470588,
|
|
"grad_norm": 0.40084673503347185,
|
|
"learning_rate": 3.109758944623042e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14108054339885712,
|
|
"step": 1705,
|
|
"valid_targets_mean": 5895.4,
|
|
"valid_targets_min": 4538
|
|
},
|
|
{
|
|
"epoch": 2.68078431372549,
|
|
"grad_norm": 0.3955665597655761,
|
|
"learning_rate": 3.103247405769372e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14262932538986206,
|
|
"step": 1710,
|
|
"valid_targets_mean": 6712.8,
|
|
"valid_targets_min": 3756
|
|
},
|
|
{
|
|
"epoch": 2.688627450980392,
|
|
"grad_norm": 0.44285839167263197,
|
|
"learning_rate": 3.0967190139590484e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11949534714221954,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4721.9,
|
|
"valid_targets_min": 3757
|
|
},
|
|
{
|
|
"epoch": 2.696470588235294,
|
|
"grad_norm": 0.4382594154360303,
|
|
"learning_rate": 3.090173868918303e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13688015937805176,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5184.2,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 2.704313725490196,
|
|
"grad_norm": 0.44947431591051146,
|
|
"learning_rate": 3.083612070629283e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894183933734894,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5535.2,
|
|
"valid_targets_min": 3820
|
|
},
|
|
{
|
|
"epoch": 2.712156862745098,
|
|
"grad_norm": 0.4183405331712526,
|
|
"learning_rate": 3.077033719328529e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1369009017944336,
|
|
"step": 1730,
|
|
"valid_targets_mean": 6275.8,
|
|
"valid_targets_min": 4239
|
|
},
|
|
{
|
|
"epoch": 2.7199999999999998,
|
|
"grad_norm": 0.4573204986274018,
|
|
"learning_rate": 3.070438915505439e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1038956418633461,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4946.8,
|
|
"valid_targets_min": 3587
|
|
},
|
|
{
|
|
"epoch": 2.7278431372549017,
|
|
"grad_norm": 0.40878178498619927,
|
|
"learning_rate": 3.063827759900739e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17439252138137817,
|
|
"step": 1740,
|
|
"valid_targets_mean": 6192.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.735686274509804,
|
|
"grad_norm": 0.44085339250166117,
|
|
"learning_rate": 3.057200353504938e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16376739740371704,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5451.5,
|
|
"valid_targets_min": 3356
|
|
},
|
|
{
|
|
"epoch": 2.743529411764706,
|
|
"grad_norm": 0.461953369271599,
|
|
"learning_rate": 3.0505567975567915e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1565898358821869,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5895.6,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 2.751372549019608,
|
|
"grad_norm": 0.4281409420363027,
|
|
"learning_rate": 3.04389719354175e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13279592990875244,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3907.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 2.75921568627451,
|
|
"grad_norm": 0.43238889821769466,
|
|
"learning_rate": 3.0372216431904103e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10412300378084183,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4064.8,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 2.767058823529412,
|
|
"grad_norm": 0.44339678774004454,
|
|
"learning_rate": 3.030530248476963e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417621672153473,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4940.9,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 2.7749019607843137,
|
|
"grad_norm": 0.7531055888351225,
|
|
"learning_rate": 3.0238231116176338e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466093808412552,
|
|
"step": 1770,
|
|
"valid_targets_mean": 5570.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.7827450980392157,
|
|
"grad_norm": 0.44515977292919706,
|
|
"learning_rate": 3.0171003350691194e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11763986945152283,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4642.2,
|
|
"valid_targets_min": 3477
|
|
},
|
|
{
|
|
"epoch": 2.7905882352941176,
|
|
"grad_norm": 0.4681887303349638,
|
|
"learning_rate": 3.0103620215270285e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14247599244117737,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4720.1,
|
|
"valid_targets_min": 4152
|
|
},
|
|
{
|
|
"epoch": 2.7984313725490195,
|
|
"grad_norm": 0.4368899815968614,
|
|
"learning_rate": 3.0036082739243064e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11585693061351776,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4842.4,
|
|
"valid_targets_min": 3042
|
|
},
|
|
{
|
|
"epoch": 2.8062745098039215,
|
|
"grad_norm": 0.4139493135985029,
|
|
"learning_rate": 2.996839195429667e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13053040206432343,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4798.0,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.8141176470588234,
|
|
"grad_norm": 0.4339471630530843,
|
|
"learning_rate": 2.9900548894460146e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11310513317584991,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4532.4,
|
|
"valid_targets_min": 3948
|
|
},
|
|
{
|
|
"epoch": 2.8219607843137258,
|
|
"grad_norm": 0.4146149344392902,
|
|
"learning_rate": 2.9832554596088653e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11644154787063599,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4799.2,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 2.8298039215686277,
|
|
"grad_norm": 0.399208547371692,
|
|
"learning_rate": 2.9764410097847657e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1448148936033249,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5765.4,
|
|
"valid_targets_min": 4453
|
|
},
|
|
{
|
|
"epoch": 2.8376470588235296,
|
|
"grad_norm": 0.42365842351647104,
|
|
"learning_rate": 2.9696116440697008e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12039990723133087,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4533.0,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 2.8454901960784316,
|
|
"grad_norm": 0.41829893935454315,
|
|
"learning_rate": 2.9627674667875104e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1044776663184166,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4577.5,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 2.8533333333333335,
|
|
"grad_norm": 0.5098614371498988,
|
|
"learning_rate": 2.9559085824882916e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291094571352005,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 3426
|
|
},
|
|
{
|
|
"epoch": 2.8611764705882354,
|
|
"grad_norm": 0.3886315178037877,
|
|
"learning_rate": 2.9490350959468014e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10714983940124512,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5307.0,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 2.8690196078431374,
|
|
"grad_norm": 0.4434985699129623,
|
|
"learning_rate": 2.9421471121608588e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.112812839448452,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4190.8,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 2.8768627450980393,
|
|
"grad_norm": 0.40331633099392356,
|
|
"learning_rate": 2.9352447363497378e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16460323333740234,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6978.6,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 2.8847058823529412,
|
|
"grad_norm": 0.3931483141522423,
|
|
"learning_rate": 2.928328073952564e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1279119849205017,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5583.5,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 2.892549019607843,
|
|
"grad_norm": 0.44537855883298005,
|
|
"learning_rate": 2.921397230626699e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14499521255493164,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5594.1,
|
|
"valid_targets_min": 3706
|
|
},
|
|
{
|
|
"epoch": 2.900392156862745,
|
|
"grad_norm": 0.4291966772375491,
|
|
"learning_rate": 2.914452312246131e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1422233134508133,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5994.2,
|
|
"valid_targets_min": 4215
|
|
},
|
|
{
|
|
"epoch": 2.908235294117647,
|
|
"grad_norm": 0.5524116065305122,
|
|
"learning_rate": 2.9074934248998557e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13530707359313965,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5521.1,
|
|
"valid_targets_min": 2581
|
|
},
|
|
{
|
|
"epoch": 2.916078431372549,
|
|
"grad_norm": 0.4208152215051117,
|
|
"learning_rate": 2.9005206748902538e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15862345695495605,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6273.5,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 2.923921568627451,
|
|
"grad_norm": 0.42119458482360816,
|
|
"learning_rate": 2.8935341687314703e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609993726015091,
|
|
"step": 1865,
|
|
"valid_targets_mean": 6337.6,
|
|
"valid_targets_min": 4558
|
|
},
|
|
{
|
|
"epoch": 2.931764705882353,
|
|
"grad_norm": 0.429911645514168,
|
|
"learning_rate": 2.8865340131477846e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10718243569135666,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3961.9,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 2.9396078431372548,
|
|
"grad_norm": 0.3991529710863121,
|
|
"learning_rate": 2.8795203150719836e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12177126109600067,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4913.1,
|
|
"valid_targets_min": 3659
|
|
},
|
|
{
|
|
"epoch": 2.9474509803921567,
|
|
"grad_norm": 0.3941443481025059,
|
|
"learning_rate": 2.8724931816437255e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10472463071346283,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5183.1,
|
|
"valid_targets_min": 3549
|
|
},
|
|
{
|
|
"epoch": 2.9552941176470586,
|
|
"grad_norm": 0.4413921768424274,
|
|
"learning_rate": 2.8654527202079027e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14560449123382568,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4850.9,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 2.9631372549019606,
|
|
"grad_norm": 0.4475101418982669,
|
|
"learning_rate": 2.8583990383130043e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12408129125833511,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4401.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.9709803921568625,
|
|
"grad_norm": 0.42161930668028813,
|
|
"learning_rate": 2.8513322437094727e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12770900130271912,
|
|
"step": 1895,
|
|
"valid_targets_mean": 5884.8,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 2.978823529411765,
|
|
"grad_norm": 0.47301335801935585,
|
|
"learning_rate": 2.844252444348055e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12068523466587067,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4397.2,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 2.986666666666667,
|
|
"grad_norm": 0.4152297852271275,
|
|
"learning_rate": 2.8371597483781577e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09066110849380493,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4132.5,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 2.9945098039215687,
|
|
"grad_norm": 0.4058706264549092,
|
|
"learning_rate": 2.8300542641461937e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13444828987121582,
|
|
"step": 1910,
|
|
"valid_targets_mean": 6010.4,
|
|
"valid_targets_min": 4250
|
|
},
|
|
{
|
|
"epoch": 3.0015686274509803,
|
|
"grad_norm": 0.447309349141732,
|
|
"learning_rate": 2.822936100193924e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11755850911140442,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4557.0,
|
|
"valid_targets_min": 3016
|
|
},
|
|
{
|
|
"epoch": 3.0094117647058822,
|
|
"grad_norm": 0.4062062188090626,
|
|
"learning_rate": 2.8158053652568046e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184432685375214,
|
|
"step": 1920,
|
|
"valid_targets_mean": 6199.8,
|
|
"valid_targets_min": 4235
|
|
},
|
|
{
|
|
"epoch": 3.017254901960784,
|
|
"grad_norm": 0.44070499727258766,
|
|
"learning_rate": 2.808662168262321e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11080098152160645,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4623.4,
|
|
"valid_targets_min": 3713
|
|
},
|
|
{
|
|
"epoch": 3.025098039215686,
|
|
"grad_norm": 0.4763670171149257,
|
|
"learning_rate": 2.8015066183283272e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1113940179347992,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4373.8,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 3.032941176470588,
|
|
"grad_norm": 0.4355243760620159,
|
|
"learning_rate": 2.7943388247613787e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14149567484855652,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5609.1,
|
|
"valid_targets_min": 4401
|
|
},
|
|
{
|
|
"epoch": 3.0407843137254904,
|
|
"grad_norm": 0.42356997296686133,
|
|
"learning_rate": 2.787158897055061e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1346980333328247,
|
|
"step": 1940,
|
|
"valid_targets_mean": 5785.2,
|
|
"valid_targets_min": 4146
|
|
},
|
|
{
|
|
"epoch": 3.0486274509803923,
|
|
"grad_norm": 0.465176695347723,
|
|
"learning_rate": 2.7799669448883165e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12955603003501892,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3767.0,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 3.0564705882352943,
|
|
"grad_norm": 0.46121709494870516,
|
|
"learning_rate": 2.7727630781237743e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1162000298500061,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4428.5,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 3.064313725490196,
|
|
"grad_norm": 0.429787541213453,
|
|
"learning_rate": 2.7655474068060644e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11739633232355118,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4955.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.072156862745098,
|
|
"grad_norm": 0.46328820902774925,
|
|
"learning_rate": 2.7583200411601424e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11228733509778976,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5139.6,
|
|
"valid_targets_min": 2463
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"grad_norm": 0.5328657014424273,
|
|
"learning_rate": 2.7510810915896043e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13428783416748047,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4627.1,
|
|
"valid_targets_min": 3925
|
|
},
|
|
{
|
|
"epoch": 3.087843137254902,
|
|
"grad_norm": 0.45050168223411524,
|
|
"learning_rate": 2.7438306686749978e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10249172151088715,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4340.6,
|
|
"valid_targets_min": 3457
|
|
},
|
|
{
|
|
"epoch": 3.095686274509804,
|
|
"grad_norm": 0.4319070860395141,
|
|
"learning_rate": 2.7365688831721358e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13910704851150513,
|
|
"step": 1975,
|
|
"valid_targets_mean": 5662.8,
|
|
"valid_targets_min": 4784
|
|
},
|
|
{
|
|
"epoch": 3.103529411764706,
|
|
"grad_norm": 0.42886398818721116,
|
|
"learning_rate": 2.7292958460104027e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12069983780384064,
|
|
"step": 1980,
|
|
"valid_targets_mean": 5236.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 3.111372549019608,
|
|
"grad_norm": 0.4611108403825653,
|
|
"learning_rate": 2.7220116682910628e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13484933972358704,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5587.2,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 3.1192156862745097,
|
|
"grad_norm": 0.4451396832224761,
|
|
"learning_rate": 2.714716461285559e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11066186428070068,
|
|
"step": 1990,
|
|
"valid_targets_mean": 5059.9,
|
|
"valid_targets_min": 4067
|
|
},
|
|
{
|
|
"epoch": 3.1270588235294117,
|
|
"grad_norm": 0.38821312176893963,
|
|
"learning_rate": 2.7074103364338155e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14021679759025574,
|
|
"step": 1995,
|
|
"valid_targets_mean": 7066.0,
|
|
"valid_targets_min": 4159
|
|
},
|
|
{
|
|
"epoch": 3.1349019607843136,
|
|
"grad_norm": 0.4183193977456837,
|
|
"learning_rate": 2.7000934053425347e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12177550047636032,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5553.2,
|
|
"valid_targets_min": 3258
|
|
},
|
|
{
|
|
"epoch": 3.1427450980392155,
|
|
"grad_norm": 0.43765474174948893,
|
|
"learning_rate": 2.692765779783494e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11270841956138611,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4834.6,
|
|
"valid_targets_min": 3572
|
|
},
|
|
{
|
|
"epoch": 3.1505882352941175,
|
|
"grad_norm": 0.426633457560378,
|
|
"learning_rate": 2.6854275716918352e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14287523925304413,
|
|
"step": 2010,
|
|
"valid_targets_mean": 5455.9,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 3.1584313725490194,
|
|
"grad_norm": 0.4078425097510185,
|
|
"learning_rate": 2.678078893164359e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11538930237293243,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5689.1,
|
|
"valid_targets_min": 3770
|
|
},
|
|
{
|
|
"epoch": 3.1662745098039213,
|
|
"grad_norm": 0.44439178115633443,
|
|
"learning_rate": 2.6707198564578066e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11317244172096252,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4206.0,
|
|
"valid_targets_min": 2551
|
|
},
|
|
{
|
|
"epoch": 3.1741176470588237,
|
|
"grad_norm": 0.456212200285294,
|
|
"learning_rate": 2.663350573987152e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11523454636335373,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4083.5,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 3.1819607843137256,
|
|
"grad_norm": 0.4599279892597807,
|
|
"learning_rate": 2.655971158323879e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11775343865156174,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4272.1,
|
|
"valid_targets_min": 3329
|
|
},
|
|
{
|
|
"epoch": 3.1898039215686276,
|
|
"grad_norm": 0.40566259798749416,
|
|
"learning_rate": 2.648581722194264e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12668456137180328,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5694.6,
|
|
"valid_targets_min": 4027
|
|
},
|
|
{
|
|
"epoch": 3.1976470588235295,
|
|
"grad_norm": 0.4725635952751264,
|
|
"learning_rate": 2.6411823784776537e-05,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10355997085571289,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4016.1,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 3.2054901960784314,
|
|
"grad_norm": 0.44696073096110805,
|
|
"learning_rate": 2.6337732402047422e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08200616389513016,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4368.2,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 3.2133333333333334,
|
|
"grad_norm": 0.4046751507104614,
|
|
"learning_rate": 2.626354420555841e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10652727633714676,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5266.0,
|
|
"valid_targets_min": 4091
|
|
},
|
|
{
|
|
"epoch": 3.2211764705882353,
|
|
"grad_norm": 0.43847046907302706,
|
|
"learning_rate": 2.618926032859154e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10782967507839203,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4211.2,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 3.2290196078431372,
|
|
"grad_norm": 0.4329089155848683,
|
|
"learning_rate": 2.611488190589043e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12138757854700089,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4696.2,
|
|
"valid_targets_min": 4053
|
|
},
|
|
{
|
|
"epoch": 3.236862745098039,
|
|
"grad_norm": 0.42953762620876,
|
|
"learning_rate": 2.6040410073642965e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10514818876981735,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4275.8,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 3.244705882352941,
|
|
"grad_norm": 0.4701217543316149,
|
|
"learning_rate": 2.596584596946392e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11655449122190475,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4477.5,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 3.252549019607843,
|
|
"grad_norm": 0.4448673683099468,
|
|
"learning_rate": 2.589119073237762e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1493644416332245,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5425.0,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 3.260392156862745,
|
|
"grad_norm": 0.4529303538983212,
|
|
"learning_rate": 2.5816445502800494e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310499757528305,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4829.0,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 3.268235294117647,
|
|
"grad_norm": 0.42516319618073034,
|
|
"learning_rate": 2.5741611422523684e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10771302878856659,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4812.5,
|
|
"valid_targets_min": 3609
|
|
},
|
|
{
|
|
"epoch": 3.276078431372549,
|
|
"grad_norm": 0.4202683026835801,
|
|
"learning_rate": 2.566668963469559e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13373664021492004,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5934.4,
|
|
"valid_targets_min": 3554
|
|
},
|
|
{
|
|
"epoch": 3.283921568627451,
|
|
"grad_norm": 0.46645395477484847,
|
|
"learning_rate": 2.5591681283804426e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471448540687561,
|
|
"step": 2095,
|
|
"valid_targets_mean": 6183.4,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 3.291764705882353,
|
|
"grad_norm": 0.414522147212614,
|
|
"learning_rate": 2.5516587515660706e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12761680781841278,
|
|
"step": 2100,
|
|
"valid_targets_mean": 5796.4,
|
|
"valid_targets_min": 3701
|
|
},
|
|
{
|
|
"epoch": 3.299607843137255,
|
|
"grad_norm": 0.42790819400186036,
|
|
"learning_rate": 2.5441409477379764e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09236972033977509,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4362.0,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 3.307450980392157,
|
|
"grad_norm": 0.46013772293585553,
|
|
"learning_rate": 2.5366148317364237e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10060383379459381,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3963.4,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 3.315294117647059,
|
|
"grad_norm": 0.41304440348713223,
|
|
"learning_rate": 2.5290805185286494e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10279671102762222,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4324.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 3.323137254901961,
|
|
"grad_norm": 0.39225623307144264,
|
|
"learning_rate": 2.521538123207111e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322818249464035,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5806.5,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 3.330980392156863,
|
|
"grad_norm": 0.4904865566407993,
|
|
"learning_rate": 2.5139877609877244e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13356977701187134,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5983.1,
|
|
"valid_targets_min": 3689
|
|
},
|
|
{
|
|
"epoch": 3.3388235294117647,
|
|
"grad_norm": 0.4198231556106936,
|
|
"learning_rate": 2.506429547208107e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10872642695903778,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4768.0,
|
|
"valid_targets_min": 4005
|
|
},
|
|
{
|
|
"epoch": 3.3466666666666667,
|
|
"grad_norm": 0.45380194517033634,
|
|
"learning_rate": 2.498863597325815e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11521173268556595,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5030.1,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 3.3545098039215686,
|
|
"grad_norm": 0.43347927756835075,
|
|
"learning_rate": 2.4912900269165797e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13011592626571655,
|
|
"step": 2140,
|
|
"valid_targets_mean": 6122.5,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 3.3623529411764705,
|
|
"grad_norm": 0.4271739715689149,
|
|
"learning_rate": 2.483708951672541e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262693703174591,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5268.2,
|
|
"valid_targets_min": 3765
|
|
},
|
|
{
|
|
"epoch": 3.3701960784313725,
|
|
"grad_norm": 0.4025912834365145,
|
|
"learning_rate": 2.4761204874004818e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12437351047992706,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5548.9,
|
|
"valid_targets_min": 3340
|
|
},
|
|
{
|
|
"epoch": 3.3780392156862744,
|
|
"grad_norm": 0.45364220463006383,
|
|
"learning_rate": 2.4685247500200583e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10871313512325287,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4770.6,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.3858823529411763,
|
|
"grad_norm": 0.44490191755890435,
|
|
"learning_rate": 2.4609218555620275e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12509560585021973,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5227.4,
|
|
"valid_targets_min": 4045
|
|
},
|
|
{
|
|
"epoch": 3.3937254901960783,
|
|
"grad_norm": 0.40037646667924837,
|
|
"learning_rate": 2.4533119201664785e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10177329182624817,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4978.2,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 3.40156862745098,
|
|
"grad_norm": 0.4155901937269117,
|
|
"learning_rate": 2.4456950600810542e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15294963121414185,
|
|
"step": 2170,
|
|
"valid_targets_mean": 6141.0,
|
|
"valid_targets_min": 3708
|
|
},
|
|
{
|
|
"epoch": 3.409411764705882,
|
|
"grad_norm": 0.4641026797448153,
|
|
"learning_rate": 2.4380713916591785e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12357586622238159,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4147.4,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.417254901960784,
|
|
"grad_norm": 0.4171810410522086,
|
|
"learning_rate": 2.4304410313582776e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12325207889080048,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5426.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.4250980392156865,
|
|
"grad_norm": 0.46416076819498153,
|
|
"learning_rate": 2.422804095738002e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08827514946460724,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3166.9,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 3.4329411764705884,
|
|
"grad_norm": 0.4532483257717387,
|
|
"learning_rate": 2.4151607014584437e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15844324231147766,
|
|
"step": 2190,
|
|
"valid_targets_mean": 7008.6,
|
|
"valid_targets_min": 4008
|
|
},
|
|
{
|
|
"epoch": 3.4407843137254903,
|
|
"grad_norm": 0.44426263058425547,
|
|
"learning_rate": 2.4075109652783573e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11195256561040878,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4449.6,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 3.4486274509803923,
|
|
"grad_norm": 0.4782397885162491,
|
|
"learning_rate": 2.3998550040533743e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13909482955932617,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4378.5,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 3.456470588235294,
|
|
"grad_norm": 0.4702721164139716,
|
|
"learning_rate": 2.392192934734219e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13552600145339966,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4721.4,
|
|
"valid_targets_min": 3900
|
|
},
|
|
{
|
|
"epoch": 3.464313725490196,
|
|
"grad_norm": 0.43475168828785626,
|
|
"learning_rate": 2.3845248743649196e-05,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1300785392522812,
|
|
"step": 2210,
|
|
"valid_targets_mean": 5198.1,
|
|
"valid_targets_min": 3612
|
|
},
|
|
{
|
|
"epoch": 3.472156862745098,
|
|
"grad_norm": 0.43867207693018295,
|
|
"learning_rate": 2.376850940081025e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14244931936264038,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5360.0,
|
|
"valid_targets_min": 3791
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"grad_norm": 0.44063741886627417,
|
|
"learning_rate": 2.3691712491078107e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09964333474636078,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4041.4,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 3.487843137254902,
|
|
"grad_norm": 0.42522725919593213,
|
|
"learning_rate": 2.3614859187584914e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13847005367279053,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4862.5,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 3.495686274509804,
|
|
"grad_norm": 0.6390226843494196,
|
|
"learning_rate": 2.353795066432427e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294592320919037,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6039.2,
|
|
"valid_targets_min": 4011
|
|
},
|
|
{
|
|
"epoch": 3.503529411764706,
|
|
"grad_norm": 0.4288414756464652,
|
|
"learning_rate": 2.3460988096133284e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12420792132616043,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5175.9,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 3.5113725490196077,
|
|
"grad_norm": 0.43668336523298357,
|
|
"learning_rate": 2.338397265867468e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11191526055335999,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4937.1,
|
|
"valid_targets_min": 4136
|
|
},
|
|
{
|
|
"epoch": 3.5192156862745096,
|
|
"grad_norm": 0.4668546590697348,
|
|
"learning_rate": 2.3306905528418762e-05,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14170069992542267,
|
|
"step": 2245,
|
|
"valid_targets_mean": 5523.1,
|
|
"valid_targets_min": 3314
|
|
},
|
|
{
|
|
"epoch": 3.527058823529412,
|
|
"grad_norm": 0.4235869993667335,
|
|
"learning_rate": 2.3229787882625496e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12099191546440125,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4769.6,
|
|
"valid_targets_min": 3255
|
|
},
|
|
{
|
|
"epoch": 3.534901960784314,
|
|
"grad_norm": 0.43656476223885277,
|
|
"learning_rate": 2.315262089932653e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12198434770107269,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5176.5,
|
|
"valid_targets_min": 4087
|
|
},
|
|
{
|
|
"epoch": 3.542745098039216,
|
|
"grad_norm": 0.4152031042873841,
|
|
"learning_rate": 2.3075405757307147e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12806783616542816,
|
|
"step": 2260,
|
|
"valid_targets_mean": 6016.6,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 3.550588235294118,
|
|
"grad_norm": 0.43654785463193213,
|
|
"learning_rate": 2.2998143636088323e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11978089809417725,
|
|
"step": 2265,
|
|
"valid_targets_mean": 5109.9,
|
|
"valid_targets_min": 3790
|
|
},
|
|
{
|
|
"epoch": 3.5584313725490198,
|
|
"grad_norm": 0.45198424570474105,
|
|
"learning_rate": 2.2920835715908654e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13197702169418335,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4443.2,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 3.5662745098039217,
|
|
"grad_norm": 0.4250985223444925,
|
|
"learning_rate": 2.2843483177706363e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149743914604187,
|
|
"step": 2275,
|
|
"valid_targets_mean": 6065.6,
|
|
"valid_targets_min": 3785
|
|
},
|
|
{
|
|
"epoch": 3.5741176470588236,
|
|
"grad_norm": 0.4173987075467208,
|
|
"learning_rate": 2.2766087203101245e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1250050663948059,
|
|
"step": 2280,
|
|
"valid_targets_mean": 5207.5,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 3.5819607843137256,
|
|
"grad_norm": 0.45314085836943274,
|
|
"learning_rate": 2.2688648974376622e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10489747673273087,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3719.0,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 3.5898039215686275,
|
|
"grad_norm": 0.44051637331745386,
|
|
"learning_rate": 2.261116967446127e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1355193555355072,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4599.1,
|
|
"valid_targets_min": 4026
|
|
},
|
|
{
|
|
"epoch": 3.5976470588235294,
|
|
"grad_norm": 0.4728518002937475,
|
|
"learning_rate": 2.2533650486911375e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13677480816841125,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4525.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.6054901960784314,
|
|
"grad_norm": 0.4008279926262856,
|
|
"learning_rate": 2.245609259589243e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10138443112373352,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5378.1,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 3.6133333333333333,
|
|
"grad_norm": 0.4265027129685831,
|
|
"learning_rate": 2.2378497186161146e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514357626438141,
|
|
"step": 2305,
|
|
"valid_targets_mean": 6498.0,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 3.621176470588235,
|
|
"grad_norm": 0.4746697315873271,
|
|
"learning_rate": 2.230086544304737e-05,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213865876197815,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4269.1,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 3.629019607843137,
|
|
"grad_norm": 0.464244944957954,
|
|
"learning_rate": 2.222319855243597e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11628014594316483,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4143.0,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 3.636862745098039,
|
|
"grad_norm": 0.4020090060829787,
|
|
"learning_rate": 2.2145497700748723e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11320920288562775,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5850.8,
|
|
"valid_targets_min": 3387
|
|
},
|
|
{
|
|
"epoch": 3.644705882352941,
|
|
"grad_norm": 0.4007527110243504,
|
|
"learning_rate": 2.2067764074926163e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13083413243293762,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5778.8,
|
|
"valid_targets_min": 3775
|
|
},
|
|
{
|
|
"epoch": 3.652549019607843,
|
|
"grad_norm": 0.4347470779889978,
|
|
"learning_rate": 2.198999886240951e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10197200626134872,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4524.2,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 3.660392156862745,
|
|
"grad_norm": 0.4345175758531176,
|
|
"learning_rate": 2.1912203251122475e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11358082294464111,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5199.6,
|
|
"valid_targets_min": 3988
|
|
},
|
|
{
|
|
"epoch": 3.668235294117647,
|
|
"grad_norm": 0.4007587051470908,
|
|
"learning_rate": 2.1834378429453133e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13197997212409973,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5778.9,
|
|
"valid_targets_min": 3808
|
|
},
|
|
{
|
|
"epoch": 3.6760784313725487,
|
|
"grad_norm": 0.4494627231362916,
|
|
"learning_rate": 2.175652558623577e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12308965623378754,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5130.2,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 3.683921568627451,
|
|
"grad_norm": 0.4152150942753047,
|
|
"learning_rate": 2.1678645910732734e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1369512677192688,
|
|
"step": 2350,
|
|
"valid_targets_mean": 6060.2,
|
|
"valid_targets_min": 4055
|
|
},
|
|
{
|
|
"epoch": 3.691764705882353,
|
|
"grad_norm": 0.4712923072777983,
|
|
"learning_rate": 2.1600740592616245e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09407570213079453,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4600.4,
|
|
"valid_targets_min": 3945
|
|
},
|
|
{
|
|
"epoch": 3.699607843137255,
|
|
"grad_norm": 0.4105138321258731,
|
|
"learning_rate": 2.152281082195024e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12141579389572144,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4882.1,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 3.707450980392157,
|
|
"grad_norm": 0.45925298953338334,
|
|
"learning_rate": 2.1444857789172185e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11542713642120361,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4519.5,
|
|
"valid_targets_min": 3414
|
|
},
|
|
{
|
|
"epoch": 3.715294117647059,
|
|
"grad_norm": 0.45517657387663035,
|
|
"learning_rate": 2.1366882685074892e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12940552830696106,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4775.5,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 3.723137254901961,
|
|
"grad_norm": 0.42564920456574895,
|
|
"learning_rate": 2.1288886700788335e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13440310955047607,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5391.8,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.7309803921568627,
|
|
"grad_norm": 0.6178229501585953,
|
|
"learning_rate": 2.1210871027761438e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13043555617332458,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5792.1,
|
|
"valid_targets_min": 3642
|
|
},
|
|
{
|
|
"epoch": 3.7388235294117647,
|
|
"grad_norm": 0.4102212916044459,
|
|
"learning_rate": 2.1132836857743903e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13514447212219238,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5541.0,
|
|
"valid_targets_min": 4262
|
|
},
|
|
{
|
|
"epoch": 3.7466666666666666,
|
|
"grad_norm": 0.4345973715149457,
|
|
"learning_rate": 2.105478538276797e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11293156445026398,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4884.6,
|
|
"valid_targets_min": 2117
|
|
},
|
|
{
|
|
"epoch": 3.7545098039215685,
|
|
"grad_norm": 0.4055907879478308,
|
|
"learning_rate": 2.0976717795130233e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12585970759391785,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5308.1,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 3.7623529411764705,
|
|
"grad_norm": 0.5125677754224955,
|
|
"learning_rate": 2.0898635287373423e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17816469073295593,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5720.0,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.7701960784313724,
|
|
"grad_norm": 0.4433647739611089,
|
|
"learning_rate": 2.0820539052268186e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09588039666414261,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3557.0,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.7780392156862748,
|
|
"grad_norm": 0.40809336018652104,
|
|
"learning_rate": 2.0742430282794857e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09966695308685303,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4593.8,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.7858823529411767,
|
|
"grad_norm": 0.41957665576470976,
|
|
"learning_rate": 2.0664310172125242e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10396531969308853,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4152.6,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 3.7937254901960786,
|
|
"grad_norm": 0.44894426328179443,
|
|
"learning_rate": 2.0586179913604413e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11163697391748428,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4620.6,
|
|
"valid_targets_min": 3845
|
|
},
|
|
{
|
|
"epoch": 3.8015686274509806,
|
|
"grad_norm": 0.3977540873194003,
|
|
"learning_rate": 2.0508040700732438e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19368122518062592,
|
|
"step": 2425,
|
|
"valid_targets_mean": 7945.9,
|
|
"valid_targets_min": 3828
|
|
},
|
|
{
|
|
"epoch": 3.8094117647058825,
|
|
"grad_norm": 0.45889914530307035,
|
|
"learning_rate": 2.0429893727146167e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11084446310997009,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4031.5,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 3.8172549019607844,
|
|
"grad_norm": 0.48554569449891527,
|
|
"learning_rate": 2.0351740186601012e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371164172887802,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5224.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 3.8250980392156864,
|
|
"grad_norm": 0.44497973302423827,
|
|
"learning_rate": 2.0273581272952708e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09165876358747482,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3842.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.8329411764705883,
|
|
"grad_norm": 0.44165704466795486,
|
|
"learning_rate": 2.0195418180139055e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11761967837810516,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4286.0,
|
|
"valid_targets_min": 3469
|
|
},
|
|
{
|
|
"epoch": 3.8407843137254902,
|
|
"grad_norm": 0.47116094179324775,
|
|
"learning_rate": 2.0117252102161687e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12916581332683563,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3781.9,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 3.848627450980392,
|
|
"grad_norm": 0.4432899814548258,
|
|
"learning_rate": 2.0039084233067853e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09984422475099564,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4347.4,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 3.856470588235294,
|
|
"grad_norm": 0.43238947024639623,
|
|
"learning_rate": 1.9960915766932153e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14688940346240997,
|
|
"step": 2460,
|
|
"valid_targets_mean": 6676.2,
|
|
"valid_targets_min": 4367
|
|
},
|
|
{
|
|
"epoch": 3.864313725490196,
|
|
"grad_norm": 0.4441903847270208,
|
|
"learning_rate": 1.988274789783832e-05,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12853792309761047,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4793.4,
|
|
"valid_targets_min": 4049
|
|
},
|
|
{
|
|
"epoch": 3.872156862745098,
|
|
"grad_norm": 0.4069753596366182,
|
|
"learning_rate": 1.9804581819860952e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13356426358222961,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5416.6,
|
|
"valid_targets_min": 4194
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"grad_norm": 0.4361007055406648,
|
|
"learning_rate": 1.9726418727047295e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12676510214805603,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5579.5,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 3.887843137254902,
|
|
"grad_norm": 0.4198017556701137,
|
|
"learning_rate": 1.9648259813398987e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1500803530216217,
|
|
"step": 2480,
|
|
"valid_targets_mean": 6409.1,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 3.8956862745098038,
|
|
"grad_norm": 0.42378635541342735,
|
|
"learning_rate": 1.957010627285384e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13810518383979797,
|
|
"step": 2485,
|
|
"valid_targets_mean": 6154.2,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 3.9035294117647057,
|
|
"grad_norm": 0.4330011404936207,
|
|
"learning_rate": 1.9491959299267572e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12688998878002167,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4943.1,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 3.9113725490196076,
|
|
"grad_norm": 0.43668303326520075,
|
|
"learning_rate": 1.941382008639559e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1096714660525322,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4343.4,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 3.9192156862745096,
|
|
"grad_norm": 0.45479288831404163,
|
|
"learning_rate": 1.933568982787476e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143254354596138,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5751.5,
|
|
"valid_targets_min": 4181
|
|
},
|
|
{
|
|
"epoch": 3.9270588235294115,
|
|
"grad_norm": 0.42946212220942426,
|
|
"learning_rate": 1.9257569717205153e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10440149158239365,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4229.2,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.934901960784314,
|
|
"grad_norm": 0.43173231505010357,
|
|
"learning_rate": 1.9179460947731824e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11701091378927231,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4764.4,
|
|
"valid_targets_min": 3967
|
|
},
|
|
{
|
|
"epoch": 3.942745098039216,
|
|
"grad_norm": 0.4179666973800945,
|
|
"learning_rate": 1.9101364712626577e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1057259738445282,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4593.4,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 3.9505882352941177,
|
|
"grad_norm": 0.41267797631647846,
|
|
"learning_rate": 1.9023282204869767e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1084958016872406,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5013.1,
|
|
"valid_targets_min": 3702
|
|
},
|
|
{
|
|
"epoch": 3.9584313725490197,
|
|
"grad_norm": 0.4182805319321907,
|
|
"learning_rate": 1.8945214617232036e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471642702817917,
|
|
"step": 2525,
|
|
"valid_targets_mean": 5577.8,
|
|
"valid_targets_min": 4326
|
|
},
|
|
{
|
|
"epoch": 3.9662745098039216,
|
|
"grad_norm": 0.7234435491670909,
|
|
"learning_rate": 1.88671631422561e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09615735709667206,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4327.0,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 3.9741176470588235,
|
|
"grad_norm": 0.42254129246244126,
|
|
"learning_rate": 1.8789128972238565e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0965128019452095,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4908.0,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 3.9819607843137255,
|
|
"grad_norm": 0.4108845812599496,
|
|
"learning_rate": 1.8711113299211675e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1227368712425232,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5048.0,
|
|
"valid_targets_min": 3740
|
|
},
|
|
{
|
|
"epoch": 3.9898039215686274,
|
|
"grad_norm": 0.34816284688558613,
|
|
"learning_rate": 1.8633117314925118e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11549834907054901,
|
|
"step": 2545,
|
|
"valid_targets_mean": 7279.1,
|
|
"valid_targets_min": 4113
|
|
},
|
|
{
|
|
"epoch": 3.9976470588235293,
|
|
"grad_norm": 0.4017959546348958,
|
|
"learning_rate": 1.855514221082782e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12120068818330765,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5609.5,
|
|
"valid_targets_min": 3329
|
|
},
|
|
{
|
|
"epoch": 4.004705882352941,
|
|
"grad_norm": 0.39569157004085714,
|
|
"learning_rate": 1.8477189178049764e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1075434684753418,
|
|
"step": 2555,
|
|
"valid_targets_mean": 5506.8,
|
|
"valid_targets_min": 3669
|
|
},
|
|
{
|
|
"epoch": 4.012549019607843,
|
|
"grad_norm": 0.41935713908603817,
|
|
"learning_rate": 1.839925940738376e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1180042028427124,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5294.9,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 4.020392156862745,
|
|
"grad_norm": 0.4394484999752763,
|
|
"learning_rate": 1.8321354089267272e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10508240014314651,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4761.2,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 4.028235294117647,
|
|
"grad_norm": 0.44221495285080337,
|
|
"learning_rate": 1.8243474413764236e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18124812841415405,
|
|
"step": 2570,
|
|
"valid_targets_mean": 7468.4,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 4.036078431372549,
|
|
"grad_norm": 0.4132976841846695,
|
|
"learning_rate": 1.8165621570546874e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11682303994894028,
|
|
"step": 2575,
|
|
"valid_targets_mean": 5616.6,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 4.043921568627451,
|
|
"grad_norm": 0.4568921049954836,
|
|
"learning_rate": 1.808779674887753e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11188096553087234,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4916.9,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 4.051764705882353,
|
|
"grad_norm": 0.4622869017756218,
|
|
"learning_rate": 1.801000113759049e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10092595219612122,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4231.2,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 4.059607843137255,
|
|
"grad_norm": 0.4176738466656865,
|
|
"learning_rate": 1.7932235925073836e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08800789713859558,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4712.8,
|
|
"valid_targets_min": 3705
|
|
},
|
|
{
|
|
"epoch": 4.067450980392157,
|
|
"grad_norm": 0.4241403087923899,
|
|
"learning_rate": 1.7854502299251284e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1052800789475441,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4965.0,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 4.075294117647059,
|
|
"grad_norm": 0.4705868076345857,
|
|
"learning_rate": 1.7776801447564032e-05,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12272444367408752,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4833.2,
|
|
"valid_targets_min": 3409
|
|
},
|
|
{
|
|
"epoch": 4.083137254901961,
|
|
"grad_norm": 0.4378490088120457,
|
|
"learning_rate": 1.7699134556952634e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10248713195323944,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4233.1,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 4.090980392156863,
|
|
"grad_norm": 0.42865922256952516,
|
|
"learning_rate": 1.7621502813838864e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10999415814876556,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4333.8,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 4.0988235294117645,
|
|
"grad_norm": 0.43159569485209737,
|
|
"learning_rate": 1.754390740410758e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08083570003509521,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3548.9,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 4.1066666666666665,
|
|
"grad_norm": 0.5194467132035089,
|
|
"learning_rate": 1.7466349513088636e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12257933616638184,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4051.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 4.114509803921568,
|
|
"grad_norm": 0.4139767381834643,
|
|
"learning_rate": 1.738883032553873e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11203347146511078,
|
|
"step": 2625,
|
|
"valid_targets_mean": 5300.0,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 4.12235294117647,
|
|
"grad_norm": 0.45862693952462696,
|
|
"learning_rate": 1.7311351025623385e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12201839685440063,
|
|
"step": 2630,
|
|
"valid_targets_mean": 5129.0,
|
|
"valid_targets_min": 4048
|
|
},
|
|
{
|
|
"epoch": 4.130196078431372,
|
|
"grad_norm": 0.4417442163099453,
|
|
"learning_rate": 1.723391279689876e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17832894623279572,
|
|
"step": 2635,
|
|
"valid_targets_mean": 7041.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 4.138039215686274,
|
|
"grad_norm": 0.4350450889064667,
|
|
"learning_rate": 1.7156516822293644e-05,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10031968355178833,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4177.1,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 4.145882352941176,
|
|
"grad_norm": 0.46888520664230127,
|
|
"learning_rate": 1.7079164284091353e-05,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10806365311145782,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4084.9,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.153725490196078,
|
|
"grad_norm": 0.41910197413913913,
|
|
"learning_rate": 1.7001856363911687e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10648825764656067,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5057.6,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 4.16156862745098,
|
|
"grad_norm": 0.5218402944168531,
|
|
"learning_rate": 1.692459424269286e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11246689409017563,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3674.1,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 4.169411764705882,
|
|
"grad_norm": 0.46180954109801275,
|
|
"learning_rate": 1.6847379100673474e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14503367245197296,
|
|
"step": 2660,
|
|
"valid_targets_mean": 5285.0,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 4.177254901960785,
|
|
"grad_norm": 0.444249980676699,
|
|
"learning_rate": 1.6770212117374504e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10934904217720032,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4780.2,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.185098039215687,
|
|
"grad_norm": 0.465724690204505,
|
|
"learning_rate": 1.6693094471581244e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10249033570289612,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4325.9,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 4.192941176470589,
|
|
"grad_norm": 0.44416118201724536,
|
|
"learning_rate": 1.6616027341325328e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1000424325466156,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4408.9,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 4.2007843137254905,
|
|
"grad_norm": 0.45587041013740054,
|
|
"learning_rate": 1.653901190386672e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11124923825263977,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4076.0,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 4.2086274509803925,
|
|
"grad_norm": 0.4849010155411813,
|
|
"learning_rate": 1.646204933567574e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11906752735376358,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4709.2,
|
|
"valid_targets_min": 3212
|
|
},
|
|
{
|
|
"epoch": 4.216470588235294,
|
|
"grad_norm": 0.4550980097289989,
|
|
"learning_rate": 1.638514081241509e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15008796751499176,
|
|
"step": 2690,
|
|
"valid_targets_mean": 5774.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 4.224313725490196,
|
|
"grad_norm": 0.44042050024817103,
|
|
"learning_rate": 1.6308287508921893e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09062080085277557,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4297.5,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 4.232156862745098,
|
|
"grad_norm": 0.4247855213774718,
|
|
"learning_rate": 1.6231490599189753e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12232277542352676,
|
|
"step": 2700,
|
|
"valid_targets_mean": 5617.2,
|
|
"valid_targets_min": 3678
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"grad_norm": 0.4404614212649334,
|
|
"learning_rate": 1.615475125635081e-05,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09367984533309937,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4037.8,
|
|
"valid_targets_min": 3472
|
|
},
|
|
{
|
|
"epoch": 4.247843137254902,
|
|
"grad_norm": 0.4506826599579043,
|
|
"learning_rate": 1.607807065265782e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10886059701442719,
|
|
"step": 2710,
|
|
"valid_targets_mean": 5706.8,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 4.255686274509804,
|
|
"grad_norm": 0.4782992698604372,
|
|
"learning_rate": 1.600144995946626e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12588849663734436,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4821.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.263529411764706,
|
|
"grad_norm": 0.4510322699008573,
|
|
"learning_rate": 1.5924890347216433e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11632902175188065,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4474.1,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 4.271372549019608,
|
|
"grad_norm": 0.44522967831663324,
|
|
"learning_rate": 1.5848392985415573e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11140655726194382,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4962.0,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 4.27921568627451,
|
|
"grad_norm": 0.41416180143940035,
|
|
"learning_rate": 1.5771959042619983e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09167453646659851,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4499.6,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 4.287058823529412,
|
|
"grad_norm": 0.4121993785188106,
|
|
"learning_rate": 1.5695589686417224e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09622062742710114,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4420.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.294901960784314,
|
|
"grad_norm": 0.4992264402830286,
|
|
"learning_rate": 1.561928608340822e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09109040349721909,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3033.8,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 4.302745098039216,
|
|
"grad_norm": 0.6052650370274888,
|
|
"learning_rate": 1.554304939918946e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09706152230501175,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3555.8,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 4.310588235294118,
|
|
"grad_norm": 0.43459356722197345,
|
|
"learning_rate": 1.5466880798335222e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12693923711776733,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5968.5,
|
|
"valid_targets_min": 4153
|
|
},
|
|
{
|
|
"epoch": 4.3184313725490195,
|
|
"grad_norm": 0.41454868256199134,
|
|
"learning_rate": 1.539078144437973e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11799121648073196,
|
|
"step": 2755,
|
|
"valid_targets_mean": 6189.2,
|
|
"valid_targets_min": 3457
|
|
},
|
|
{
|
|
"epoch": 4.3262745098039215,
|
|
"grad_norm": 0.4376302429257829,
|
|
"learning_rate": 1.5314752499799427e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11019665002822876,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5059.8,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 4.334117647058823,
|
|
"grad_norm": 0.49424493066282355,
|
|
"learning_rate": 1.5238795125995189e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12486723065376282,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4199.2,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 4.341960784313725,
|
|
"grad_norm": 0.45864869228381433,
|
|
"learning_rate": 1.5162910483274593e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11594164371490479,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4496.1,
|
|
"valid_targets_min": 3342
|
|
},
|
|
{
|
|
"epoch": 4.349803921568627,
|
|
"grad_norm": 0.4290020306517361,
|
|
"learning_rate": 1.5087099730834207e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14288020133972168,
|
|
"step": 2775,
|
|
"valid_targets_mean": 6175.4,
|
|
"valid_targets_min": 3887
|
|
},
|
|
{
|
|
"epoch": 4.357647058823529,
|
|
"grad_norm": 0.39691105380569963,
|
|
"learning_rate": 1.5011364026741855e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13958340883255005,
|
|
"step": 2780,
|
|
"valid_targets_mean": 6673.4,
|
|
"valid_targets_min": 3618
|
|
},
|
|
{
|
|
"epoch": 4.365490196078431,
|
|
"grad_norm": 0.43661081180233746,
|
|
"learning_rate": 1.4935704527918937e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14695270359516144,
|
|
"step": 2785,
|
|
"valid_targets_mean": 6581.8,
|
|
"valid_targets_min": 4414
|
|
},
|
|
{
|
|
"epoch": 4.373333333333333,
|
|
"grad_norm": 0.40863500299394734,
|
|
"learning_rate": 1.4860122390122764e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14593222737312317,
|
|
"step": 2790,
|
|
"valid_targets_mean": 6116.5,
|
|
"valid_targets_min": 4051
|
|
},
|
|
{
|
|
"epoch": 4.381176470588235,
|
|
"grad_norm": 0.475295429629708,
|
|
"learning_rate": 1.4784618767928898e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12581712007522583,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4507.4,
|
|
"valid_targets_min": 3297
|
|
},
|
|
{
|
|
"epoch": 4.389019607843137,
|
|
"grad_norm": 0.45469343912432475,
|
|
"learning_rate": 1.4709194814713507e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13349707424640656,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4666.4,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 4.396862745098039,
|
|
"grad_norm": 0.41875147310143834,
|
|
"learning_rate": 1.4633851682635766e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11100316047668457,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4645.6,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.404705882352941,
|
|
"grad_norm": 0.4408284838489042,
|
|
"learning_rate": 1.4558590522620239e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14001163840293884,
|
|
"step": 2810,
|
|
"valid_targets_mean": 5793.5,
|
|
"valid_targets_min": 3699
|
|
},
|
|
{
|
|
"epoch": 4.412549019607843,
|
|
"grad_norm": 0.4646282293043246,
|
|
"learning_rate": 1.4483412484339301e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11910644918680191,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4785.8,
|
|
"valid_targets_min": 3638
|
|
},
|
|
{
|
|
"epoch": 4.420392156862745,
|
|
"grad_norm": 0.44310182616986576,
|
|
"learning_rate": 1.4408318716195581e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1331317126750946,
|
|
"step": 2820,
|
|
"valid_targets_mean": 5117.9,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 4.428235294117647,
|
|
"grad_norm": 0.48547553289245554,
|
|
"learning_rate": 1.4333310365304413e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09377738833427429,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4225.5,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 4.436078431372549,
|
|
"grad_norm": 0.40807911759860815,
|
|
"learning_rate": 1.4258388577476322e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10872749984264374,
|
|
"step": 2830,
|
|
"valid_targets_mean": 5272.0,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 4.443921568627451,
|
|
"grad_norm": 0.4279963815599532,
|
|
"learning_rate": 1.4183554497199514e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.113418810069561,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5528.8,
|
|
"valid_targets_min": 2550
|
|
},
|
|
{
|
|
"epoch": 4.451764705882353,
|
|
"grad_norm": 0.45965069219955873,
|
|
"learning_rate": 1.4108809267622381e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10628610849380493,
|
|
"step": 2840,
|
|
"valid_targets_mean": 4212.6,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 4.459607843137255,
|
|
"grad_norm": 0.42613985033761675,
|
|
"learning_rate": 1.4034154030536083e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12714120745658875,
|
|
"step": 2845,
|
|
"valid_targets_mean": 5532.8,
|
|
"valid_targets_min": 4137
|
|
},
|
|
{
|
|
"epoch": 4.467450980392157,
|
|
"grad_norm": 0.4382198571804567,
|
|
"learning_rate": 1.3959589926357042e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308916211128235,
|
|
"step": 2850,
|
|
"valid_targets_mean": 5463.6,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 4.475294117647059,
|
|
"grad_norm": 0.4192454419323077,
|
|
"learning_rate": 1.3885118094109575e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08181822299957275,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4375.1,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 4.483137254901961,
|
|
"grad_norm": 0.5532407130131899,
|
|
"learning_rate": 1.3810739671408467e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09779887646436691,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3727.5,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 4.490980392156863,
|
|
"grad_norm": 0.444360439292349,
|
|
"learning_rate": 1.3736455794441596e-05,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11745741963386536,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4643.2,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 4.498823529411765,
|
|
"grad_norm": 0.4200795782839923,
|
|
"learning_rate": 1.3662267597952588e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12091678380966187,
|
|
"step": 2870,
|
|
"valid_targets_mean": 5856.4,
|
|
"valid_targets_min": 3370
|
|
},
|
|
{
|
|
"epoch": 4.506666666666667,
|
|
"grad_norm": 0.42488592060924674,
|
|
"learning_rate": 1.3588176215223463e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10534407198429108,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4626.0,
|
|
"valid_targets_min": 3890
|
|
},
|
|
{
|
|
"epoch": 4.514509803921569,
|
|
"grad_norm": 0.4200545765520546,
|
|
"learning_rate": 1.3514182778057365e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10357141494750977,
|
|
"step": 2880,
|
|
"valid_targets_mean": 5135.2,
|
|
"valid_targets_min": 3790
|
|
},
|
|
{
|
|
"epoch": 4.522352941176471,
|
|
"grad_norm": 0.5215283256019683,
|
|
"learning_rate": 1.3440288416761216e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12107962369918823,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4660.5,
|
|
"valid_targets_min": 3653
|
|
},
|
|
{
|
|
"epoch": 4.530196078431373,
|
|
"grad_norm": 0.4424012943358386,
|
|
"learning_rate": 1.3366494260128484e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10020311921834946,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4800.6,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 4.5380392156862746,
|
|
"grad_norm": 0.4286355399479813,
|
|
"learning_rate": 1.3292801435421935e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1244986429810524,
|
|
"step": 2895,
|
|
"valid_targets_mean": 5535.9,
|
|
"valid_targets_min": 3469
|
|
},
|
|
{
|
|
"epoch": 4.5458823529411765,
|
|
"grad_norm": 0.430885468445451,
|
|
"learning_rate": 1.3219211068356418e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14919069409370422,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5873.8,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 4.553725490196078,
|
|
"grad_norm": 0.45289858258160803,
|
|
"learning_rate": 1.3145724283081651e-05,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10817940533161163,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4748.0,
|
|
"valid_targets_min": 4218
|
|
},
|
|
{
|
|
"epoch": 4.56156862745098,
|
|
"grad_norm": 0.41980663944214935,
|
|
"learning_rate": 1.3072342202165069e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11044943332672119,
|
|
"step": 2910,
|
|
"valid_targets_mean": 5293.0,
|
|
"valid_targets_min": 3835
|
|
},
|
|
{
|
|
"epoch": 4.569411764705882,
|
|
"grad_norm": 0.4839386189792505,
|
|
"learning_rate": 1.2999065946574656e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13633571565151215,
|
|
"step": 2915,
|
|
"valid_targets_mean": 5534.4,
|
|
"valid_targets_min": 3576
|
|
},
|
|
{
|
|
"epoch": 4.577254901960784,
|
|
"grad_norm": 0.42243480444753406,
|
|
"learning_rate": 1.2925896635661852e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725214570760727,
|
|
"step": 2920,
|
|
"valid_targets_mean": 7218.0,
|
|
"valid_targets_min": 4212
|
|
},
|
|
{
|
|
"epoch": 4.585098039215686,
|
|
"grad_norm": 0.4469954447796888,
|
|
"learning_rate": 1.2852835387144414e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10223016887903214,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4194.5,
|
|
"valid_targets_min": 3159
|
|
},
|
|
{
|
|
"epoch": 4.592941176470588,
|
|
"grad_norm": 0.45911654389244627,
|
|
"learning_rate": 1.2779883317089374e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10432572662830353,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4234.1,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.60078431372549,
|
|
"grad_norm": 0.452905704169797,
|
|
"learning_rate": 1.2707041539895974e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1017366498708725,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4624.2,
|
|
"valid_targets_min": 3290
|
|
},
|
|
{
|
|
"epoch": 4.608627450980392,
|
|
"grad_norm": 0.4446195839335299,
|
|
"learning_rate": 1.2634311168278652e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1165042296051979,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5227.2,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 4.616470588235294,
|
|
"grad_norm": 0.39868825687973575,
|
|
"learning_rate": 1.2561693313250034e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09707609564065933,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5251.4,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 4.624313725490196,
|
|
"grad_norm": 0.4216177868655537,
|
|
"learning_rate": 1.248918908410396e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10996775329113007,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4927.2,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 4.632156862745098,
|
|
"grad_norm": 0.7264160142237203,
|
|
"learning_rate": 1.2416799588398576e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08461538702249527,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4145.6,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"grad_norm": 0.4291908154396816,
|
|
"learning_rate": 1.2344525931939359e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11082827299833298,
|
|
"step": 2960,
|
|
"valid_targets_mean": 5010.8,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 4.647843137254902,
|
|
"grad_norm": 0.4199152718325381,
|
|
"learning_rate": 1.2272369218762266e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12899282574653625,
|
|
"step": 2965,
|
|
"valid_targets_mean": 5659.4,
|
|
"valid_targets_min": 3966
|
|
},
|
|
{
|
|
"epoch": 4.6556862745098035,
|
|
"grad_norm": 0.4604380546521672,
|
|
"learning_rate": 1.2200330551116838e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11434175074100494,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4256.5,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 4.663529411764706,
|
|
"grad_norm": 0.49072667250137836,
|
|
"learning_rate": 1.2128411029449403e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11742053925991058,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4721.1,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.671372549019608,
|
|
"grad_norm": 0.458501520651535,
|
|
"learning_rate": 1.205661175238622e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12470651417970657,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5843.1,
|
|
"valid_targets_min": 3848
|
|
},
|
|
{
|
|
"epoch": 4.67921568627451,
|
|
"grad_norm": 0.4308150069093234,
|
|
"learning_rate": 1.1984933816716726e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12182128429412842,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5752.2,
|
|
"valid_targets_min": 3848
|
|
},
|
|
{
|
|
"epoch": 4.687058823529412,
|
|
"grad_norm": 0.44479697330206897,
|
|
"learning_rate": 1.1913378317376796e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11784543842077255,
|
|
"step": 2990,
|
|
"valid_targets_mean": 5041.1,
|
|
"valid_targets_min": 3733
|
|
},
|
|
{
|
|
"epoch": 4.694901960784314,
|
|
"grad_norm": 0.44454023318061986,
|
|
"learning_rate": 1.1841946347431961e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10518812388181686,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4918.2,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 4.702745098039216,
|
|
"grad_norm": 0.4183316201089064,
|
|
"learning_rate": 1.1770638998060764e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10675269365310669,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4699.8,
|
|
"valid_targets_min": 4095
|
|
},
|
|
{
|
|
"epoch": 4.710588235294118,
|
|
"grad_norm": 0.41360698717185757,
|
|
"learning_rate": 1.1699457358538072e-05,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09616823494434357,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5704.4,
|
|
"valid_targets_min": 4123
|
|
},
|
|
{
|
|
"epoch": 4.71843137254902,
|
|
"grad_norm": 0.4113294167567783,
|
|
"learning_rate": 1.1628402516218432e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10045308619737625,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4660.0,
|
|
"valid_targets_min": 3935
|
|
},
|
|
{
|
|
"epoch": 4.726274509803922,
|
|
"grad_norm": 0.4997749546359887,
|
|
"learning_rate": 1.1557475556519461e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11495951563119888,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3924.1,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 4.734117647058824,
|
|
"grad_norm": 0.4469295834755647,
|
|
"learning_rate": 1.1486677562905281e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1331641972064972,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5024.4,
|
|
"valid_targets_min": 3857
|
|
},
|
|
{
|
|
"epoch": 4.741960784313726,
|
|
"grad_norm": 0.3873914400544633,
|
|
"learning_rate": 1.1416009616869959e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1114264726638794,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5801.9,
|
|
"valid_targets_min": 2997
|
|
},
|
|
{
|
|
"epoch": 4.749803921568628,
|
|
"grad_norm": 0.4225044408926774,
|
|
"learning_rate": 1.134547279792098e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13369326293468475,
|
|
"step": 3030,
|
|
"valid_targets_mean": 6771.4,
|
|
"valid_targets_min": 3721
|
|
},
|
|
{
|
|
"epoch": 4.75764705882353,
|
|
"grad_norm": 0.49039891697784743,
|
|
"learning_rate": 1.1275068183562747e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427251100540161,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5687.2,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 4.7654901960784315,
|
|
"grad_norm": 0.45410172670277094,
|
|
"learning_rate": 1.1204796849280167e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10159337520599365,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4214.5,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 4.773333333333333,
|
|
"grad_norm": 0.4622914859651975,
|
|
"learning_rate": 1.1134659868522158e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267380267381668,
|
|
"step": 3045,
|
|
"valid_targets_mean": 5010.5,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.781176470588235,
|
|
"grad_norm": 0.47045215080087177,
|
|
"learning_rate": 1.106465831268531e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11707454174757004,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5423.0,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 4.789019607843137,
|
|
"grad_norm": 0.48230473285593406,
|
|
"learning_rate": 1.0994793251097468e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10609722137451172,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4305.5,
|
|
"valid_targets_min": 3645
|
|
},
|
|
{
|
|
"epoch": 4.796862745098039,
|
|
"grad_norm": 0.5021067379610036,
|
|
"learning_rate": 1.0925065751001445e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10157807916402817,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3710.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 4.804705882352941,
|
|
"grad_norm": 0.4409944385549482,
|
|
"learning_rate": 1.0855476877538687e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10870078951120377,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4619.1,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 4.812549019607843,
|
|
"grad_norm": 0.44312872363060424,
|
|
"learning_rate": 1.0786027693733015e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09856695681810379,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4376.0,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.820392156862745,
|
|
"grad_norm": 0.4228137519417471,
|
|
"learning_rate": 1.0716719260474365e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08073513954877853,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3504.0,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.828235294117647,
|
|
"grad_norm": 0.4492912860086454,
|
|
"learning_rate": 1.0647552636502629e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12045551091432571,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4412.9,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.836078431372549,
|
|
"grad_norm": 1.0408966059706204,
|
|
"learning_rate": 1.0578528878391419e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12493710219860077,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5019.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 4.843921568627451,
|
|
"grad_norm": 0.4192359756622533,
|
|
"learning_rate": 1.0509649040531994e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09598597884178162,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4504.4,
|
|
"valid_targets_min": 3380
|
|
},
|
|
{
|
|
"epoch": 4.851764705882353,
|
|
"grad_norm": 0.426501242282498,
|
|
"learning_rate": 1.044091417511709e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11279591917991638,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5271.6,
|
|
"valid_targets_min": 4448
|
|
},
|
|
{
|
|
"epoch": 4.859607843137255,
|
|
"grad_norm": 0.5341696931177127,
|
|
"learning_rate": 1.0372325332124896e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11988389492034912,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5265.9,
|
|
"valid_targets_min": 3534
|
|
},
|
|
{
|
|
"epoch": 4.867450980392157,
|
|
"grad_norm": 0.4411575534344696,
|
|
"learning_rate": 1.0303883559302999e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11052796244621277,
|
|
"step": 3105,
|
|
"valid_targets_mean": 5221.1,
|
|
"valid_targets_min": 3672
|
|
},
|
|
{
|
|
"epoch": 4.875294117647059,
|
|
"grad_norm": 0.43327957470978096,
|
|
"learning_rate": 1.0235589902152351e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12073686718940735,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4906.5,
|
|
"valid_targets_min": 3891
|
|
},
|
|
{
|
|
"epoch": 4.8831372549019605,
|
|
"grad_norm": 0.46410452856624584,
|
|
"learning_rate": 1.0167445403911356e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11970961838960648,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4996.5,
|
|
"valid_targets_min": 3709
|
|
},
|
|
{
|
|
"epoch": 4.890980392156862,
|
|
"grad_norm": 0.4457445424078122,
|
|
"learning_rate": 1.0099451105539866e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11119598895311356,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4615.5,
|
|
"valid_targets_min": 2185
|
|
},
|
|
{
|
|
"epoch": 4.898823529411764,
|
|
"grad_norm": 0.4238986113816507,
|
|
"learning_rate": 1.0031608045703347e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1175379753112793,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4690.8,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 4.906666666666666,
|
|
"grad_norm": 0.4969126965651698,
|
|
"learning_rate": 9.963917260756937e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10803299397230148,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3725.6,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 4.914509803921568,
|
|
"grad_norm": 0.42476715117328734,
|
|
"learning_rate": 9.89637978472972e-06,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12074100971221924,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5526.5,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.92235294117647,
|
|
"grad_norm": 0.4732390657116341,
|
|
"learning_rate": 9.828996649308804e-06,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14218966662883759,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5210.5,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 4.930196078431372,
|
|
"grad_norm": 0.4511606219481335,
|
|
"learning_rate": 9.76176888382367e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10251887142658234,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4451.0,
|
|
"valid_targets_min": 3309
|
|
},
|
|
{
|
|
"epoch": 4.938039215686274,
|
|
"grad_norm": 0.40999282678558024,
|
|
"learning_rate": 9.694697515230371e-06,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1229824647307396,
|
|
"step": 3150,
|
|
"valid_targets_mean": 5916.8,
|
|
"valid_targets_min": 3639
|
|
},
|
|
{
|
|
"epoch": 4.945882352941177,
|
|
"grad_norm": 0.4529754066112067,
|
|
"learning_rate": 9.627783568095905e-06,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377158910036087,
|
|
"step": 3155,
|
|
"valid_targets_mean": 6208.2,
|
|
"valid_targets_min": 3652
|
|
},
|
|
{
|
|
"epoch": 4.953725490196079,
|
|
"grad_norm": 0.4242985299137044,
|
|
"learning_rate": 9.561028064582507e-06,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10480914264917374,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5257.2,
|
|
"valid_targets_min": 4459
|
|
},
|
|
{
|
|
"epoch": 4.961568627450981,
|
|
"grad_norm": 0.41878222358740275,
|
|
"learning_rate": 9.494432024432087e-06,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13485050201416016,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5783.6,
|
|
"valid_targets_min": 3438
|
|
},
|
|
{
|
|
"epoch": 4.969411764705883,
|
|
"grad_norm": 0.4483772324698896,
|
|
"learning_rate": 9.42799646495062e-06,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10306377708911896,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4810.8,
|
|
"valid_targets_min": 3566
|
|
},
|
|
{
|
|
"epoch": 4.977254901960785,
|
|
"grad_norm": 0.4575109733204383,
|
|
"learning_rate": 9.361722400992618e-06,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13620686531066895,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5052.2,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.9850980392156865,
|
|
"grad_norm": 0.4664072811099875,
|
|
"learning_rate": 9.295610844945613e-06,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1414259970188141,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5369.8,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 4.992941176470588,
|
|
"grad_norm": 0.4084175714626147,
|
|
"learning_rate": 9.229662806714721e-06,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1109371930360794,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5307.1,
|
|
"valid_targets_min": 4409
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6467547947661242,
|
|
"learning_rate": 9.163879293707172e-06,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.278501957654953,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5547.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.007843137254902,
|
|
"grad_norm": 0.40387115368040144,
|
|
"learning_rate": 9.09826131081698e-06,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10747785866260529,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5887.5,
|
|
"valid_targets_min": 3970
|
|
},
|
|
{
|
|
"epoch": 5.015686274509804,
|
|
"grad_norm": 0.45143393354352845,
|
|
"learning_rate": 9.03280986040952e-06,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13973960280418396,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5593.4,
|
|
"valid_targets_min": 3313
|
|
},
|
|
{
|
|
"epoch": 5.023529411764706,
|
|
"grad_norm": 0.5170646910567398,
|
|
"learning_rate": 8.967525942306285e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12646545469760895,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4262.0,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 5.031372549019608,
|
|
"grad_norm": 0.4177365278956721,
|
|
"learning_rate": 8.902410553769575e-06,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11722061038017273,
|
|
"step": 3210,
|
|
"valid_targets_mean": 6452.1,
|
|
"valid_targets_min": 4591
|
|
},
|
|
{
|
|
"epoch": 5.03921568627451,
|
|
"grad_norm": 0.45982665324198024,
|
|
"learning_rate": 8.837464689487261e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14046096801757812,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5643.9,
|
|
"valid_targets_min": 4702
|
|
},
|
|
{
|
|
"epoch": 5.047058823529412,
|
|
"grad_norm": 0.46047545116826843,
|
|
"learning_rate": 8.772689341557611e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08853432536125183,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3722.0,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 5.0549019607843135,
|
|
"grad_norm": 0.4214879259510868,
|
|
"learning_rate": 8.708085499474112e-06,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1543363481760025,
|
|
"step": 3225,
|
|
"valid_targets_mean": 6443.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 5.0627450980392155,
|
|
"grad_norm": 0.49671062233371066,
|
|
"learning_rate": 8.643654150110387e-06,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09493766725063324,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3947.6,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 5.070588235294117,
|
|
"grad_norm": 0.44383290688698823,
|
|
"learning_rate": 8.579396277705071e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11513125896453857,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5591.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 5.078431372549019,
|
|
"grad_norm": 0.41639923855857314,
|
|
"learning_rate": 8.51531286384683e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12974339723587036,
|
|
"step": 3240,
|
|
"valid_targets_mean": 6583.4,
|
|
"valid_targets_min": 4301
|
|
},
|
|
{
|
|
"epoch": 5.086274509803921,
|
|
"grad_norm": 0.4817613204052127,
|
|
"learning_rate": 8.451404887459325e-06,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10065077245235443,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4468.6,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 5.094117647058823,
|
|
"grad_norm": 0.41458592266722405,
|
|
"learning_rate": 8.387673324786292e-06,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09595533460378647,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5498.2,
|
|
"valid_targets_min": 3578
|
|
},
|
|
{
|
|
"epoch": 5.101960784313725,
|
|
"grad_norm": 0.455325990243027,
|
|
"learning_rate": 8.324119149376584e-06,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08556359261274338,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3785.8,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 5.109803921568627,
|
|
"grad_norm": 0.4545717072003482,
|
|
"learning_rate": 8.260743332069355e-06,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08474293351173401,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3495.5,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 5.117647058823529,
|
|
"grad_norm": 0.451686422050851,
|
|
"learning_rate": 8.197546840979172e-06,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12904678285121918,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5611.8,
|
|
"valid_targets_min": 4490
|
|
},
|
|
{
|
|
"epoch": 5.125490196078431,
|
|
"grad_norm": 0.4533041440520356,
|
|
"learning_rate": 8.134530641481289e-06,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10058914870023727,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5074.5,
|
|
"valid_targets_min": 4181
|
|
},
|
|
{
|
|
"epoch": 5.133333333333334,
|
|
"grad_norm": 0.4447041485071207,
|
|
"learning_rate": 8.071695696196824e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12126485258340836,
|
|
"step": 3275,
|
|
"valid_targets_mean": 6402.9,
|
|
"valid_targets_min": 2974
|
|
},
|
|
{
|
|
"epoch": 5.141176470588236,
|
|
"grad_norm": 0.45393041745826174,
|
|
"learning_rate": 8.00904296497815e-06,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10773161798715591,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4721.6,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 5.149019607843138,
|
|
"grad_norm": 0.44358510403133994,
|
|
"learning_rate": 7.946573404894133e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11503250896930695,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4849.8,
|
|
"valid_targets_min": 3997
|
|
},
|
|
{
|
|
"epoch": 5.1568627450980395,
|
|
"grad_norm": 0.43628925896890625,
|
|
"learning_rate": 7.88428797021559e-06,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10749220848083496,
|
|
"step": 3290,
|
|
"valid_targets_mean": 6218.1,
|
|
"valid_targets_min": 3660
|
|
},
|
|
{
|
|
"epoch": 5.1647058823529415,
|
|
"grad_norm": 0.4997173668196613,
|
|
"learning_rate": 7.82218761240065e-06,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13712440431118011,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4569.8,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 5.172549019607843,
|
|
"grad_norm": 0.5678602543997687,
|
|
"learning_rate": 7.760273280080282e-06,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10061115026473999,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2956.1,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 5.180392156862745,
|
|
"grad_norm": 0.47622399456179687,
|
|
"learning_rate": 7.69854591904374e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10639934241771698,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4359.8,
|
|
"valid_targets_min": 2734
|
|
},
|
|
{
|
|
"epoch": 5.188235294117647,
|
|
"grad_norm": 0.5152270602959604,
|
|
"learning_rate": 7.637006472224173e-06,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13346779346466064,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4612.8,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 5.196078431372549,
|
|
"grad_norm": 0.4780328019266813,
|
|
"learning_rate": 7.575655879684192e-06,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11803792417049408,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4777.1,
|
|
"valid_targets_min": 4313
|
|
},
|
|
{
|
|
"epoch": 5.203921568627451,
|
|
"grad_norm": 0.44076887070103893,
|
|
"learning_rate": 7.514495078601492e-06,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08803778141736984,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4427.4,
|
|
"valid_targets_min": 3267
|
|
},
|
|
{
|
|
"epoch": 5.211764705882353,
|
|
"grad_norm": 0.44122086049924814,
|
|
"learning_rate": 7.453525003254585e-06,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11798333376646042,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5929.4,
|
|
"valid_targets_min": 3780
|
|
},
|
|
{
|
|
"epoch": 5.219607843137255,
|
|
"grad_norm": 0.424878856482959,
|
|
"learning_rate": 7.39274658500847e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12534697353839874,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5617.8,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 5.227450980392157,
|
|
"grad_norm": 0.568778481226979,
|
|
"learning_rate": 7.33216075230047e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11138676106929779,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4802.5,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 5.235294117647059,
|
|
"grad_norm": 0.4452420707521999,
|
|
"learning_rate": 7.271768430625983e-06,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11331316828727722,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4633.6,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 5.243137254901961,
|
|
"grad_norm": 0.46420148150643264,
|
|
"learning_rate": 7.2115705425243996e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09831637889146805,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4430.6,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 5.250980392156863,
|
|
"grad_norm": 0.44459726741360717,
|
|
"learning_rate": 7.151568007564962e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11376352608203888,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4682.2,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 5.258823529411765,
|
|
"grad_norm": 0.4311003479897479,
|
|
"learning_rate": 7.091761742332786e-06,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12164557725191116,
|
|
"step": 3355,
|
|
"valid_targets_mean": 5216.8,
|
|
"valid_targets_min": 4379
|
|
},
|
|
{
|
|
"epoch": 5.266666666666667,
|
|
"grad_norm": 0.4451618391957542,
|
|
"learning_rate": 7.032152660414764e-06,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.127460315823555,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5033.1,
|
|
"valid_targets_min": 3806
|
|
},
|
|
{
|
|
"epoch": 5.2745098039215685,
|
|
"grad_norm": 0.46225025121912444,
|
|
"learning_rate": 6.972741672385699e-06,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10879524052143097,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4410.0,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 5.2823529411764705,
|
|
"grad_norm": 0.45290397096114937,
|
|
"learning_rate": 6.913529685794333e-06,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09558887034654617,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4651.5,
|
|
"valid_targets_min": 4220
|
|
},
|
|
{
|
|
"epoch": 5.290196078431372,
|
|
"grad_norm": 0.48261339698594724,
|
|
"learning_rate": 6.854517605149526e-06,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10471512377262115,
|
|
"step": 3375,
|
|
"valid_targets_mean": 4497.6,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.298039215686274,
|
|
"grad_norm": 0.5223086996794308,
|
|
"learning_rate": 6.795706331906402e-06,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12723124027252197,
|
|
"step": 3380,
|
|
"valid_targets_mean": 5067.9,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 5.305882352941176,
|
|
"grad_norm": 0.4484965233257951,
|
|
"learning_rate": 6.737096764452609e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1439855396747589,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5265.8,
|
|
"valid_targets_min": 3597
|
|
},
|
|
{
|
|
"epoch": 5.313725490196078,
|
|
"grad_norm": 0.46966081881038835,
|
|
"learning_rate": 6.678689798094582e-06,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15899275243282318,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5395.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 5.32156862745098,
|
|
"grad_norm": 0.4610547507319241,
|
|
"learning_rate": 6.620486325043871e-06,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1168796718120575,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4572.5,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 5.329411764705882,
|
|
"grad_norm": 0.4249547346844441,
|
|
"learning_rate": 6.562487234403492e-06,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07667449116706848,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3854.9,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 5.337254901960784,
|
|
"grad_norm": 0.5036540762705398,
|
|
"learning_rate": 6.504693412154384e-06,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1367102861404419,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5214.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 5.345098039215686,
|
|
"grad_norm": 0.49092534393309145,
|
|
"learning_rate": 6.447105741141828e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12212163954973221,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4840.9,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 5.352941176470588,
|
|
"grad_norm": 0.4308675993577972,
|
|
"learning_rate": 6.389725101062017e-06,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10715265572071075,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4416.5,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 5.36078431372549,
|
|
"grad_norm": 0.4481341239142485,
|
|
"learning_rate": 6.332552368448552e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09622816741466522,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4727.2,
|
|
"valid_targets_min": 3663
|
|
},
|
|
{
|
|
"epoch": 5.368627450980393,
|
|
"grad_norm": 0.42938641110064063,
|
|
"learning_rate": 6.275588416659111e-06,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11082536727190018,
|
|
"step": 3425,
|
|
"valid_targets_mean": 6231.6,
|
|
"valid_targets_min": 4385
|
|
},
|
|
{
|
|
"epoch": 5.376470588235295,
|
|
"grad_norm": 0.41006221308335794,
|
|
"learning_rate": 6.218834115862082e-06,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13855819404125214,
|
|
"step": 3430,
|
|
"valid_targets_mean": 7189.1,
|
|
"valid_targets_min": 4094
|
|
},
|
|
{
|
|
"epoch": 5.3843137254901965,
|
|
"grad_norm": 0.4851566191798374,
|
|
"learning_rate": 6.162290333023255e-06,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08945602178573608,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4465.5,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 5.392156862745098,
|
|
"grad_norm": 0.48931355595220627,
|
|
"learning_rate": 6.105957931892619e-06,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09989559650421143,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4265.6,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 5.4,
|
|
"grad_norm": 0.47032888212749213,
|
|
"learning_rate": 6.049837772991119e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10447871685028076,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4224.2,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 5.407843137254902,
|
|
"grad_norm": 0.492717333292633,
|
|
"learning_rate": 5.99393071359756e-06,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10688762366771698,
|
|
"step": 3450,
|
|
"valid_targets_mean": 4938.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.415686274509804,
|
|
"grad_norm": 1.4420089359222485,
|
|
"learning_rate": 5.93823760773546e-06,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12139762938022614,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4968.8,
|
|
"valid_targets_min": 3542
|
|
},
|
|
{
|
|
"epoch": 5.423529411764706,
|
|
"grad_norm": 0.4517197547603553,
|
|
"learning_rate": 5.882759306160071e-06,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12694215774536133,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5412.6,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 5.431372549019608,
|
|
"grad_norm": 0.45973692771505037,
|
|
"learning_rate": 5.827496656345299e-06,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1373184472322464,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5169.2,
|
|
"valid_targets_min": 3526
|
|
},
|
|
{
|
|
"epoch": 5.43921568627451,
|
|
"grad_norm": 0.5065418637453369,
|
|
"learning_rate": 5.772450502470841e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1180776059627533,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4207.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 5.447058823529412,
|
|
"grad_norm": 0.4707896045299758,
|
|
"learning_rate": 5.71762168540922e-06,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1182604730129242,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4802.8,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 5.454901960784314,
|
|
"grad_norm": 0.5024641672118548,
|
|
"learning_rate": 5.663011042712996e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08559073507785797,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3836.0,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 5.462745098039216,
|
|
"grad_norm": 0.46735202669260495,
|
|
"learning_rate": 5.608619408601925e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12076675891876221,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5519.2,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 5.470588235294118,
|
|
"grad_norm": 0.4696336699134175,
|
|
"learning_rate": 5.554447613950262e-06,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09073761105537415,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4277.1,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 5.47843137254902,
|
|
"grad_norm": 0.6036366533315168,
|
|
"learning_rate": 5.500496486274014e-06,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11612799763679504,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4985.4,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 5.486274509803922,
|
|
"grad_norm": 0.47726925266975956,
|
|
"learning_rate": 5.446766849718368e-06,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12205741554498672,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4844.0,
|
|
"valid_targets_min": 3442
|
|
},
|
|
{
|
|
"epoch": 5.4941176470588236,
|
|
"grad_norm": 0.4916736015437308,
|
|
"learning_rate": 5.393259525045025e-06,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252761334180832,
|
|
"step": 3505,
|
|
"valid_targets_mean": 5064.5,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 5.5019607843137255,
|
|
"grad_norm": 0.440080474865469,
|
|
"learning_rate": 5.3399753296197286e-06,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1194039136171341,
|
|
"step": 3510,
|
|
"valid_targets_mean": 6281.8,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 5.509803921568627,
|
|
"grad_norm": 0.4676090968245843,
|
|
"learning_rate": 5.286915077399728e-06,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12264136224985123,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4933.1,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 5.517647058823529,
|
|
"grad_norm": 0.45612503188721304,
|
|
"learning_rate": 5.2340795789213896e-06,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11936895549297333,
|
|
"step": 3520,
|
|
"valid_targets_mean": 5021.1,
|
|
"valid_targets_min": 4010
|
|
},
|
|
{
|
|
"epoch": 5.525490196078431,
|
|
"grad_norm": 0.4830786890949812,
|
|
"learning_rate": 5.1814696412877665e-06,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0968177393078804,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4000.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.533333333333333,
|
|
"grad_norm": 0.42333378915841735,
|
|
"learning_rate": 5.1290860681563195e-06,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1203262209892273,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5863.4,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 5.541176470588235,
|
|
"grad_norm": 0.4299669760963479,
|
|
"learning_rate": 5.0769296597266126e-06,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0961022824048996,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4194.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 5.549019607843137,
|
|
"grad_norm": 0.4913004808346495,
|
|
"learning_rate": 5.0250012127280755e-06,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14035651087760925,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5527.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 5.556862745098039,
|
|
"grad_norm": 0.42420621008985054,
|
|
"learning_rate": 4.973301520407874e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11041077226400375,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4931.5,
|
|
"valid_targets_min": 3822
|
|
},
|
|
{
|
|
"epoch": 5.564705882352941,
|
|
"grad_norm": 0.4397064800423962,
|
|
"learning_rate": 4.921831372518751e-06,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1008322536945343,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4589.4,
|
|
"valid_targets_min": 3745
|
|
},
|
|
{
|
|
"epoch": 5.572549019607843,
|
|
"grad_norm": 0.42983114719825694,
|
|
"learning_rate": 4.870591555306996e-06,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12528643012046814,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5302.1,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 5.580392156862745,
|
|
"grad_norm": 0.41251872258649447,
|
|
"learning_rate": 4.819582851500406e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11194668710231781,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5665.6,
|
|
"valid_targets_min": 3842
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 0.41089046624017184,
|
|
"learning_rate": 4.768806040296348e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10383773595094681,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5147.0,
|
|
"valid_targets_min": 4120
|
|
},
|
|
{
|
|
"epoch": 5.596078431372549,
|
|
"grad_norm": 0.44250020714243427,
|
|
"learning_rate": 4.718261897349856e-06,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924265325069427,
|
|
"step": 3570,
|
|
"valid_targets_mean": 5651.6,
|
|
"valid_targets_min": 3699
|
|
},
|
|
{
|
|
"epoch": 5.603921568627451,
|
|
"grad_norm": 0.4520177978675399,
|
|
"learning_rate": 4.667951194761773e-06,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11091027408838272,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5147.1,
|
|
"valid_targets_min": 3362
|
|
},
|
|
{
|
|
"epoch": 5.6117647058823525,
|
|
"grad_norm": 0.46655243788650486,
|
|
"learning_rate": 4.6178747010669485e-06,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09887704253196716,
|
|
"step": 3580,
|
|
"valid_targets_mean": 5145.6,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 5.6196078431372545,
|
|
"grad_norm": 0.43380729747663566,
|
|
"learning_rate": 4.5680331812225335e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11621832847595215,
|
|
"step": 3585,
|
|
"valid_targets_mean": 6146.0,
|
|
"valid_targets_min": 4295
|
|
},
|
|
{
|
|
"epoch": 5.627450980392156,
|
|
"grad_norm": 0.42101797383398143,
|
|
"learning_rate": 4.518427396596246e-06,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09557899832725525,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4636.6,
|
|
"valid_targets_min": 3449
|
|
},
|
|
{
|
|
"epoch": 5.635294117647058,
|
|
"grad_norm": 0.4332157513198714,
|
|
"learning_rate": 4.469058104954786e-06,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10167904198169708,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4072.9,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 5.643137254901961,
|
|
"grad_norm": 0.4433003894407488,
|
|
"learning_rate": 4.4199260604522266e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08998174965381622,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4496.6,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 5.650980392156863,
|
|
"grad_norm": 0.4488408459473684,
|
|
"learning_rate": 4.371032013618519e-06,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12565404176712036,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5324.5,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 5.658823529411765,
|
|
"grad_norm": 0.44653324730690613,
|
|
"learning_rate": 4.322376711348009e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299017071723938,
|
|
"step": 3610,
|
|
"valid_targets_mean": 5719.5,
|
|
"valid_targets_min": 4228
|
|
},
|
|
{
|
|
"epoch": 5.666666666666667,
|
|
"grad_norm": 0.4807605716107027,
|
|
"learning_rate": 4.273960896888045e-06,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10924605280160904,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4711.0,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 5.674509803921569,
|
|
"grad_norm": 0.4375515041383165,
|
|
"learning_rate": 4.225785309827595e-06,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16533643007278442,
|
|
"step": 3620,
|
|
"valid_targets_mean": 7468.0,
|
|
"valid_targets_min": 4598
|
|
},
|
|
{
|
|
"epoch": 5.682352941176471,
|
|
"grad_norm": 0.4420469898871082,
|
|
"learning_rate": 4.177850686085987e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09723871201276779,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5326.0,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 5.690196078431373,
|
|
"grad_norm": 0.5190219956910557,
|
|
"learning_rate": 4.130157757901632e-06,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0885414406657219,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4358.0,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.698039215686275,
|
|
"grad_norm": 0.4183514479684243,
|
|
"learning_rate": 4.0827072538208705e-06,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09757456183433533,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4744.9,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 5.705882352941177,
|
|
"grad_norm": 0.45008902375660204,
|
|
"learning_rate": 4.03549989868681e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252342164516449,
|
|
"step": 3640,
|
|
"valid_targets_mean": 5274.1,
|
|
"valid_targets_min": 4356
|
|
},
|
|
{
|
|
"epoch": 5.713725490196079,
|
|
"grad_norm": 0.43044920377943224,
|
|
"learning_rate": 3.988536413628286e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11895223706960678,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4859.6,
|
|
"valid_targets_min": 4330
|
|
},
|
|
{
|
|
"epoch": 5.7215686274509805,
|
|
"grad_norm": 0.4547106288349754,
|
|
"learning_rate": 3.941817516048827e-06,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1304386854171753,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5126.4,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 5.729411764705882,
|
|
"grad_norm": 0.4358567072415122,
|
|
"learning_rate": 3.895343919615686e-06,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09702654927968979,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 3603
|
|
},
|
|
{
|
|
"epoch": 5.737254901960784,
|
|
"grad_norm": 0.43283259864119533,
|
|
"learning_rate": 3.849116334248972e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12856844067573547,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5489.1,
|
|
"valid_targets_min": 4068
|
|
},
|
|
{
|
|
"epoch": 5.745098039215686,
|
|
"grad_norm": 0.4420398806206634,
|
|
"learning_rate": 3.803135466110761e-06,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14836335182189941,
|
|
"step": 3665,
|
|
"valid_targets_mean": 6416.6,
|
|
"valid_targets_min": 3531
|
|
},
|
|
{
|
|
"epoch": 5.752941176470588,
|
|
"grad_norm": 0.48584018258685463,
|
|
"learning_rate": 3.7574020175943514e-06,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134757399559021,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4905.2,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 5.76078431372549,
|
|
"grad_norm": 0.4466902118893254,
|
|
"learning_rate": 3.7119166873135102e-06,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11019237339496613,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4478.0,
|
|
"valid_targets_min": 3613
|
|
},
|
|
{
|
|
"epoch": 5.768627450980392,
|
|
"grad_norm": 0.43514773978127463,
|
|
"learning_rate": 3.666680170091812e-06,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09685391187667847,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4997.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.776470588235294,
|
|
"grad_norm": 0.4563081739000789,
|
|
"learning_rate": 3.6216931569519995e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273442655801773,
|
|
"step": 3685,
|
|
"valid_targets_mean": 5875.6,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 5.784313725490196,
|
|
"grad_norm": 0.44541713200948646,
|
|
"learning_rate": 3.576956335105477e-06,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12498340010643005,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4812.2,
|
|
"valid_targets_min": 4032
|
|
},
|
|
{
|
|
"epoch": 5.792156862745098,
|
|
"grad_norm": 0.4890839073410703,
|
|
"learning_rate": 3.5324703879417553e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12750136852264404,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4419.8,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 5.8,
|
|
"grad_norm": 0.423189067918186,
|
|
"learning_rate": 3.488235995018063e-06,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10611607134342194,
|
|
"step": 3700,
|
|
"valid_targets_mean": 5321.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 5.807843137254902,
|
|
"grad_norm": 0.4603090763488122,
|
|
"learning_rate": 3.444253832048925e-06,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12216217070817947,
|
|
"step": 3705,
|
|
"valid_targets_mean": 5663.4,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.815686274509804,
|
|
"grad_norm": 0.43406157889209235,
|
|
"learning_rate": 3.4005245708958757e-06,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10026943683624268,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5337.1,
|
|
"valid_targets_min": 3646
|
|
},
|
|
{
|
|
"epoch": 5.823529411764706,
|
|
"grad_norm": 0.45578923444429736,
|
|
"learning_rate": 3.3570488795571653e-06,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10260643810033798,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4458.5,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.8313725490196076,
|
|
"grad_norm": 0.4441416835886368,
|
|
"learning_rate": 3.313827422157583e-06,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11893025785684586,
|
|
"step": 3720,
|
|
"valid_targets_mean": 5243.4,
|
|
"valid_targets_min": 4273
|
|
},
|
|
{
|
|
"epoch": 5.8392156862745095,
|
|
"grad_norm": 0.4490186340963944,
|
|
"learning_rate": 3.2708608589382783e-06,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1208035871386528,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5224.9,
|
|
"valid_targets_min": 3941
|
|
},
|
|
{
|
|
"epoch": 5.847058823529411,
|
|
"grad_norm": 0.4998487790679273,
|
|
"learning_rate": 3.2281498462467177e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12360478937625885,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4348.6,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 5.854901960784313,
|
|
"grad_norm": 0.41982853355713934,
|
|
"learning_rate": 3.185695036526615e-06,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09100072085857391,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4289.4,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 5.862745098039216,
|
|
"grad_norm": 0.4611918743721464,
|
|
"learning_rate": 3.1434970783080043e-06,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11276879161596298,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5198.1,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 5.870588235294118,
|
|
"grad_norm": 0.45917677458750794,
|
|
"learning_rate": 3.1015566161972967e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12548470497131348,
|
|
"step": 3745,
|
|
"valid_targets_mean": 5063.1,
|
|
"valid_targets_min": 3888
|
|
},
|
|
{
|
|
"epoch": 5.87843137254902,
|
|
"grad_norm": 0.42903473316059026,
|
|
"learning_rate": 3.059874290867464e-06,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12700194120407104,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5993.8,
|
|
"valid_targets_min": 3910
|
|
},
|
|
{
|
|
"epoch": 5.886274509803922,
|
|
"grad_norm": 0.48598276291447545,
|
|
"learning_rate": 3.01845073904824e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10122105479240417,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3913.8,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 5.894117647058824,
|
|
"grad_norm": 0.46358860073680896,
|
|
"learning_rate": 2.977286593516382e-06,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11198492348194122,
|
|
"step": 3760,
|
|
"valid_targets_mean": 5098.1,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 5.901960784313726,
|
|
"grad_norm": 0.43396800884845904,
|
|
"learning_rate": 2.936382483086022e-06,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0948493629693985,
|
|
"step": 3765,
|
|
"valid_targets_mean": 5090.5,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 5.909803921568628,
|
|
"grad_norm": 0.42396863953417807,
|
|
"learning_rate": 2.895739032599065e-06,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10739844292402267,
|
|
"step": 3770,
|
|
"valid_targets_mean": 5082.1,
|
|
"valid_targets_min": 3741
|
|
},
|
|
{
|
|
"epoch": 5.91764705882353,
|
|
"grad_norm": 0.4367233791726713,
|
|
"learning_rate": 2.8553568629156124e-06,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11178376525640488,
|
|
"step": 3775,
|
|
"valid_targets_mean": 5284.2,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 5.925490196078432,
|
|
"grad_norm": 0.43952645084490743,
|
|
"learning_rate": 2.815236590904522e-06,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13286638259887695,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5756.5,
|
|
"valid_targets_min": 3362
|
|
},
|
|
{
|
|
"epoch": 5.933333333333334,
|
|
"grad_norm": 0.47695086720882024,
|
|
"learning_rate": 2.7753788294339412e-06,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10761082172393799,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4253.6,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 5.9411764705882355,
|
|
"grad_norm": 0.5099850777126819,
|
|
"learning_rate": 2.7357841873619804e-06,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1005336195230484,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4683.2,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.949019607843137,
|
|
"grad_norm": 0.42328334889515123,
|
|
"learning_rate": 2.6964532695273993e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10245101898908615,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4895.5,
|
|
"valid_targets_min": 3691
|
|
},
|
|
{
|
|
"epoch": 5.956862745098039,
|
|
"grad_norm": 0.47936537216048497,
|
|
"learning_rate": 2.65738667674035e-06,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11452037841081619,
|
|
"step": 3800,
|
|
"valid_targets_mean": 5202.2,
|
|
"valid_targets_min": 4291
|
|
},
|
|
{
|
|
"epoch": 5.964705882352941,
|
|
"grad_norm": 0.45793993615683437,
|
|
"learning_rate": 2.6185850057732377e-06,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12399207055568695,
|
|
"step": 3805,
|
|
"valid_targets_mean": 5232.4,
|
|
"valid_targets_min": 3835
|
|
},
|
|
{
|
|
"epoch": 5.972549019607843,
|
|
"grad_norm": 0.4303247993369076,
|
|
"learning_rate": 2.5800488493515576e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12024565041065216,
|
|
"step": 3810,
|
|
"valid_targets_mean": 5481.5,
|
|
"valid_targets_min": 3927
|
|
},
|
|
{
|
|
"epoch": 5.980392156862745,
|
|
"grad_norm": 0.4339078888124253,
|
|
"learning_rate": 2.5417787961448893e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11610336601734161,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5475.6,
|
|
"valid_targets_min": 4163
|
|
},
|
|
{
|
|
"epoch": 5.988235294117647,
|
|
"grad_norm": 0.4643015265473758,
|
|
"learning_rate": 2.5037754307578553e-06,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12840969860553741,
|
|
"step": 3820,
|
|
"valid_targets_mean": 5129.9,
|
|
"valid_targets_min": 3500
|
|
},
|
|
{
|
|
"epoch": 5.996078431372549,
|
|
"grad_norm": 0.45333397118083585,
|
|
"learning_rate": 2.466039333721246e-06,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14173388481140137,
|
|
"step": 3825,
|
|
"valid_targets_mean": 5541.9,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 6.003137254901961,
|
|
"grad_norm": 0.64995545201576,
|
|
"learning_rate": 2.4285710814830908e-06,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11834508180618286,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5217.9,
|
|
"valid_targets_min": 3748
|
|
},
|
|
{
|
|
"epoch": 6.0109803921568625,
|
|
"grad_norm": 0.40310804602190514,
|
|
"learning_rate": 2.3913712463999026e-06,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12881794571876526,
|
|
"step": 3835,
|
|
"valid_targets_mean": 7408.9,
|
|
"valid_targets_min": 4043
|
|
},
|
|
{
|
|
"epoch": 6.0188235294117645,
|
|
"grad_norm": 0.3976956884972396,
|
|
"learning_rate": 2.354440396727902e-06,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11172664165496826,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5850.1,
|
|
"valid_targets_min": 3770
|
|
},
|
|
{
|
|
"epoch": 6.026666666666666,
|
|
"grad_norm": 0.46290334545230455,
|
|
"learning_rate": 2.3177790966143653e-06,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1118907779455185,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4627.0,
|
|
"valid_targets_min": 3205
|
|
},
|
|
{
|
|
"epoch": 6.034509803921568,
|
|
"grad_norm": 0.4417770357609195,
|
|
"learning_rate": 2.28138790608897e-06,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0939764678478241,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4603.8,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 6.04235294117647,
|
|
"grad_norm": 0.41116088453012645,
|
|
"learning_rate": 2.2452673810552783e-06,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11001232266426086,
|
|
"step": 3855,
|
|
"valid_targets_mean": 5604.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.050196078431372,
|
|
"grad_norm": 0.4582789471885693,
|
|
"learning_rate": 2.2094180732822235e-06,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11757993698120117,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5268.5,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 6.058039215686274,
|
|
"grad_norm": 0.43527377714699433,
|
|
"learning_rate": 2.1738405303956877e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11812390387058258,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4652.0,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 6.065882352941176,
|
|
"grad_norm": 0.436259827105265,
|
|
"learning_rate": 2.138535295870121e-06,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1061776727437973,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4835.5,
|
|
"valid_targets_min": 3781
|
|
},
|
|
{
|
|
"epoch": 6.073725490196079,
|
|
"grad_norm": 0.46710848541258787,
|
|
"learning_rate": 2.1035029090202717e-06,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308131217956543,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4839.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 6.081568627450981,
|
|
"grad_norm": 0.4346179493431141,
|
|
"learning_rate": 2.0687439049929113e-06,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11312983185052872,
|
|
"step": 3880,
|
|
"valid_targets_mean": 5265.2,
|
|
"valid_targets_min": 2579
|
|
},
|
|
{
|
|
"epoch": 6.089411764705883,
|
|
"grad_norm": 0.4310847748884074,
|
|
"learning_rate": 2.034258814758696e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15229426324367523,
|
|
"step": 3885,
|
|
"valid_targets_mean": 7243.0,
|
|
"valid_targets_min": 3673
|
|
},
|
|
{
|
|
"epoch": 6.097254901960785,
|
|
"grad_norm": 0.4377098786768193,
|
|
"learning_rate": 2.0000481651040182e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11475956439971924,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4506.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.105098039215687,
|
|
"grad_norm": 0.4697067576333791,
|
|
"learning_rate": 1.96611247862299e-06,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11185763776302338,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4808.0,
|
|
"valid_targets_min": 3482
|
|
},
|
|
{
|
|
"epoch": 6.1129411764705885,
|
|
"grad_norm": 0.39659631265979406,
|
|
"learning_rate": 1.932452273709453e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10514408349990845,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5980.1,
|
|
"valid_targets_min": 4308
|
|
},
|
|
{
|
|
"epoch": 6.1207843137254905,
|
|
"grad_norm": 0.4800811132838675,
|
|
"learning_rate": 1.8990680645490389e-06,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10940663516521454,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4273.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 6.128627450980392,
|
|
"grad_norm": 0.4353985528320435,
|
|
"learning_rate": 1.8659603611113475e-06,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09796038269996643,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4635.4,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 6.136470588235294,
|
|
"grad_norm": 0.4208733037887433,
|
|
"learning_rate": 1.833129669142124e-06,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11479602009057999,
|
|
"step": 3915,
|
|
"valid_targets_mean": 6398.9,
|
|
"valid_targets_min": 4249
|
|
},
|
|
{
|
|
"epoch": 6.144313725490196,
|
|
"grad_norm": 0.46839862019377804,
|
|
"learning_rate": 1.8005764901555678e-06,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10964938253164291,
|
|
"step": 3920,
|
|
"valid_targets_mean": 5338.1,
|
|
"valid_targets_min": 3817
|
|
},
|
|
{
|
|
"epoch": 6.152156862745098,
|
|
"grad_norm": 0.4645376835084068,
|
|
"learning_rate": 1.7683013214266354e-06,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1521643102169037,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5332.6,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 6.16,
|
|
"grad_norm": 0.40683657902501225,
|
|
"learning_rate": 1.7363046559834718e-06,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11666695028543472,
|
|
"step": 3930,
|
|
"valid_targets_mean": 6784.1,
|
|
"valid_targets_min": 4317
|
|
},
|
|
{
|
|
"epoch": 6.167843137254902,
|
|
"grad_norm": 0.4639347819876015,
|
|
"learning_rate": 1.7045869825998674e-06,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09457366168498993,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3903.6,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 6.175686274509804,
|
|
"grad_norm": 0.4646117573962641,
|
|
"learning_rate": 1.6731487857877971e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15744554996490479,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5625.9,
|
|
"valid_targets_min": 3955
|
|
},
|
|
{
|
|
"epoch": 6.183529411764706,
|
|
"grad_norm": 0.4356971179363293,
|
|
"learning_rate": 1.641990545790002e-06,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10445305705070496,
|
|
"step": 3945,
|
|
"valid_targets_mean": 5696.6,
|
|
"valid_targets_min": 3260
|
|
},
|
|
{
|
|
"epoch": 6.191372549019608,
|
|
"grad_norm": 0.4351794252227166,
|
|
"learning_rate": 1.6111127385726844e-06,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11319652199745178,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5047.4,
|
|
"valid_targets_min": 3681
|
|
},
|
|
{
|
|
"epoch": 6.19921568627451,
|
|
"grad_norm": 0.48584900526974223,
|
|
"learning_rate": 1.5805158358182039e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09738421440124512,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4529.9,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 6.207058823529412,
|
|
"grad_norm": 0.442358610321098,
|
|
"learning_rate": 1.550200304917897e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10242228209972382,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4843.8,
|
|
"valid_targets_min": 3220
|
|
},
|
|
{
|
|
"epoch": 6.214901960784314,
|
|
"grad_norm": 0.5001361874122662,
|
|
"learning_rate": 1.5201666089649193e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0966445803642273,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4384.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 6.222745098039216,
|
|
"grad_norm": 0.4832526224656965,
|
|
"learning_rate": 1.4904152067471932e-06,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10578788816928864,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4047.4,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 6.2305882352941175,
|
|
"grad_norm": 0.4561742516644297,
|
|
"learning_rate": 1.4609465527403722e-06,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0976758748292923,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4819.9,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 6.2384313725490195,
|
|
"grad_norm": 0.4957291772635957,
|
|
"learning_rate": 1.431761097100921e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1101255863904953,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3684.1,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 6.246274509803921,
|
|
"grad_norm": 0.465197280451629,
|
|
"learning_rate": 1.4028592856592239e-06,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11609461158514023,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4429.8,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 6.254117647058823,
|
|
"grad_norm": 0.496223651742939,
|
|
"learning_rate": 1.374241559912788e-06,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11883389949798584,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5541.2,
|
|
"valid_targets_min": 4575
|
|
},
|
|
{
|
|
"epoch": 6.261960784313725,
|
|
"grad_norm": 0.45057004857361893,
|
|
"learning_rate": 1.345908357019483e-06,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09654825925827026,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4013.4,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 6.269803921568627,
|
|
"grad_norm": 0.48472218470428075,
|
|
"learning_rate": 1.3178601097908827e-06,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09483954310417175,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4327.1,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 6.277647058823529,
|
|
"grad_norm": 0.4761380329140597,
|
|
"learning_rate": 1.2900972466856377e-06,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10870771110057831,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4861.2,
|
|
"valid_targets_min": 3449
|
|
},
|
|
{
|
|
"epoch": 6.285490196078431,
|
|
"grad_norm": 0.49338258536074125,
|
|
"learning_rate": 1.2626201918029301e-06,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11417485028505325,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4991.2,
|
|
"valid_targets_min": 3913
|
|
},
|
|
{
|
|
"epoch": 6.293333333333333,
|
|
"grad_norm": 0.48241818096169126,
|
|
"learning_rate": 1.235429364876015e-06,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11160287261009216,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4995.0,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 6.301176470588235,
|
|
"grad_norm": 0.4378176483854021,
|
|
"learning_rate": 1.2085251812657762e-06,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10883471369743347,
|
|
"step": 4020,
|
|
"valid_targets_mean": 5228.6,
|
|
"valid_targets_min": 3682
|
|
},
|
|
{
|
|
"epoch": 6.309019607843137,
|
|
"grad_norm": 0.469030673041714,
|
|
"learning_rate": 1.1819080519544123e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11258652806282043,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4730.2,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 6.316862745098039,
|
|
"grad_norm": 0.44244478805711335,
|
|
"learning_rate": 1.155578383539142e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10519449412822723,
|
|
"step": 4030,
|
|
"valid_targets_mean": 5691.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 6.324705882352941,
|
|
"grad_norm": 0.5758805053505508,
|
|
"learning_rate": 1.1295365782259982e-06,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13826540112495422,
|
|
"step": 4035,
|
|
"valid_targets_mean": 6115.6,
|
|
"valid_targets_min": 3513
|
|
},
|
|
{
|
|
"epoch": 6.332549019607843,
|
|
"grad_norm": 0.47896063638163283,
|
|
"learning_rate": 1.103783033823671e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1043325737118721,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4979.2,
|
|
"valid_targets_min": 3179
|
|
},
|
|
{
|
|
"epoch": 6.3403921568627455,
|
|
"grad_norm": 0.4334623118587568,
|
|
"learning_rate": 1.0783181437374669e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10528846085071564,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 6.348235294117647,
|
|
"grad_norm": 0.4096491972779553,
|
|
"learning_rate": 1.05314229696325e-06,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12475338578224182,
|
|
"step": 4050,
|
|
"valid_targets_mean": 7550.8,
|
|
"valid_targets_min": 4536
|
|
},
|
|
{
|
|
"epoch": 6.356078431372549,
|
|
"grad_norm": 0.4573346545451403,
|
|
"learning_rate": 1.0282558780815411e-06,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10188288986682892,
|
|
"step": 4055,
|
|
"valid_targets_mean": 4367.8,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 6.363921568627451,
|
|
"grad_norm": 0.46912558391597736,
|
|
"learning_rate": 1.0036592672516088e-06,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12381172925233841,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5394.8,
|
|
"valid_targets_min": 3766
|
|
},
|
|
{
|
|
"epoch": 6.371764705882353,
|
|
"grad_norm": 0.4813159773927962,
|
|
"learning_rate": 9.79352840205703e-07,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10303199291229248,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3941.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.379607843137255,
|
|
"grad_norm": 0.41667861310639953,
|
|
"learning_rate": 9.55336968243268e-07,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11974573880434036,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5733.5,
|
|
"valid_targets_min": 3492
|
|
},
|
|
{
|
|
"epoch": 6.387450980392157,
|
|
"grad_norm": 0.49151600430779024,
|
|
"learning_rate": 9.316120182253141e-07,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10450279712677002,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4488.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.395294117647059,
|
|
"grad_norm": 0.4415542796332715,
|
|
"learning_rate": 9.081783525687826e-07,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1253572702407837,
|
|
"step": 4080,
|
|
"valid_targets_mean": 6553.2,
|
|
"valid_targets_min": 4229
|
|
},
|
|
{
|
|
"epoch": 6.403137254901961,
|
|
"grad_norm": 0.44382860786184114,
|
|
"learning_rate": 8.850363292410269e-07,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11451376229524612,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5709.0,
|
|
"valid_targets_min": 3848
|
|
},
|
|
{
|
|
"epoch": 6.410980392156863,
|
|
"grad_norm": 0.4428905242189662,
|
|
"learning_rate": 8.621863017543353e-07,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10134243965148926,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5144.8,
|
|
"valid_targets_min": 4120
|
|
},
|
|
{
|
|
"epoch": 6.418823529411765,
|
|
"grad_norm": 0.5018401575755567,
|
|
"learning_rate": 8.396286191605352e-07,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11206495761871338,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4646.0,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 6.426666666666667,
|
|
"grad_norm": 0.4517958808892632,
|
|
"learning_rate": 8.173636260456575e-07,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09486055374145508,
|
|
"step": 4100,
|
|
"valid_targets_mean": 4185.4,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 6.434509803921569,
|
|
"grad_norm": 0.4545491068989947,
|
|
"learning_rate": 7.953916625246804e-07,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09442710876464844,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4015.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 6.442352941176471,
|
|
"grad_norm": 0.4598041822034105,
|
|
"learning_rate": 7.737130642363211e-07,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09323002398014069,
|
|
"step": 4110,
|
|
"valid_targets_mean": 3760.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 6.4501960784313725,
|
|
"grad_norm": 0.4582099692292797,
|
|
"learning_rate": 7.52328162337923e-07,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09848172962665558,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4312.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.4580392156862745,
|
|
"grad_norm": 0.44772459638692896,
|
|
"learning_rate": 7.3123728350039e-07,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1100703775882721,
|
|
"step": 4120,
|
|
"valid_targets_mean": 5088.5,
|
|
"valid_targets_min": 4233
|
|
},
|
|
{
|
|
"epoch": 6.465882352941176,
|
|
"grad_norm": 0.46327139443607485,
|
|
"learning_rate": 7.104407499031896e-07,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18428373336791992,
|
|
"step": 4125,
|
|
"valid_targets_mean": 6381.0,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 6.473725490196078,
|
|
"grad_norm": 0.43202629332535153,
|
|
"learning_rate": 6.899388792294482e-07,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10554929077625275,
|
|
"step": 4130,
|
|
"valid_targets_mean": 4890.8,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 6.48156862745098,
|
|
"grad_norm": 0.4787639033963915,
|
|
"learning_rate": 6.697319846610772e-07,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12935051321983337,
|
|
"step": 4135,
|
|
"valid_targets_mean": 5128.5,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 6.489411764705882,
|
|
"grad_norm": 0.4374673739573011,
|
|
"learning_rate": 6.498203748740106e-07,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11974336206912994,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5625.0,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 6.497254901960784,
|
|
"grad_norm": 0.4718170829134424,
|
|
"learning_rate": 6.302043540334701e-07,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10960593819618225,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4964.1,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 6.505098039215686,
|
|
"grad_norm": 0.4929681247793601,
|
|
"learning_rate": 6.108842217893384e-07,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14213602244853973,
|
|
"step": 4150,
|
|
"valid_targets_mean": 5879.5,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 6.512941176470588,
|
|
"grad_norm": 0.48504256263392026,
|
|
"learning_rate": 5.918602732715583e-07,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09823766350746155,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4444.2,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 6.52078431372549,
|
|
"grad_norm": 0.4767032025354917,
|
|
"learning_rate": 5.731327990856517e-07,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11948859691619873,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5461.9,
|
|
"valid_targets_min": 3945
|
|
},
|
|
{
|
|
"epoch": 6.528627450980392,
|
|
"grad_norm": 0.4247913504681906,
|
|
"learning_rate": 5.547020853082497e-07,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11250126361846924,
|
|
"step": 4165,
|
|
"valid_targets_mean": 5245.2,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 6.536470588235294,
|
|
"grad_norm": 0.47341087020563916,
|
|
"learning_rate": 5.365684134827542e-07,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286301612854004,
|
|
"step": 4170,
|
|
"valid_targets_mean": 5085.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 6.544313725490196,
|
|
"grad_norm": 0.5452947122533847,
|
|
"learning_rate": 5.187320606150103e-07,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18514202535152435,
|
|
"step": 4175,
|
|
"valid_targets_mean": 6446.4,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 6.552156862745098,
|
|
"grad_norm": 0.42641629979857115,
|
|
"learning_rate": 5.011932991690959e-07,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12646891176700592,
|
|
"step": 4180,
|
|
"valid_targets_mean": 5828.9,
|
|
"valid_targets_min": 4165
|
|
},
|
|
{
|
|
"epoch": 6.5600000000000005,
|
|
"grad_norm": 0.46574222475217153,
|
|
"learning_rate": 4.839523970631387e-07,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12034496665000916,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5428.6,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 6.567843137254902,
|
|
"grad_norm": 0.48648714804401355,
|
|
"learning_rate": 4.6700961766524833e-07,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10155089199542999,
|
|
"step": 4190,
|
|
"valid_targets_mean": 4122.8,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 6.575686274509804,
|
|
"grad_norm": 0.4655250441869616,
|
|
"learning_rate": 4.503652197894681e-07,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09567312896251678,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4185.8,
|
|
"valid_targets_min": 3418
|
|
},
|
|
{
|
|
"epoch": 6.583529411764706,
|
|
"grad_norm": 0.464660545382178,
|
|
"learning_rate": 4.3401945769183884e-07,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.094794362783432,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3753.0,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 6.591372549019608,
|
|
"grad_norm": 0.40188876002080204,
|
|
"learning_rate": 4.1797258106650804e-07,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10433869063854218,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5433.4,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 6.59921568627451,
|
|
"grad_norm": 0.4714750606191182,
|
|
"learning_rate": 4.0222483504191556e-07,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08846727758646011,
|
|
"step": 4210,
|
|
"valid_targets_mean": 4271.1,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 6.607058823529412,
|
|
"grad_norm": 0.5078200189738814,
|
|
"learning_rate": 3.8677646017704963e-07,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10169130563735962,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3950.5,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 6.614901960784314,
|
|
"grad_norm": 0.4232569435845233,
|
|
"learning_rate": 3.716276924577744e-07,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08592445403337479,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5230.9,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 6.622745098039216,
|
|
"grad_norm": 0.43181541312088884,
|
|
"learning_rate": 3.5677876329322624e-07,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324676275253296,
|
|
"step": 4225,
|
|
"valid_targets_mean": 6772.1,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 6.630588235294118,
|
|
"grad_norm": 0.49254515548212713,
|
|
"learning_rate": 3.422298995122675e-07,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13985073566436768,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4969.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 6.63843137254902,
|
|
"grad_norm": 0.4264979019492451,
|
|
"learning_rate": 3.279813233600404e-07,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11380667984485626,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5638.6,
|
|
"valid_targets_min": 3436
|
|
},
|
|
{
|
|
"epoch": 6.646274509803922,
|
|
"grad_norm": 0.4434834036479819,
|
|
"learning_rate": 3.140332524945522e-07,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10968320071697235,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4396.2,
|
|
"valid_targets_min": 3472
|
|
},
|
|
{
|
|
"epoch": 6.654117647058824,
|
|
"grad_norm": 0.4755423972418163,
|
|
"learning_rate": 3.0038589998336423e-07,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1261489987373352,
|
|
"step": 4245,
|
|
"valid_targets_mean": 5429.0,
|
|
"valid_targets_min": 3906
|
|
},
|
|
{
|
|
"epoch": 6.661960784313726,
|
|
"grad_norm": 0.4329871366469772,
|
|
"learning_rate": 2.8703947430033465e-07,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09297747164964676,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4497.1,
|
|
"valid_targets_min": 3315
|
|
},
|
|
{
|
|
"epoch": 6.669803921568628,
|
|
"grad_norm": 0.4993264893966694,
|
|
"learning_rate": 2.7399417932242325e-07,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11046084016561508,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4216.9,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.6776470588235295,
|
|
"grad_norm": 0.4355414330786498,
|
|
"learning_rate": 2.6125021432659824e-07,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10060550272464752,
|
|
"step": 4260,
|
|
"valid_targets_mean": 5505.8,
|
|
"valid_targets_min": 2862
|
|
},
|
|
{
|
|
"epoch": 6.685490196078431,
|
|
"grad_norm": 0.4521689898500482,
|
|
"learning_rate": 2.4880777398677e-07,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378835141658783,
|
|
"step": 4265,
|
|
"valid_targets_mean": 5625.0,
|
|
"valid_targets_min": 4029
|
|
},
|
|
{
|
|
"epoch": 6.693333333333333,
|
|
"grad_norm": 0.470768846829166,
|
|
"learning_rate": 2.3666704837082888e-07,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12488949298858643,
|
|
"step": 4270,
|
|
"valid_targets_mean": 4775.0,
|
|
"valid_targets_min": 3986
|
|
},
|
|
{
|
|
"epoch": 6.701176470588235,
|
|
"grad_norm": 0.4710412868048963,
|
|
"learning_rate": 2.2482822293774297e-07,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1238384023308754,
|
|
"step": 4275,
|
|
"valid_targets_mean": 6255.5,
|
|
"valid_targets_min": 4588
|
|
},
|
|
{
|
|
"epoch": 6.709019607843137,
|
|
"grad_norm": 0.4438839076302866,
|
|
"learning_rate": 2.13291478534714e-07,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14333386719226837,
|
|
"step": 4280,
|
|
"valid_targets_mean": 6303.6,
|
|
"valid_targets_min": 3679
|
|
},
|
|
{
|
|
"epoch": 6.716862745098039,
|
|
"grad_norm": 0.5060736267850259,
|
|
"learning_rate": 2.020569913944348e-07,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11077409237623215,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3751.0,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 6.724705882352941,
|
|
"grad_norm": 0.45844627042874836,
|
|
"learning_rate": 1.9112493313236947e-07,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13419592380523682,
|
|
"step": 4290,
|
|
"valid_targets_mean": 6193.1,
|
|
"valid_targets_min": 3947
|
|
},
|
|
{
|
|
"epoch": 6.732549019607843,
|
|
"grad_norm": 0.4624929402927315,
|
|
"learning_rate": 1.8049547074416195e-07,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12782934308052063,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4674.4,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 6.740392156862745,
|
|
"grad_norm": 0.44973617852573694,
|
|
"learning_rate": 1.701687666030627e-07,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1164279356598854,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4788.5,
|
|
"valid_targets_min": 3863
|
|
},
|
|
{
|
|
"epoch": 6.748235294117647,
|
|
"grad_norm": 0.4877134554107391,
|
|
"learning_rate": 1.6014497845745715e-07,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12220542132854462,
|
|
"step": 4305,
|
|
"valid_targets_mean": 4376.2,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 6.756078431372549,
|
|
"grad_norm": 0.4770935848522137,
|
|
"learning_rate": 1.504242594284544e-07,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1367311328649521,
|
|
"step": 4310,
|
|
"valid_targets_mean": 5298.9,
|
|
"valid_targets_min": 4224
|
|
},
|
|
{
|
|
"epoch": 6.763921568627451,
|
|
"grad_norm": 0.5373921676125825,
|
|
"learning_rate": 1.4100675800754914e-07,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12212381511926651,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4523.1,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 6.771764705882353,
|
|
"grad_norm": 0.4927951692587741,
|
|
"learning_rate": 1.318926180543545e-07,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10165538638830185,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4021.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 6.779607843137255,
|
|
"grad_norm": 0.4813767080894923,
|
|
"learning_rate": 1.2308197879439932e-07,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391141563653946,
|
|
"step": 4325,
|
|
"valid_targets_mean": 5597.2,
|
|
"valid_targets_min": 4134
|
|
},
|
|
{
|
|
"epoch": 6.7874509803921566,
|
|
"grad_norm": 0.43435379671631896,
|
|
"learning_rate": 1.1457497481700775e-07,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326904147863388,
|
|
"step": 4330,
|
|
"valid_targets_mean": 5858.1,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 6.7952941176470585,
|
|
"grad_norm": 0.509799192903302,
|
|
"learning_rate": 1.0637173607323859e-07,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08625923097133636,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3410.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 6.80313725490196,
|
|
"grad_norm": 0.4340451395208277,
|
|
"learning_rate": 9.847238787390246e-08,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10468437522649765,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5285.2,
|
|
"valid_targets_min": 3760
|
|
},
|
|
{
|
|
"epoch": 6.810980392156862,
|
|
"grad_norm": 0.4662601092806204,
|
|
"learning_rate": 9.087705088764997e-08,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09582984447479248,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4779.4,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 6.818823529411764,
|
|
"grad_norm": 0.46696313433638315,
|
|
"learning_rate": 8.358584113911994e-08,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10853664577007294,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4434.6,
|
|
"valid_targets_min": 3719
|
|
},
|
|
{
|
|
"epoch": 6.826666666666666,
|
|
"grad_norm": 0.43846142448189535,
|
|
"learning_rate": 7.659887000717403e-08,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10556395351886749,
|
|
"step": 4355,
|
|
"valid_targets_mean": 4671.9,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 6.834509803921568,
|
|
"grad_norm": 0.4319322520744759,
|
|
"learning_rate": 6.991624422320042e-08,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10148314386606216,
|
|
"step": 4360,
|
|
"valid_targets_mean": 4710.1,
|
|
"valid_targets_min": 3220
|
|
},
|
|
{
|
|
"epoch": 6.842352941176471,
|
|
"grad_norm": 0.4528238104041261,
|
|
"learning_rate": 6.353806586946842e-08,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10324162989854813,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4713.1,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 6.850196078431373,
|
|
"grad_norm": 0.44883130023888307,
|
|
"learning_rate": 5.7464432377580814e-08,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1069837287068367,
|
|
"step": 4370,
|
|
"valid_targets_mean": 4678.0,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 6.858039215686275,
|
|
"grad_norm": 0.502502931033106,
|
|
"learning_rate": 5.169543652698617e-08,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10652319341897964,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4192.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 6.865882352941177,
|
|
"grad_norm": 0.4817734391686949,
|
|
"learning_rate": 4.623116644354886e-08,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12233863025903702,
|
|
"step": 4380,
|
|
"valid_targets_mean": 5835.8,
|
|
"valid_targets_min": 3830
|
|
},
|
|
{
|
|
"epoch": 6.873725490196079,
|
|
"grad_norm": 0.444884821554049,
|
|
"learning_rate": 4.107170559822127e-08,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09412867575883865,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4147.2,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.881568627450981,
|
|
"grad_norm": 0.45787283402160717,
|
|
"learning_rate": 3.621713280575367e-08,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10872447490692139,
|
|
"step": 4390,
|
|
"valid_targets_mean": 5069.1,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 6.889411764705883,
|
|
"grad_norm": 0.4342624586083637,
|
|
"learning_rate": 3.166752222349523e-08,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09016773104667664,
|
|
"step": 4395,
|
|
"valid_targets_mean": 4896.4,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 6.8972549019607845,
|
|
"grad_norm": 0.4981911991886726,
|
|
"learning_rate": 2.7422943350263743e-08,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11719105392694473,
|
|
"step": 4400,
|
|
"valid_targets_mean": 4600.9,
|
|
"valid_targets_min": 3802
|
|
},
|
|
{
|
|
"epoch": 6.905098039215686,
|
|
"grad_norm": 0.47616977358182333,
|
|
"learning_rate": 2.3483461025282094e-08,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08278848975896835,
|
|
"step": 4405,
|
|
"valid_targets_mean": 3947.2,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 6.912941176470588,
|
|
"grad_norm": 0.47507102563863857,
|
|
"learning_rate": 1.9849135427187916e-08,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16222912073135376,
|
|
"step": 4410,
|
|
"valid_targets_mean": 5750.8,
|
|
"valid_targets_min": 3535
|
|
},
|
|
{
|
|
"epoch": 6.92078431372549,
|
|
"grad_norm": 0.43316713506324794,
|
|
"learning_rate": 1.6520022073112098e-08,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0887688621878624,
|
|
"step": 4415,
|
|
"valid_targets_mean": 4106.0,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 6.928627450980392,
|
|
"grad_norm": 0.46871403580777404,
|
|
"learning_rate": 1.3496171817832804e-08,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262306571006775,
|
|
"step": 4420,
|
|
"valid_targets_mean": 6219.2,
|
|
"valid_targets_min": 4586
|
|
},
|
|
{
|
|
"epoch": 6.936470588235294,
|
|
"grad_norm": 0.44141115841411704,
|
|
"learning_rate": 1.0777630853000543e-08,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09531784057617188,
|
|
"step": 4425,
|
|
"valid_targets_mean": 5062.0,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 6.944313725490196,
|
|
"grad_norm": 0.48753846856369104,
|
|
"learning_rate": 8.364440706429833e-09,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11849364638328552,
|
|
"step": 4430,
|
|
"valid_targets_mean": 4558.0,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 6.952156862745098,
|
|
"grad_norm": 0.49429667262142357,
|
|
"learning_rate": 6.2566382414641635e-09,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12699516117572784,
|
|
"step": 4435,
|
|
"valid_targets_mean": 5793.4,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 6.96,
|
|
"grad_norm": 0.4789452496037441,
|
|
"learning_rate": 4.454255656414219e-09,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11449083685874939,
|
|
"step": 4440,
|
|
"valid_targets_mean": 5632.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 6.967843137254902,
|
|
"grad_norm": 0.4578636924842011,
|
|
"learning_rate": 2.957320484067161e-09,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10642892122268677,
|
|
"step": 4445,
|
|
"valid_targets_mean": 4728.5,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 6.975686274509804,
|
|
"grad_norm": 0.4583003210525026,
|
|
"learning_rate": 1.7658555912647423e-09,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13077768683433533,
|
|
"step": 4450,
|
|
"valid_targets_mean": 6211.1,
|
|
"valid_targets_min": 4483
|
|
},
|
|
{
|
|
"epoch": 6.983529411764706,
|
|
"grad_norm": 0.45176736594513867,
|
|
"learning_rate": 8.79879178554699e-10,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11069822311401367,
|
|
"step": 4455,
|
|
"valid_targets_mean": 4677.2,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 6.991372549019608,
|
|
"grad_norm": 0.4435188034232649,
|
|
"learning_rate": 2.9940477991097227e-10,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09977850317955017,
|
|
"step": 4460,
|
|
"valid_targets_mean": 4503.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 6.99921568627451,
|
|
"grad_norm": 0.45734595449193466,
|
|
"learning_rate": 2.4441262527208355e-11,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10217998921871185,
|
|
"step": 4465,
|
|
"valid_targets_mean": 4522.4,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 4466,
|
|
"total_flos": 2.8947179058078024e+18,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 3.2247,
|
|
"train_samples_per_second": 22141.844,
|
|
"train_steps_per_second": 1384.951
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4466,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.8947179058078024e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|