9988 lines
277 KiB
JSON
9988 lines
277 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4522,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007739938080495356,
|
|
"grad_norm": 27.498263352487882,
|
|
"learning_rate": 7.048458149779737e-07,
|
|
"loss": 0.8916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8890290856361389,
|
|
"step": 5,
|
|
"valid_targets_mean": 3787.4,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.015479876160990712,
|
|
"grad_norm": 24.63501550618504,
|
|
"learning_rate": 1.5859030837004408e-06,
|
|
"loss": 0.8692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8395868539810181,
|
|
"step": 10,
|
|
"valid_targets_mean": 3744.1,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.02321981424148607,
|
|
"grad_norm": 18.363293273448306,
|
|
"learning_rate": 2.466960352422908e-06,
|
|
"loss": 0.8052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7638745307922363,
|
|
"step": 15,
|
|
"valid_targets_mean": 3806.1,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 0.030959752321981424,
|
|
"grad_norm": 8.207394915342753,
|
|
"learning_rate": 3.348017621145375e-06,
|
|
"loss": 0.6851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485142707824707,
|
|
"step": 20,
|
|
"valid_targets_mean": 3883.8,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 0.03869969040247678,
|
|
"grad_norm": 3.462959759774062,
|
|
"learning_rate": 4.229074889867842e-06,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5730105638504028,
|
|
"step": 25,
|
|
"valid_targets_mean": 3872.5,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.04643962848297214,
|
|
"grad_norm": 1.7229720500525338,
|
|
"learning_rate": 5.110132158590309e-06,
|
|
"loss": 0.5486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5393049716949463,
|
|
"step": 30,
|
|
"valid_targets_mean": 3780.5,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 0.05417956656346749,
|
|
"grad_norm": 1.4486640167954894,
|
|
"learning_rate": 5.991189427312776e-06,
|
|
"loss": 0.5244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5158563852310181,
|
|
"step": 35,
|
|
"valid_targets_mean": 3663.5,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.06191950464396285,
|
|
"grad_norm": 1.1214202272141562,
|
|
"learning_rate": 6.872246696035243e-06,
|
|
"loss": 0.4985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4886733293533325,
|
|
"step": 40,
|
|
"valid_targets_mean": 3721.1,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.0696594427244582,
|
|
"grad_norm": 0.7788891552011866,
|
|
"learning_rate": 7.75330396475771e-06,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46776801347732544,
|
|
"step": 45,
|
|
"valid_targets_mean": 3723.2,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 0.07739938080495357,
|
|
"grad_norm": 0.5823039700286535,
|
|
"learning_rate": 8.634361233480178e-06,
|
|
"loss": 0.4547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44776591658592224,
|
|
"step": 50,
|
|
"valid_targets_mean": 3877.2,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.08513931888544891,
|
|
"grad_norm": 0.5018727930407911,
|
|
"learning_rate": 9.515418502202644e-06,
|
|
"loss": 0.4396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43902742862701416,
|
|
"step": 55,
|
|
"valid_targets_mean": 3845.5,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 0.09287925696594428,
|
|
"grad_norm": 0.816826419129901,
|
|
"learning_rate": 1.0396475770925112e-05,
|
|
"loss": 0.5069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5049954652786255,
|
|
"step": 60,
|
|
"valid_targets_mean": 4920.5,
|
|
"valid_targets_min": 2219
|
|
},
|
|
{
|
|
"epoch": 0.10061919504643962,
|
|
"grad_norm": 2.3765631896050023,
|
|
"learning_rate": 1.127753303964758e-05,
|
|
"loss": 0.5976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7650168538093567,
|
|
"step": 65,
|
|
"valid_targets_mean": 965.5,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 0.10835913312693499,
|
|
"grad_norm": 0.5949609231514115,
|
|
"learning_rate": 1.2158590308370044e-05,
|
|
"loss": 0.5382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4562525153160095,
|
|
"step": 70,
|
|
"valid_targets_mean": 4698.2,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 0.11609907120743033,
|
|
"grad_norm": 1.4403148435313564,
|
|
"learning_rate": 1.3039647577092512e-05,
|
|
"loss": 0.5901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7671957015991211,
|
|
"step": 75,
|
|
"valid_targets_mean": 877.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 0.1238390092879257,
|
|
"grad_norm": 0.39456016690831686,
|
|
"learning_rate": 1.3920704845814978e-05,
|
|
"loss": 0.4254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4167764484882355,
|
|
"step": 80,
|
|
"valid_targets_mean": 4635.7,
|
|
"valid_targets_min": 3134
|
|
},
|
|
{
|
|
"epoch": 0.13157894736842105,
|
|
"grad_norm": 1.1333578016658112,
|
|
"learning_rate": 1.4801762114537446e-05,
|
|
"loss": 0.5439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7369047403335571,
|
|
"step": 85,
|
|
"valid_targets_mean": 855.6,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 0.1393188854489164,
|
|
"grad_norm": 0.3384357445702928,
|
|
"learning_rate": 1.5682819383259912e-05,
|
|
"loss": 0.4806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40316689014434814,
|
|
"step": 90,
|
|
"valid_targets_mean": 4777.3,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 0.14705882352941177,
|
|
"grad_norm": 0.9424988329223944,
|
|
"learning_rate": 1.656387665198238e-05,
|
|
"loss": 0.4601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7080361247062683,
|
|
"step": 95,
|
|
"valid_targets_mean": 898.8,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 0.15479876160990713,
|
|
"grad_norm": 0.3211027573357073,
|
|
"learning_rate": 1.7444933920704847e-05,
|
|
"loss": 0.4841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3624253273010254,
|
|
"step": 100,
|
|
"valid_targets_mean": 4618.4,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 0.16253869969040247,
|
|
"grad_norm": 0.29675307532349626,
|
|
"learning_rate": 1.8325991189427313e-05,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3852824568748474,
|
|
"step": 105,
|
|
"valid_targets_mean": 3491.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 0.17027863777089783,
|
|
"grad_norm": 0.3078052269922965,
|
|
"learning_rate": 1.9207048458149783e-05,
|
|
"loss": 0.5094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3487318158149719,
|
|
"step": 110,
|
|
"valid_targets_mean": 4612.6,
|
|
"valid_targets_min": 3000
|
|
},
|
|
{
|
|
"epoch": 0.1780185758513932,
|
|
"grad_norm": 0.31060804119055135,
|
|
"learning_rate": 2.008810572687225e-05,
|
|
"loss": 0.3591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3591061532497406,
|
|
"step": 115,
|
|
"valid_targets_mean": 3892.6,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 0.18575851393188855,
|
|
"grad_norm": 0.6981090658708504,
|
|
"learning_rate": 2.0969162995594715e-05,
|
|
"loss": 0.5453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3517632782459259,
|
|
"step": 120,
|
|
"valid_targets_mean": 4844.6,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 0.19349845201238391,
|
|
"grad_norm": 1.7558420368807455,
|
|
"learning_rate": 2.1850220264317184e-05,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185149550437927,
|
|
"step": 125,
|
|
"valid_targets_mean": 984.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.20123839009287925,
|
|
"grad_norm": 0.4731696268398377,
|
|
"learning_rate": 2.2731277533039647e-05,
|
|
"loss": 0.4853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3489753305912018,
|
|
"step": 130,
|
|
"valid_targets_mean": 4991.7,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 0.2089783281733746,
|
|
"grad_norm": 0.35901278161595424,
|
|
"learning_rate": 2.3612334801762117e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37420839071273804,
|
|
"step": 135,
|
|
"valid_targets_mean": 3466.8,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 0.21671826625386997,
|
|
"grad_norm": 0.6085312932445831,
|
|
"learning_rate": 2.4493392070484583e-05,
|
|
"loss": 0.5584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42087826132774353,
|
|
"step": 140,
|
|
"valid_targets_mean": 5756.4,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.22445820433436534,
|
|
"grad_norm": 0.3462372372664137,
|
|
"learning_rate": 2.5374449339207052e-05,
|
|
"loss": 0.4119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3945181369781494,
|
|
"step": 145,
|
|
"valid_targets_mean": 5488.7,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 0.23219814241486067,
|
|
"grad_norm": 0.30251769156649366,
|
|
"learning_rate": 2.6255506607929518e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4069705009460449,
|
|
"step": 150,
|
|
"valid_targets_mean": 5880.8,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 0.23993808049535603,
|
|
"grad_norm": 0.2695290742163978,
|
|
"learning_rate": 2.7136563876651988e-05,
|
|
"loss": 0.3982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3916853666305542,
|
|
"step": 155,
|
|
"valid_targets_mean": 5631.9,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 0.2476780185758514,
|
|
"grad_norm": 0.2500660315057484,
|
|
"learning_rate": 2.801762114537445e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3797670900821686,
|
|
"step": 160,
|
|
"valid_targets_mean": 5617.9,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 0.25541795665634676,
|
|
"grad_norm": 0.2561956670065491,
|
|
"learning_rate": 2.8898678414096916e-05,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38050007820129395,
|
|
"step": 165,
|
|
"valid_targets_mean": 6436.5,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 0.2631578947368421,
|
|
"grad_norm": 0.23714082913321008,
|
|
"learning_rate": 2.9779735682819386e-05,
|
|
"loss": 0.3888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40142783522605896,
|
|
"step": 170,
|
|
"valid_targets_mean": 5309.5,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.2708978328173375,
|
|
"grad_norm": 0.2575725809272298,
|
|
"learning_rate": 3.066079295154185e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3651473820209503,
|
|
"step": 175,
|
|
"valid_targets_mean": 5691.1,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 0.2786377708978328,
|
|
"grad_norm": 0.24343611272058363,
|
|
"learning_rate": 3.154185022026432e-05,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3711909055709839,
|
|
"step": 180,
|
|
"valid_targets_mean": 5925.6,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 0.28637770897832815,
|
|
"grad_norm": 0.22366236728746522,
|
|
"learning_rate": 3.2422907488986784e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3628928065299988,
|
|
"step": 185,
|
|
"valid_targets_mean": 5658.9,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 0.22746268852082738,
|
|
"learning_rate": 3.330396475770926e-05,
|
|
"loss": 0.3662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38804638385772705,
|
|
"step": 190,
|
|
"valid_targets_mean": 5660.4,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 0.3018575851393189,
|
|
"grad_norm": 0.20579309800356566,
|
|
"learning_rate": 3.418502202643172e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33736681938171387,
|
|
"step": 195,
|
|
"valid_targets_mean": 6256.5,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.30959752321981426,
|
|
"grad_norm": 0.25855609417059044,
|
|
"learning_rate": 3.506607929515419e-05,
|
|
"loss": 0.361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3671156167984009,
|
|
"step": 200,
|
|
"valid_targets_mean": 5626.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.3173374613003096,
|
|
"grad_norm": 0.23863617425269906,
|
|
"learning_rate": 3.5947136563876655e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35353490710258484,
|
|
"step": 205,
|
|
"valid_targets_mean": 5776.4,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 0.32507739938080493,
|
|
"grad_norm": 0.42206249526466866,
|
|
"learning_rate": 3.682819383259912e-05,
|
|
"loss": 0.3529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3515063524246216,
|
|
"step": 210,
|
|
"valid_targets_mean": 5494.7,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 0.3328173374613003,
|
|
"grad_norm": 0.22375852517599262,
|
|
"learning_rate": 3.770925110132159e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3503708243370056,
|
|
"step": 215,
|
|
"valid_targets_mean": 6014.0,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 0.34055727554179566,
|
|
"grad_norm": 0.2180789169840547,
|
|
"learning_rate": 3.8590308370044053e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36841925978660583,
|
|
"step": 220,
|
|
"valid_targets_mean": 6489.6,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 0.34829721362229105,
|
|
"grad_norm": 0.4971742127118358,
|
|
"learning_rate": 3.9471365638766526e-05,
|
|
"loss": 0.4934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39835458993911743,
|
|
"step": 225,
|
|
"valid_targets_mean": 3702.6,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 0.3560371517027864,
|
|
"grad_norm": 0.567793988393539,
|
|
"learning_rate": 3.999997859903667e-05,
|
|
"loss": 0.3476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34381765127182007,
|
|
"step": 230,
|
|
"valid_targets_mean": 5997.3,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 0.3637770897832817,
|
|
"grad_norm": 0.42449326322685316,
|
|
"learning_rate": 3.999973783872516e-05,
|
|
"loss": 0.4594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3804088830947876,
|
|
"step": 235,
|
|
"valid_targets_mean": 4160.1,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 0.3715170278637771,
|
|
"grad_norm": 0.2885520210439671,
|
|
"learning_rate": 3.9999229570129e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3382093906402588,
|
|
"step": 240,
|
|
"valid_targets_mean": 6533.9,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 0.37925696594427244,
|
|
"grad_norm": 0.24452656032187137,
|
|
"learning_rate": 3.99984538000466e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3420991897583008,
|
|
"step": 245,
|
|
"valid_targets_mean": 6688.6,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 0.38699690402476783,
|
|
"grad_norm": 0.2408422566418978,
|
|
"learning_rate": 3.999741053885433e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36307471990585327,
|
|
"step": 250,
|
|
"valid_targets_mean": 6724.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.39473684210526316,
|
|
"grad_norm": 0.357016959814869,
|
|
"learning_rate": 3.999609980050644e-05,
|
|
"loss": 0.4941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34709322452545166,
|
|
"step": 255,
|
|
"valid_targets_mean": 6117.0,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 0.4024767801857585,
|
|
"grad_norm": 0.2641747805669383,
|
|
"learning_rate": 3.9994521602534816e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32915085554122925,
|
|
"step": 260,
|
|
"valid_targets_mean": 6469.6,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 0.4102167182662539,
|
|
"grad_norm": 0.3295586047349743,
|
|
"learning_rate": 3.999267596604879e-05,
|
|
"loss": 0.4548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3559246063232422,
|
|
"step": 265,
|
|
"valid_targets_mean": 6510.8,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 0.4179566563467492,
|
|
"grad_norm": 0.248299839878867,
|
|
"learning_rate": 3.999056291573483e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34315234422683716,
|
|
"step": 270,
|
|
"valid_targets_mean": 5183.3,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 0.42569659442724456,
|
|
"grad_norm": 0.3001166398831379,
|
|
"learning_rate": 3.998818247985624e-05,
|
|
"loss": 0.4864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33440300822257996,
|
|
"step": 275,
|
|
"valid_targets_mean": 6249.5,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 0.43343653250773995,
|
|
"grad_norm": 0.31823428640792223,
|
|
"learning_rate": 3.998553469025275e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34233564138412476,
|
|
"step": 280,
|
|
"valid_targets_mean": 5410.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 0.4411764705882353,
|
|
"grad_norm": 0.2956164861668372,
|
|
"learning_rate": 3.998261958234011e-05,
|
|
"loss": 0.4799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3295554220676422,
|
|
"step": 285,
|
|
"valid_targets_mean": 6375.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 0.44891640866873067,
|
|
"grad_norm": 0.35615299380374865,
|
|
"learning_rate": 3.99794371951096e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37559157609939575,
|
|
"step": 290,
|
|
"valid_targets_mean": 3327.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.456656346749226,
|
|
"grad_norm": 1.0262345971059195,
|
|
"learning_rate": 3.997598757112753e-05,
|
|
"loss": 0.6868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6669789552688599,
|
|
"step": 295,
|
|
"valid_targets_mean": 3046.5,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.46439628482972134,
|
|
"grad_norm": 0.5343751555550158,
|
|
"learning_rate": 3.9972270756534654e-05,
|
|
"loss": 0.6375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6142510771751404,
|
|
"step": 300,
|
|
"valid_targets_mean": 3748.6,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 0.47213622291021673,
|
|
"grad_norm": 0.4182810292446974,
|
|
"learning_rate": 3.996828680104555e-05,
|
|
"loss": 0.6237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5894930362701416,
|
|
"step": 305,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 0.47987616099071206,
|
|
"grad_norm": 0.3185093329148675,
|
|
"learning_rate": 3.996403575794799e-05,
|
|
"loss": 0.6103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5521119832992554,
|
|
"step": 310,
|
|
"valid_targets_mean": 4002.1,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 0.48761609907120745,
|
|
"grad_norm": 0.37432093942130523,
|
|
"learning_rate": 3.995951768410217e-05,
|
|
"loss": 0.6131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6414017081260681,
|
|
"step": 315,
|
|
"valid_targets_mean": 3102.1,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 0.4953560371517028,
|
|
"grad_norm": 0.39155061140754155,
|
|
"learning_rate": 3.995473263993999e-05,
|
|
"loss": 0.5841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5928991436958313,
|
|
"step": 320,
|
|
"valid_targets_mean": 2004.2,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 0.5030959752321982,
|
|
"grad_norm": 0.34751382527602087,
|
|
"learning_rate": 3.994968068946424e-05,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6139780282974243,
|
|
"step": 325,
|
|
"valid_targets_mean": 2664.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.5108359133126935,
|
|
"grad_norm": 0.3187826626048478,
|
|
"learning_rate": 3.9944361900247734e-05,
|
|
"loss": 0.5838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6045299172401428,
|
|
"step": 330,
|
|
"valid_targets_mean": 3044.0,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 0.5185758513931888,
|
|
"grad_norm": 0.2759051812409209,
|
|
"learning_rate": 3.9938776343432414e-05,
|
|
"loss": 0.5712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5843140482902527,
|
|
"step": 335,
|
|
"valid_targets_mean": 3938.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 0.5263157894736842,
|
|
"grad_norm": 0.2772345334873349,
|
|
"learning_rate": 3.99329240937284e-05,
|
|
"loss": 0.5819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5815340876579285,
|
|
"step": 340,
|
|
"valid_targets_mean": 3717.0,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.5340557275541795,
|
|
"grad_norm": 0.3095152740456224,
|
|
"learning_rate": 3.992680522941297e-05,
|
|
"loss": 0.5569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.532058835029602,
|
|
"step": 345,
|
|
"valid_targets_mean": 4406.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 0.541795665634675,
|
|
"grad_norm": 0.26910389920695843,
|
|
"learning_rate": 3.9920419832329557e-05,
|
|
"loss": 0.5826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6190795302391052,
|
|
"step": 350,
|
|
"valid_targets_mean": 3612.3,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 0.5495356037151703,
|
|
"grad_norm": 0.3306735146561446,
|
|
"learning_rate": 3.9913767987886605e-05,
|
|
"loss": 0.5534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5690146088600159,
|
|
"step": 355,
|
|
"valid_targets_mean": 2256.9,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 0.5572755417956656,
|
|
"grad_norm": 0.30541395315317327,
|
|
"learning_rate": 3.990684978505645e-05,
|
|
"loss": 0.5671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5810322761535645,
|
|
"step": 360,
|
|
"valid_targets_mean": 2972.2,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 0.565015479876161,
|
|
"grad_norm": 0.46765288393783766,
|
|
"learning_rate": 3.989966531637414e-05,
|
|
"loss": 0.6026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6708934307098389,
|
|
"step": 365,
|
|
"valid_targets_mean": 3760.0,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 0.5727554179566563,
|
|
"grad_norm": 0.35568234548763034,
|
|
"learning_rate": 3.989221467793618e-05,
|
|
"loss": 0.6828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6310309171676636,
|
|
"step": 370,
|
|
"valid_targets_mean": 4103.2,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 0.5804953560371517,
|
|
"grad_norm": 0.3853331235626357,
|
|
"learning_rate": 3.988449796939925e-05,
|
|
"loss": 0.7051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7051464319229126,
|
|
"step": 375,
|
|
"valid_targets_mean": 3301.9,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.2958366873442947,
|
|
"learning_rate": 3.9876515293978876e-05,
|
|
"loss": 0.6834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6605377197265625,
|
|
"step": 380,
|
|
"valid_targets_mean": 4432.9,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 0.5959752321981424,
|
|
"grad_norm": 0.30173698641891955,
|
|
"learning_rate": 3.986826675844804e-05,
|
|
"loss": 0.6757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6649307012557983,
|
|
"step": 385,
|
|
"valid_targets_mean": 4270.7,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.6037151702786377,
|
|
"grad_norm": 0.27833555450463027,
|
|
"learning_rate": 3.9859752473135775e-05,
|
|
"loss": 0.6734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6818743348121643,
|
|
"step": 390,
|
|
"valid_targets_mean": 4288.7,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.6114551083591331,
|
|
"grad_norm": 0.2663665897444136,
|
|
"learning_rate": 3.985097255192567e-05,
|
|
"loss": 0.678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485745906829834,
|
|
"step": 395,
|
|
"valid_targets_mean": 4493.8,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 0.6191950464396285,
|
|
"grad_norm": 0.2929322730570602,
|
|
"learning_rate": 3.9841927112254345e-05,
|
|
"loss": 0.6564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6103849411010742,
|
|
"step": 400,
|
|
"valid_targets_mean": 4781.4,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 0.6269349845201239,
|
|
"grad_norm": 0.26858533498718284,
|
|
"learning_rate": 3.983261627510989e-05,
|
|
"loss": 0.6728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6421018838882446,
|
|
"step": 405,
|
|
"valid_targets_mean": 4028.6,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 0.6346749226006192,
|
|
"grad_norm": 0.3072915656621856,
|
|
"learning_rate": 3.982304016503023e-05,
|
|
"loss": 0.6577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.693551778793335,
|
|
"step": 410,
|
|
"valid_targets_mean": 3116.2,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 0.6424148606811145,
|
|
"grad_norm": 0.27587900136940297,
|
|
"learning_rate": 3.9813198910101504e-05,
|
|
"loss": 0.6737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6969301700592041,
|
|
"step": 415,
|
|
"valid_targets_mean": 4106.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.6501547987616099,
|
|
"grad_norm": 0.2760678689799395,
|
|
"learning_rate": 3.980309264195628e-05,
|
|
"loss": 0.6694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6591912508010864,
|
|
"step": 420,
|
|
"valid_targets_mean": 3609.7,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 0.6578947368421053,
|
|
"grad_norm": 0.2764823821422492,
|
|
"learning_rate": 3.9792721495771854e-05,
|
|
"loss": 0.675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6644749641418457,
|
|
"step": 425,
|
|
"valid_targets_mean": 4175.3,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.6656346749226006,
|
|
"grad_norm": 0.31453512479026563,
|
|
"learning_rate": 3.978208561026843e-05,
|
|
"loss": 0.653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.642825186252594,
|
|
"step": 430,
|
|
"valid_targets_mean": 3942.0,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 0.673374613003096,
|
|
"grad_norm": 0.286493864202461,
|
|
"learning_rate": 3.977118512770725e-05,
|
|
"loss": 0.671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6697770357131958,
|
|
"step": 435,
|
|
"valid_targets_mean": 3787.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.6811145510835913,
|
|
"grad_norm": 0.4174632741114651,
|
|
"learning_rate": 3.976002019388868e-05,
|
|
"loss": 0.6211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5744283199310303,
|
|
"step": 440,
|
|
"valid_targets_mean": 3564.7,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 0.6888544891640866,
|
|
"grad_norm": 0.28972661984365233,
|
|
"learning_rate": 3.97485909581503e-05,
|
|
"loss": 0.5594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5278236865997314,
|
|
"step": 445,
|
|
"valid_targets_mean": 4363.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.6965944272445821,
|
|
"grad_norm": 0.3296186384600296,
|
|
"learning_rate": 3.9736897573364877e-05,
|
|
"loss": 0.5549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5294859409332275,
|
|
"step": 450,
|
|
"valid_targets_mean": 3516.3,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 0.7043343653250774,
|
|
"grad_norm": 0.2672412958327892,
|
|
"learning_rate": 3.972494019593833e-05,
|
|
"loss": 0.5541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5282274484634399,
|
|
"step": 455,
|
|
"valid_targets_mean": 4421.9,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 0.7120743034055728,
|
|
"grad_norm": 0.3206868474953304,
|
|
"learning_rate": 3.971271898580762e-05,
|
|
"loss": 0.5622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5659201741218567,
|
|
"step": 460,
|
|
"valid_targets_mean": 3265.3,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 0.7198142414860681,
|
|
"grad_norm": 0.28235198735827144,
|
|
"learning_rate": 3.970023410643865e-05,
|
|
"loss": 0.5327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4957345724105835,
|
|
"step": 465,
|
|
"valid_targets_mean": 4266.4,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 0.7275541795665634,
|
|
"grad_norm": 0.29391199556923253,
|
|
"learning_rate": 3.968748572482403e-05,
|
|
"loss": 0.561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5639551281929016,
|
|
"step": 470,
|
|
"valid_targets_mean": 3498.2,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 0.7352941176470589,
|
|
"grad_norm": 0.25581799093107427,
|
|
"learning_rate": 3.967447401148089e-05,
|
|
"loss": 0.5388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5359454154968262,
|
|
"step": 475,
|
|
"valid_targets_mean": 4387.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.7430340557275542,
|
|
"grad_norm": 0.29431985332098287,
|
|
"learning_rate": 3.966119914044857e-05,
|
|
"loss": 0.5456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5397093892097473,
|
|
"step": 480,
|
|
"valid_targets_mean": 3467.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 0.7507739938080495,
|
|
"grad_norm": 0.24926590239696003,
|
|
"learning_rate": 3.9647661289286285e-05,
|
|
"loss": 0.5301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5277090072631836,
|
|
"step": 485,
|
|
"valid_targets_mean": 4467.1,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.7585139318885449,
|
|
"grad_norm": 0.27728231604309744,
|
|
"learning_rate": 3.9633860639070784e-05,
|
|
"loss": 0.5516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5744354724884033,
|
|
"step": 490,
|
|
"valid_targets_mean": 3536.6,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 0.7662538699690402,
|
|
"grad_norm": 0.26060206848407785,
|
|
"learning_rate": 3.961979737439389e-05,
|
|
"loss": 0.5399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5519894361495972,
|
|
"step": 495,
|
|
"valid_targets_mean": 4088.9,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 0.7739938080495357,
|
|
"grad_norm": 0.2748544212180825,
|
|
"learning_rate": 3.960547168336008e-05,
|
|
"loss": 0.5448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5477074980735779,
|
|
"step": 500,
|
|
"valid_targets_mean": 3560.9,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 0.781733746130031,
|
|
"grad_norm": 0.2372781074226009,
|
|
"learning_rate": 3.95908837575839e-05,
|
|
"loss": 0.5356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5239625573158264,
|
|
"step": 505,
|
|
"valid_targets_mean": 4477.0,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.7894736842105263,
|
|
"grad_norm": 0.29987857608813384,
|
|
"learning_rate": 3.957603379218745e-05,
|
|
"loss": 0.5378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5479399561882019,
|
|
"step": 510,
|
|
"valid_targets_mean": 3193.1,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 0.7972136222910217,
|
|
"grad_norm": 0.23879296426110705,
|
|
"learning_rate": 3.956092198579779e-05,
|
|
"loss": 0.5453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5285787582397461,
|
|
"step": 515,
|
|
"valid_targets_mean": 4601.6,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.804953560371517,
|
|
"grad_norm": 0.3003101747802433,
|
|
"learning_rate": 3.9545548540544234e-05,
|
|
"loss": 0.5385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5538339614868164,
|
|
"step": 520,
|
|
"valid_targets_mean": 3430.9,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 0.8126934984520123,
|
|
"grad_norm": 0.27713863446143705,
|
|
"learning_rate": 3.9529913662055674e-05,
|
|
"loss": 0.5245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5210225582122803,
|
|
"step": 525,
|
|
"valid_targets_mean": 3828.6,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 0.8204334365325078,
|
|
"grad_norm": 0.31802575452342136,
|
|
"learning_rate": 3.9514017559457836e-05,
|
|
"loss": 0.6272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5316219925880432,
|
|
"step": 530,
|
|
"valid_targets_mean": 3390.7,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.8281733746130031,
|
|
"grad_norm": 0.4400672120594183,
|
|
"learning_rate": 3.949786044537045e-05,
|
|
"loss": 0.532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.514349639415741,
|
|
"step": 535,
|
|
"valid_targets_mean": 3718.5,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 0.8359133126934984,
|
|
"grad_norm": 0.3180404431328225,
|
|
"learning_rate": 3.948144253590444e-05,
|
|
"loss": 0.6409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5209081172943115,
|
|
"step": 540,
|
|
"valid_targets_mean": 3193.4,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.8436532507739938,
|
|
"grad_norm": 0.2852430394353412,
|
|
"learning_rate": 3.946476405065905e-05,
|
|
"loss": 0.5253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5139719247817993,
|
|
"step": 545,
|
|
"valid_targets_mean": 3690.3,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 0.8513931888544891,
|
|
"grad_norm": 0.30582614746042297,
|
|
"learning_rate": 3.944782521271883e-05,
|
|
"loss": 0.5889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5461388230323792,
|
|
"step": 550,
|
|
"valid_targets_mean": 3144.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.8591331269349846,
|
|
"grad_norm": 1.8702796127043897,
|
|
"learning_rate": 3.943062624865073e-05,
|
|
"loss": 0.5224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5273900628089905,
|
|
"step": 555,
|
|
"valid_targets_mean": 3871.4,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.8668730650154799,
|
|
"grad_norm": 0.32225175874177364,
|
|
"learning_rate": 3.941316738850104e-05,
|
|
"loss": 0.5828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5342344045639038,
|
|
"step": 560,
|
|
"valid_targets_mean": 3256.5,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 0.8746130030959752,
|
|
"grad_norm": 0.2846234612458556,
|
|
"learning_rate": 3.93954488657923e-05,
|
|
"loss": 0.5246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5348793268203735,
|
|
"step": 565,
|
|
"valid_targets_mean": 3665.0,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 0.35211387033157693,
|
|
"learning_rate": 3.9377470917520195e-05,
|
|
"loss": 0.6338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.547661542892456,
|
|
"step": 570,
|
|
"valid_targets_mean": 3457.3,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 0.8900928792569659,
|
|
"grad_norm": 0.3402558497079927,
|
|
"learning_rate": 3.935923378415039e-05,
|
|
"loss": 0.5159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5063190460205078,
|
|
"step": 575,
|
|
"valid_targets_mean": 3141.0,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.8978328173374613,
|
|
"grad_norm": 0.36129320683677985,
|
|
"learning_rate": 3.934073770961529e-05,
|
|
"loss": 0.6167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5281348824501038,
|
|
"step": 580,
|
|
"valid_targets_mean": 3509.7,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 0.9055727554179567,
|
|
"grad_norm": 0.31889906529021045,
|
|
"learning_rate": 3.93219829413108e-05,
|
|
"loss": 0.5292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5105188488960266,
|
|
"step": 585,
|
|
"valid_targets_mean": 3106.4,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.913312693498452,
|
|
"grad_norm": 0.3370879399958154,
|
|
"learning_rate": 3.9302969730093e-05,
|
|
"loss": 0.5646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5129860639572144,
|
|
"step": 590,
|
|
"valid_targets_mean": 3530.0,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.9210526315789473,
|
|
"grad_norm": 0.3392191717648417,
|
|
"learning_rate": 3.928369833027481e-05,
|
|
"loss": 0.5261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5395605564117432,
|
|
"step": 595,
|
|
"valid_targets_mean": 3219.6,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 0.9287925696594427,
|
|
"grad_norm": 1.7002170982642362,
|
|
"learning_rate": 3.9264168999622546e-05,
|
|
"loss": 0.5264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3979344069957733,
|
|
"step": 600,
|
|
"valid_targets_mean": 4948.2,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 0.9365325077399381,
|
|
"grad_norm": 0.6142730945509727,
|
|
"learning_rate": 3.924438199935254e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3134812116622925,
|
|
"step": 605,
|
|
"valid_targets_mean": 4927.9,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 0.9442724458204335,
|
|
"grad_norm": 0.5052147029359612,
|
|
"learning_rate": 3.922433759412757e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32525157928466797,
|
|
"step": 610,
|
|
"valid_targets_mean": 4602.5,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 0.9520123839009288,
|
|
"grad_norm": 0.3812393438896603,
|
|
"learning_rate": 3.920403605205338e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688894271850586,
|
|
"step": 615,
|
|
"valid_targets_mean": 4983.0,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 0.9597523219814241,
|
|
"grad_norm": 0.34925410568953047,
|
|
"learning_rate": 3.918347764467506e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836517095565796,
|
|
"step": 620,
|
|
"valid_targets_mean": 4184.5,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.9674922600619195,
|
|
"grad_norm": 0.351666818591646,
|
|
"learning_rate": 3.9162662646973435e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27183276414871216,
|
|
"step": 625,
|
|
"valid_targets_mean": 4258.2,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.9752321981424149,
|
|
"grad_norm": 0.31582740166973394,
|
|
"learning_rate": 3.914159133736137e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672705054283142,
|
|
"step": 630,
|
|
"valid_targets_mean": 4959.4,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 0.9829721362229102,
|
|
"grad_norm": 0.4536433142490171,
|
|
"learning_rate": 3.9120263997680045e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28067830204963684,
|
|
"step": 635,
|
|
"valid_targets_mean": 3119.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.9907120743034056,
|
|
"grad_norm": 0.39515712109965,
|
|
"learning_rate": 3.909868091319521e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588626742362976,
|
|
"step": 640,
|
|
"valid_targets_mean": 2866.9,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 0.9984520123839009,
|
|
"grad_norm": 0.4547636659189772,
|
|
"learning_rate": 3.907684237259335e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707788646221161,
|
|
"step": 645,
|
|
"valid_targets_mean": 3248.7,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 1.0061919504643964,
|
|
"grad_norm": 0.5005937194203448,
|
|
"learning_rate": 3.905474866797781e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3151233196258545,
|
|
"step": 650,
|
|
"valid_targets_mean": 3799.7,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 1.0139318885448916,
|
|
"grad_norm": 0.4070325949618539,
|
|
"learning_rate": 3.9032400094864914e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30069494247436523,
|
|
"step": 655,
|
|
"valid_targets_mean": 3723.8,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 1.021671826625387,
|
|
"grad_norm": 0.321059771710596,
|
|
"learning_rate": 3.9009796952180015e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29494786262512207,
|
|
"step": 660,
|
|
"valid_targets_mean": 3488.4,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 1.0294117647058822,
|
|
"grad_norm": 0.2605247246019829,
|
|
"learning_rate": 3.898693954225346e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27534887194633484,
|
|
"step": 665,
|
|
"valid_targets_mean": 3832.2,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.0371517027863777,
|
|
"grad_norm": 0.24671564416743816,
|
|
"learning_rate": 3.8963828170816585e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27223318815231323,
|
|
"step": 670,
|
|
"valid_targets_mean": 3689.5,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 1.0448916408668731,
|
|
"grad_norm": 0.24606612797939645,
|
|
"learning_rate": 3.894046314699762e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651505470275879,
|
|
"step": 675,
|
|
"valid_targets_mean": 3592.6,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 1.0526315789473684,
|
|
"grad_norm": 0.2260900443624755,
|
|
"learning_rate": 3.891684478331752e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718721032142639,
|
|
"step": 680,
|
|
"valid_targets_mean": 3646.3,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.0603715170278638,
|
|
"grad_norm": 0.2600184306548634,
|
|
"learning_rate": 3.889297339568583e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704000473022461,
|
|
"step": 685,
|
|
"valid_targets_mean": 3811.8,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 1.068111455108359,
|
|
"grad_norm": 0.24054067218203404,
|
|
"learning_rate": 3.886884930339643e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27456608414649963,
|
|
"step": 690,
|
|
"valid_targets_mean": 3824.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 1.0758513931888545,
|
|
"grad_norm": 0.33752433348333694,
|
|
"learning_rate": 3.884447282912328e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556321918964386,
|
|
"step": 695,
|
|
"valid_targets_mean": 3658.0,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 1.08359133126935,
|
|
"grad_norm": 0.23605861186508154,
|
|
"learning_rate": 3.8819844298916095e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26629340648651123,
|
|
"step": 700,
|
|
"valid_targets_mean": 3583.1,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 1.0913312693498451,
|
|
"grad_norm": 0.3921003744533098,
|
|
"learning_rate": 3.879496404219599e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.322517454624176,
|
|
"step": 705,
|
|
"valid_targets_mean": 4880.6,
|
|
"valid_targets_min": 3320
|
|
},
|
|
{
|
|
"epoch": 1.0990712074303406,
|
|
"grad_norm": 0.7390921042843152,
|
|
"learning_rate": 3.8769832391751054e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5353279709815979,
|
|
"step": 710,
|
|
"valid_targets_mean": 978.9,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 1.1068111455108358,
|
|
"grad_norm": 0.36775224701738957,
|
|
"learning_rate": 3.874444968373194e-05,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3031452000141144,
|
|
"step": 715,
|
|
"valid_targets_mean": 4807.5,
|
|
"valid_targets_min": 2887
|
|
},
|
|
{
|
|
"epoch": 1.1145510835913313,
|
|
"grad_norm": 1.0786358229542865,
|
|
"learning_rate": 3.871881625764733e-05,
|
|
"loss": 0.3657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5715183019638062,
|
|
"step": 720,
|
|
"valid_targets_mean": 870.8,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 1.1222910216718267,
|
|
"grad_norm": 0.34310443520760536,
|
|
"learning_rate": 3.86929324563594e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2960064709186554,
|
|
"step": 725,
|
|
"valid_targets_mean": 4695.3,
|
|
"valid_targets_min": 3169
|
|
},
|
|
{
|
|
"epoch": 1.130030959752322,
|
|
"grad_norm": 1.0804442648867592,
|
|
"learning_rate": 3.866679862607927e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5478573441505432,
|
|
"step": 730,
|
|
"valid_targets_mean": 994.2,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 1.1377708978328174,
|
|
"grad_norm": 0.360396122030144,
|
|
"learning_rate": 3.864041511636232e-05,
|
|
"loss": 0.4166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28815361857414246,
|
|
"step": 735,
|
|
"valid_targets_mean": 4788.1,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 1.1455108359133126,
|
|
"grad_norm": 0.4087289946359379,
|
|
"learning_rate": 3.861378228010355e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3261953592300415,
|
|
"step": 740,
|
|
"valid_targets_mean": 3206.7,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 1.153250773993808,
|
|
"grad_norm": 0.4780957904103452,
|
|
"learning_rate": 3.858690047353283e-05,
|
|
"loss": 0.4471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30404067039489746,
|
|
"step": 745,
|
|
"valid_targets_mean": 4860.4,
|
|
"valid_targets_min": 3021
|
|
},
|
|
{
|
|
"epoch": 1.1609907120743035,
|
|
"grad_norm": 0.3682399520104118,
|
|
"learning_rate": 3.855977005621019e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935354709625244,
|
|
"step": 750,
|
|
"valid_targets_mean": 4770.2,
|
|
"valid_targets_min": 2896
|
|
},
|
|
{
|
|
"epoch": 1.1687306501547987,
|
|
"grad_norm": 0.6767920695948291,
|
|
"learning_rate": 3.8532391391020955e-05,
|
|
"loss": 0.4261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3012024462223053,
|
|
"step": 755,
|
|
"valid_targets_mean": 4724.9,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 0.36826751681068876,
|
|
"learning_rate": 3.8504764844170903e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29629576206207275,
|
|
"step": 760,
|
|
"valid_targets_mean": 4654.0,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 1.1842105263157894,
|
|
"grad_norm": 0.5833460050254995,
|
|
"learning_rate": 3.847689078518139e-05,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900720238685608,
|
|
"step": 765,
|
|
"valid_targets_mean": 4692.8,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 1.1919504643962848,
|
|
"grad_norm": 0.321508866521892,
|
|
"learning_rate": 3.84487695868844e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898852825164795,
|
|
"step": 770,
|
|
"valid_targets_mean": 5052.9,
|
|
"valid_targets_min": 2772
|
|
},
|
|
{
|
|
"epoch": 1.1996904024767803,
|
|
"grad_norm": 0.6472393134859138,
|
|
"learning_rate": 3.8420401625417515e-05,
|
|
"loss": 0.45,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.300260066986084,
|
|
"step": 775,
|
|
"valid_targets_mean": 5030.0,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 1.2074303405572755,
|
|
"grad_norm": 0.36962146171226384,
|
|
"learning_rate": 3.8391787280218975e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28521478176116943,
|
|
"step": 780,
|
|
"valid_targets_mean": 5112.5,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 1.215170278637771,
|
|
"grad_norm": 0.4413616020249357,
|
|
"learning_rate": 3.836292693402249e-05,
|
|
"loss": 0.4608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3293500542640686,
|
|
"step": 785,
|
|
"valid_targets_mean": 5787.9,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.2229102167182662,
|
|
"grad_norm": 0.29916002249851853,
|
|
"learning_rate": 3.833382097285222e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3384448289871216,
|
|
"step": 790,
|
|
"valid_targets_mean": 6091.0,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 1.2306501547987616,
|
|
"grad_norm": 0.27609290820358207,
|
|
"learning_rate": 3.8304469786017546e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33724185824394226,
|
|
"step": 795,
|
|
"valid_targets_mean": 5457.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 1.238390092879257,
|
|
"grad_norm": 0.23105458725907574,
|
|
"learning_rate": 3.82748737661079e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32450199127197266,
|
|
"step": 800,
|
|
"valid_targets_mean": 5283.4,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 1.2461300309597523,
|
|
"grad_norm": 0.20559856680682617,
|
|
"learning_rate": 3.824503330898751e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31146126985549927,
|
|
"step": 805,
|
|
"valid_targets_mean": 6079.6,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.2538699690402477,
|
|
"grad_norm": 0.20938831137903696,
|
|
"learning_rate": 3.821494881379006e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3465697765350342,
|
|
"step": 810,
|
|
"valid_targets_mean": 5938.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.261609907120743,
|
|
"grad_norm": 0.2087550963572121,
|
|
"learning_rate": 3.818462068291343e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3188783526420593,
|
|
"step": 815,
|
|
"valid_targets_mean": 5991.7,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.2693498452012384,
|
|
"grad_norm": 0.21712779141847421,
|
|
"learning_rate": 3.8154049322014234e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3149917721748352,
|
|
"step": 820,
|
|
"valid_targets_mean": 5768.7,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.2770897832817338,
|
|
"grad_norm": 0.21617519573527436,
|
|
"learning_rate": 3.8123235140002454e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31415867805480957,
|
|
"step": 825,
|
|
"valid_targets_mean": 5973.1,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.284829721362229,
|
|
"grad_norm": 0.21349092464324684,
|
|
"learning_rate": 3.809217854903595e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3223072290420532,
|
|
"step": 830,
|
|
"valid_targets_mean": 5782.9,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.2925696594427245,
|
|
"grad_norm": 0.2533940536498886,
|
|
"learning_rate": 3.806087996451492e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31293028593063354,
|
|
"step": 835,
|
|
"valid_targets_mean": 5733.2,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.3003095975232197,
|
|
"grad_norm": 0.22296870967088264,
|
|
"learning_rate": 3.80293398050764e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32170212268829346,
|
|
"step": 840,
|
|
"valid_targets_mean": 5674.5,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.3080495356037152,
|
|
"grad_norm": 0.21141096835758275,
|
|
"learning_rate": 3.7997558492588604e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3242834210395813,
|
|
"step": 845,
|
|
"valid_targets_mean": 5429.0,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 1.3157894736842106,
|
|
"grad_norm": 0.23409416865690388,
|
|
"learning_rate": 3.796553645214534e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3016406297683716,
|
|
"step": 850,
|
|
"valid_targets_mean": 5852.8,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 1.3235294117647058,
|
|
"grad_norm": 0.2153237116231331,
|
|
"learning_rate": 3.793327411206026e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32699912786483765,
|
|
"step": 855,
|
|
"valid_targets_mean": 5466.6,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 1.3312693498452013,
|
|
"grad_norm": 0.2102593217750858,
|
|
"learning_rate": 3.790077190386119e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29996955394744873,
|
|
"step": 860,
|
|
"valid_targets_mean": 5432.9,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 1.3390092879256965,
|
|
"grad_norm": 0.19443509120589783,
|
|
"learning_rate": 3.786803026228433e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32547545433044434,
|
|
"step": 865,
|
|
"valid_targets_mean": 6355.9,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.346749226006192,
|
|
"grad_norm": 1.2262560300444747,
|
|
"learning_rate": 3.783504962526843e-05,
|
|
"loss": 0.4241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6231747269630432,
|
|
"step": 870,
|
|
"valid_targets_mean": 805.4,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.3544891640866874,
|
|
"grad_norm": 0.26322694744407815,
|
|
"learning_rate": 3.780183043394897e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31365588307380676,
|
|
"step": 875,
|
|
"valid_targets_mean": 6133.8,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 1.3622291021671826,
|
|
"grad_norm": 1.2546441578931171,
|
|
"learning_rate": 3.77683731326522e-05,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.637216329574585,
|
|
"step": 880,
|
|
"valid_targets_mean": 817.7,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 1.369969040247678,
|
|
"grad_norm": 0.21547704699885215,
|
|
"learning_rate": 3.7734678168889265e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3019489645957947,
|
|
"step": 885,
|
|
"valid_targets_mean": 6547.3,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 1.3777089783281733,
|
|
"grad_norm": 0.25460824954473954,
|
|
"learning_rate": 3.7700745993350163e-05,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096722662448883,
|
|
"step": 890,
|
|
"valid_targets_mean": 6934.5,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 1.3854489164086687,
|
|
"grad_norm": 0.20856778695128625,
|
|
"learning_rate": 3.766657705989775e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3086932301521301,
|
|
"step": 895,
|
|
"valid_targets_mean": 6416.0,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 1.3931888544891642,
|
|
"grad_norm": 0.2768476041064059,
|
|
"learning_rate": 3.7632171825561654e-05,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31610509753227234,
|
|
"step": 900,
|
|
"valid_targets_mean": 6050.4,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 1.4009287925696594,
|
|
"grad_norm": 0.23870151875941853,
|
|
"learning_rate": 3.759753075053217e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982058823108673,
|
|
"step": 905,
|
|
"valid_targets_mean": 5863.9,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 1.4086687306501549,
|
|
"grad_norm": 0.28849776936301974,
|
|
"learning_rate": 3.756265429815409e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3145429491996765,
|
|
"step": 910,
|
|
"valid_targets_mean": 6129.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 1.41640866873065,
|
|
"grad_norm": 0.22206749079754642,
|
|
"learning_rate": 3.752754293492054e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30173906683921814,
|
|
"step": 915,
|
|
"valid_targets_mean": 6139.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.4241486068111455,
|
|
"grad_norm": 0.3244540858302964,
|
|
"learning_rate": 3.74921971304667e-05,
|
|
"loss": 0.4415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3162187933921814,
|
|
"step": 920,
|
|
"valid_targets_mean": 6069.3,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 1.431888544891641,
|
|
"grad_norm": 0.2621650464018516,
|
|
"learning_rate": 3.7456617357563544e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015885353088379,
|
|
"step": 925,
|
|
"valid_targets_mean": 6344.3,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.4396284829721362,
|
|
"grad_norm": 0.3458128442881739,
|
|
"learning_rate": 3.74208040921115e-05,
|
|
"loss": 0.4381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30289798974990845,
|
|
"step": 930,
|
|
"valid_targets_mean": 6771.4,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 1.4473684210526316,
|
|
"grad_norm": 0.2406306838992549,
|
|
"learning_rate": 3.738475781313412e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29957884550094604,
|
|
"step": 935,
|
|
"valid_targets_mean": 6569.9,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 1.4551083591331269,
|
|
"grad_norm": 1.0890906718527247,
|
|
"learning_rate": 3.7348479002771626e-05,
|
|
"loss": 0.5439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5633716583251953,
|
|
"step": 940,
|
|
"valid_targets_mean": 2007.4,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 1.4628482972136223,
|
|
"grad_norm": 0.43146436276794325,
|
|
"learning_rate": 3.731196814627451e-05,
|
|
"loss": 0.537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5484505295753479,
|
|
"step": 945,
|
|
"valid_targets_mean": 3351.7,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 0.3693270579395574,
|
|
"learning_rate": 3.727522573199698e-05,
|
|
"loss": 0.5286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5364041328430176,
|
|
"step": 950,
|
|
"valid_targets_mean": 3195.7,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 1.478328173374613,
|
|
"grad_norm": 0.29530664061699236,
|
|
"learning_rate": 3.7238252251390516e-05,
|
|
"loss": 0.5248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5011584758758545,
|
|
"step": 955,
|
|
"valid_targets_mean": 3282.1,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 1.4860681114551084,
|
|
"grad_norm": 0.2944537488166681,
|
|
"learning_rate": 3.72010481989972e-05,
|
|
"loss": 0.5089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48435264825820923,
|
|
"step": 960,
|
|
"valid_targets_mean": 3273.5,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 1.4938080495356036,
|
|
"grad_norm": 0.25308284878628706,
|
|
"learning_rate": 3.716361407244318e-05,
|
|
"loss": 0.5143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45746666193008423,
|
|
"step": 965,
|
|
"valid_targets_mean": 3904.3,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 1.501547987616099,
|
|
"grad_norm": 0.2800913113576946,
|
|
"learning_rate": 3.712595037243196e-05,
|
|
"loss": 0.5209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5511980056762695,
|
|
"step": 970,
|
|
"valid_targets_mean": 3417.9,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.5092879256965945,
|
|
"grad_norm": 0.3425451493605539,
|
|
"learning_rate": 3.708805760273772e-05,
|
|
"loss": 0.5137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5454850196838379,
|
|
"step": 975,
|
|
"valid_targets_mean": 2151.9,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.5170278637770898,
|
|
"grad_norm": 0.3787810658576009,
|
|
"learning_rate": 3.704993627019862e-05,
|
|
"loss": 0.5053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4864479601383209,
|
|
"step": 980,
|
|
"valid_targets_mean": 2757.0,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 1.524767801857585,
|
|
"grad_norm": 0.2921403855061511,
|
|
"learning_rate": 3.701158688470995e-05,
|
|
"loss": 0.5144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5254135131835938,
|
|
"step": 985,
|
|
"valid_targets_mean": 2699.5,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.5325077399380804,
|
|
"grad_norm": 0.2426840397044415,
|
|
"learning_rate": 3.697300995921734e-05,
|
|
"loss": 0.5033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4875762462615967,
|
|
"step": 990,
|
|
"valid_targets_mean": 3419.5,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 1.5402476780185759,
|
|
"grad_norm": 0.24448603182217452,
|
|
"learning_rate": 3.6934206009709924e-05,
|
|
"loss": 0.504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49858126044273376,
|
|
"step": 995,
|
|
"valid_targets_mean": 3438.5,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.5479876160990713,
|
|
"grad_norm": 0.22965528888914688,
|
|
"learning_rate": 3.689517555521339e-05,
|
|
"loss": 0.5053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4831920862197876,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4330.6,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 1.5557275541795665,
|
|
"grad_norm": 0.31468770956494635,
|
|
"learning_rate": 3.685591911778309e-05,
|
|
"loss": 0.5045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5404433608055115,
|
|
"step": 1005,
|
|
"valid_targets_mean": 2712.3,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.5634674922600618,
|
|
"grad_norm": 0.5817705512269371,
|
|
"learning_rate": 3.681643722249701e-05,
|
|
"loss": 0.529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6638132333755493,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3619.0,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 1.5712074303405572,
|
|
"grad_norm": 0.3648360971366639,
|
|
"learning_rate": 3.677673039744879e-05,
|
|
"loss": 0.6352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6222278475761414,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3834.9,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 1.5789473684210527,
|
|
"grad_norm": 0.32698973061983083,
|
|
"learning_rate": 3.673679917374061e-05,
|
|
"loss": 0.6352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.617664098739624,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3756.7,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 1.586687306501548,
|
|
"grad_norm": 0.2830632758552773,
|
|
"learning_rate": 3.669664408547613e-05,
|
|
"loss": 0.638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6287327408790588,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3851.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 1.5944272445820433,
|
|
"grad_norm": 0.2802670751849616,
|
|
"learning_rate": 3.665626566975334e-05,
|
|
"loss": 0.622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6469075083732605,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3610.4,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 1.6021671826625385,
|
|
"grad_norm": 0.5746477792443985,
|
|
"learning_rate": 3.6615664466657356e-05,
|
|
"loss": 0.6188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5779888033866882,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4324.1,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 1.609907120743034,
|
|
"grad_norm": 0.2611196743984416,
|
|
"learning_rate": 3.657484101925319e-05,
|
|
"loss": 0.6334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6118436455726624,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3782.9,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 1.6176470588235294,
|
|
"grad_norm": 0.25887727784099573,
|
|
"learning_rate": 3.6533795873578525e-05,
|
|
"loss": 0.6147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5919389128684998,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4237.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.6253869969040249,
|
|
"grad_norm": 0.25732648980068695,
|
|
"learning_rate": 3.64925295786364e-05,
|
|
"loss": 0.618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6029390692710876,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4379.6,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 1.63312693498452,
|
|
"grad_norm": 0.21795849143852913,
|
|
"learning_rate": 3.645104268638782e-05,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5439825057983398,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4701.0,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.6408668730650153,
|
|
"grad_norm": 0.23958202688175403,
|
|
"learning_rate": 3.6409335751744424e-05,
|
|
"loss": 0.6261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.591541051864624,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4621.8,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 1.6486068111455108,
|
|
"grad_norm": 0.2781052550275807,
|
|
"learning_rate": 3.636740933256107e-05,
|
|
"loss": 0.6309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6771111488342285,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3181.1,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.6563467492260062,
|
|
"grad_norm": 0.25712931871804257,
|
|
"learning_rate": 3.632526398962832e-05,
|
|
"loss": 0.6289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.674242377281189,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4021.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.6640866873065017,
|
|
"grad_norm": 0.2961493658083976,
|
|
"learning_rate": 3.628290028666499e-05,
|
|
"loss": 0.6143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6786669492721558,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3290.7,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.671826625386997,
|
|
"grad_norm": 0.23968242835898657,
|
|
"learning_rate": 3.6240318790310575e-05,
|
|
"loss": 0.6234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6534320116043091,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4297.4,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 1.6795665634674921,
|
|
"grad_norm": 0.4884094214850918,
|
|
"learning_rate": 3.61975200701177e-05,
|
|
"loss": 0.5992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5298591256141663,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3676.8,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 1.6873065015479876,
|
|
"grad_norm": 0.3643101812251646,
|
|
"learning_rate": 3.615450469854448e-05,
|
|
"loss": 0.5322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5457352995872498,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3877.6,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 1.695046439628483,
|
|
"grad_norm": 0.30447164520985975,
|
|
"learning_rate": 3.611127325094687e-05,
|
|
"loss": 0.5174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5533092021942139,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3730.8,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 1.7027863777089784,
|
|
"grad_norm": 0.2632240849142495,
|
|
"learning_rate": 3.606782630557096e-05,
|
|
"loss": 0.517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4995309114456177,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3764.0,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 1.7105263157894737,
|
|
"grad_norm": 0.2686990863267933,
|
|
"learning_rate": 3.602416444354527e-05,
|
|
"loss": 0.5177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49753332138061523,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3619.3,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 1.718266253869969,
|
|
"grad_norm": 0.28075216063066616,
|
|
"learning_rate": 3.598028824887294e-05,
|
|
"loss": 0.5112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5117040872573853,
|
|
"step": 1110,
|
|
"valid_targets_mean": 3383.6,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 1.7260061919504643,
|
|
"grad_norm": 0.2936610357214042,
|
|
"learning_rate": 3.5936198308423945e-05,
|
|
"loss": 0.5119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5290514230728149,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3617.2,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 1.7337461300309598,
|
|
"grad_norm": 0.27660324527716695,
|
|
"learning_rate": 3.5891895211927224e-05,
|
|
"loss": 0.5089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4962262511253357,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3628.0,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 1.7414860681114552,
|
|
"grad_norm": 0.3811284473734991,
|
|
"learning_rate": 3.584737955196283e-05,
|
|
"loss": 0.5106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5050268173217773,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3666.4,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 1.7492260061919505,
|
|
"grad_norm": 0.2648889838317306,
|
|
"learning_rate": 3.5802651923953935e-05,
|
|
"loss": 0.4981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49460041522979736,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3281.1,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 1.7569659442724457,
|
|
"grad_norm": 0.2718418618821752,
|
|
"learning_rate": 3.5757712926158955e-05,
|
|
"loss": 0.5084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49924200773239136,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3577.5,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 0.27431418240671784,
|
|
"learning_rate": 3.571256315966347e-05,
|
|
"loss": 0.5108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47402089834213257,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3314.5,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.7724458204334366,
|
|
"grad_norm": 0.2564844836195745,
|
|
"learning_rate": 3.5667203228372223e-05,
|
|
"loss": 0.5129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5020731687545776,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3562.3,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.780185758513932,
|
|
"grad_norm": 0.2772908633347582,
|
|
"learning_rate": 3.562163373900104e-05,
|
|
"loss": 0.5076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5051349997520447,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3536.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.7879256965944272,
|
|
"grad_norm": 0.27783867244585414,
|
|
"learning_rate": 3.557585530106871e-05,
|
|
"loss": 0.5011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5100010633468628,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3570.4,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.7956656346749225,
|
|
"grad_norm": 0.2640495732367976,
|
|
"learning_rate": 3.552986852688882e-05,
|
|
"loss": 0.5176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5041922330856323,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3647.4,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 1.803405572755418,
|
|
"grad_norm": 0.30837490508879645,
|
|
"learning_rate": 3.548367403156161e-05,
|
|
"loss": 0.5026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49832335114479065,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3439.6,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 1.8111455108359134,
|
|
"grad_norm": 0.2448056216842935,
|
|
"learning_rate": 3.543727243296566e-05,
|
|
"loss": 0.4996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4566479027271271,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3415.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.8188854489164088,
|
|
"grad_norm": 0.2686321084902195,
|
|
"learning_rate": 3.539066435174973e-05,
|
|
"loss": 0.5897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4951170086860657,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3413.9,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 1.826625386996904,
|
|
"grad_norm": 0.26531376318379785,
|
|
"learning_rate": 3.5343850411324365e-05,
|
|
"loss": 0.5051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48725682497024536,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3740.7,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 1.8343653250773992,
|
|
"grad_norm": 0.28844405150813635,
|
|
"learning_rate": 3.529683123785364e-05,
|
|
"loss": 0.6051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5289224982261658,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3384.0,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 1.8421052631578947,
|
|
"grad_norm": 0.2875906635912091,
|
|
"learning_rate": 3.52496074602467e-05,
|
|
"loss": 0.4967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4944559335708618,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3341.3,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 1.8498452012383901,
|
|
"grad_norm": 0.2999440429963698,
|
|
"learning_rate": 3.5202179710149424e-05,
|
|
"loss": 0.5507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5002520680427551,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3684.2,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 1.8575851393188856,
|
|
"grad_norm": 0.27378736325845426,
|
|
"learning_rate": 3.5154548621935935e-05,
|
|
"loss": 0.4974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4879138171672821,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3157.4,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 1.8653250773993808,
|
|
"grad_norm": 0.3149901795551711,
|
|
"learning_rate": 3.5106714832700105e-05,
|
|
"loss": 0.5511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5306912660598755,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3240.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 1.873065015479876,
|
|
"grad_norm": 0.2881469633593329,
|
|
"learning_rate": 3.5058678982247077e-05,
|
|
"loss": 0.4956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49573755264282227,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3223.1,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 1.8808049535603715,
|
|
"grad_norm": 0.3398191064146385,
|
|
"learning_rate": 3.501044171308466e-05,
|
|
"loss": 0.5983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5118584632873535,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3842.8,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 1.888544891640867,
|
|
"grad_norm": 0.290175062177776,
|
|
"learning_rate": 3.4962003670414784e-05,
|
|
"loss": 0.4968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5039758682250977,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3401.7,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 1.8962848297213624,
|
|
"grad_norm": 0.31198286705513983,
|
|
"learning_rate": 3.491336550212481e-05,
|
|
"loss": 0.581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5648090839385986,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3138.4,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 1.9040247678018576,
|
|
"grad_norm": 0.2661019694587257,
|
|
"learning_rate": 3.486452785877893e-05,
|
|
"loss": 0.5048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5225221514701843,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3528.3,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 1.9117647058823528,
|
|
"grad_norm": 0.31721144163666776,
|
|
"learning_rate": 3.48154913936094e-05,
|
|
"loss": 0.5345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6026899218559265,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2719.7,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.9195046439628483,
|
|
"grad_norm": 0.26375599112748943,
|
|
"learning_rate": 3.476625676250787e-05,
|
|
"loss": 0.4941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5072019100189209,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3570.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.9272445820433437,
|
|
"grad_norm": 1.196875556763483,
|
|
"learning_rate": 3.4716824624016536e-05,
|
|
"loss": 0.5219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5470794439315796,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2341.9,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 1.9349845201238391,
|
|
"grad_norm": 0.5041948674706175,
|
|
"learning_rate": 3.466719563931941e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29759520292282104,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4556.8,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 1.9427244582043344,
|
|
"grad_norm": 0.5113024800664534,
|
|
"learning_rate": 3.461737047223342e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28335943818092346,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4626.2,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 1.9504643962848296,
|
|
"grad_norm": 0.32182751369038415,
|
|
"learning_rate": 3.456734978919954e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24115651845932007,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4938.1,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 1.958204334365325,
|
|
"grad_norm": 0.3821105449276526,
|
|
"learning_rate": 3.4517134259273914e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25365540385246277,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4161.5,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 1.9659442724458205,
|
|
"grad_norm": 0.4369475053668114,
|
|
"learning_rate": 3.446672455411884e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25452330708503723,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4068.4,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 1.973684210526316,
|
|
"grad_norm": 0.3569061559036345,
|
|
"learning_rate": 3.441612134799385e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24566206336021423,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4988.2,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 1.9814241486068112,
|
|
"grad_norm": 0.45912219986684555,
|
|
"learning_rate": 3.436532531774667e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23875859379768372,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2636.5,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 1.9891640866873064,
|
|
"grad_norm": 0.45580297027169286,
|
|
"learning_rate": 3.431433714280414e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534239888191223,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2712.8,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 1.9969040247678018,
|
|
"grad_norm": 0.31168003129137145,
|
|
"learning_rate": 3.426315750516317e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2331083118915558,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3597.4,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 2.0046439628482973,
|
|
"grad_norm": 0.5103214125393608,
|
|
"learning_rate": 3.4211787089381574e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27903103828430176,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3714.5,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.0123839009287927,
|
|
"grad_norm": 0.39295155182569375,
|
|
"learning_rate": 3.416022658256897e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680414319038391,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3769.2,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 2.0201238390092877,
|
|
"grad_norm": 0.3272626636654759,
|
|
"learning_rate": 3.4108476674377514e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655096650123596,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3845.8,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 2.027863777089783,
|
|
"grad_norm": 0.2732574974596415,
|
|
"learning_rate": 3.405653805699274e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26245835423469543,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3735.0,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 2.0356037151702786,
|
|
"grad_norm": 0.2712390493558475,
|
|
"learning_rate": 3.4004411425124284e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2471611052751541,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3725.4,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 2.043343653250774,
|
|
"grad_norm": 0.2512238217564995,
|
|
"learning_rate": 3.3952097475996577e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25063636898994446,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3718.9,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 2.0510835913312695,
|
|
"grad_norm": 0.22821012580363015,
|
|
"learning_rate": 3.389959690933954e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24928836524486542,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3712.7,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 0.22831571575576312,
|
|
"learning_rate": 3.3846910427379185e-05,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2482607364654541,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3745.0,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 2.06656346749226,
|
|
"grad_norm": 0.23178074845240826,
|
|
"learning_rate": 3.3794038734828274e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23751229047775269,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3624.5,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 2.0743034055727554,
|
|
"grad_norm": 0.24193568839363994,
|
|
"learning_rate": 3.374098253887688e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25756365060806274,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3754.9,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.082043343653251,
|
|
"grad_norm": 0.23695259143606895,
|
|
"learning_rate": 3.368774254918289e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24640145897865295,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3859.0,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 2.0897832817337463,
|
|
"grad_norm": 0.32090577322942737,
|
|
"learning_rate": 3.3634319477862564e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28764480352401733,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4864.0,
|
|
"valid_targets_min": 2389
|
|
},
|
|
{
|
|
"epoch": 2.0975232198142413,
|
|
"grad_norm": 0.2879338699005393,
|
|
"learning_rate": 3.358071403948098e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.280213326215744,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4532.4,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 2.1052631578947367,
|
|
"grad_norm": 0.44386523029058467,
|
|
"learning_rate": 3.352692695104246e-05,
|
|
"loss": 0.4191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900093197822571,
|
|
"step": 1360,
|
|
"valid_targets_mean": 5055.2,
|
|
"valid_targets_min": 2995
|
|
},
|
|
{
|
|
"epoch": 2.113003095975232,
|
|
"grad_norm": 0.3450052019243592,
|
|
"learning_rate": 3.347295893198104e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31770309805870056,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2780.0,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 2.1207430340557276,
|
|
"grad_norm": 0.2892694211884592,
|
|
"learning_rate": 3.341881070415079e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753623127937317,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4765.8,
|
|
"valid_targets_min": 2211
|
|
},
|
|
{
|
|
"epoch": 2.128482972136223,
|
|
"grad_norm": 0.318657661472304,
|
|
"learning_rate": 3.336448299181617e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3088338375091553,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3243.5,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 2.136222910216718,
|
|
"grad_norm": 0.46435258242786137,
|
|
"learning_rate": 3.330997652164238e-05,
|
|
"loss": 0.4346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27842897176742554,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4668.5,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 2.1439628482972135,
|
|
"grad_norm": 0.27840415200949786,
|
|
"learning_rate": 3.325529202268558e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687762975692749,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4707.3,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 2.151702786377709,
|
|
"grad_norm": 0.4995993661021603,
|
|
"learning_rate": 3.3200430226383186e-05,
|
|
"loss": 0.4197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28142714500427246,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4803.9,
|
|
"valid_targets_min": 3123
|
|
},
|
|
{
|
|
"epoch": 2.1594427244582044,
|
|
"grad_norm": 0.2798767360248391,
|
|
"learning_rate": 3.3145391866544065e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2734988331794739,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4720.2,
|
|
"valid_targets_min": 2652
|
|
},
|
|
{
|
|
"epoch": 2.1671826625387,
|
|
"grad_norm": 0.5324473851772071,
|
|
"learning_rate": 3.309017767933874e-05,
|
|
"loss": 0.4,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29299354553222656,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3505.7,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 2.174922600619195,
|
|
"grad_norm": 0.2944261713188788,
|
|
"learning_rate": 3.303478840328951e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27454084157943726,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4646.0,
|
|
"valid_targets_min": 2402
|
|
},
|
|
{
|
|
"epoch": 2.1826625386996903,
|
|
"grad_norm": 0.6217198638470305,
|
|
"learning_rate": 3.297922477926059e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.506507396697998,
|
|
"step": 1410,
|
|
"valid_targets_mean": 973.6,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 2.1904024767801857,
|
|
"grad_norm": 0.34723868101669675,
|
|
"learning_rate": 3.292348755044822e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265421986579895,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4744.8,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 2.198142414860681,
|
|
"grad_norm": 0.590357416595514,
|
|
"learning_rate": 3.286757746237069e-05,
|
|
"loss": 0.42,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3052990436553955,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3931.4,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 2.2058823529411766,
|
|
"grad_norm": 0.3328997389836525,
|
|
"learning_rate": 3.281149526285838e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753206491470337,
|
|
"step": 1425,
|
|
"valid_targets_mean": 5010.2,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 2.2136222910216716,
|
|
"grad_norm": 0.8104802106394998,
|
|
"learning_rate": 3.2755241702043804e-05,
|
|
"loss": 0.4274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5476750135421753,
|
|
"step": 1430,
|
|
"valid_targets_mean": 812.3,
|
|
"valid_targets_min": 129
|
|
},
|
|
{
|
|
"epoch": 2.221362229102167,
|
|
"grad_norm": 0.27975206121231494,
|
|
"learning_rate": 3.269881753235147e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30727869272232056,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5991.4,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 2.2291021671826625,
|
|
"grad_norm": 0.2612526934687287,
|
|
"learning_rate": 3.264222350848794e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31010913848876953,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5973.2,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 2.236842105263158,
|
|
"grad_norm": 0.2455578408806073,
|
|
"learning_rate": 3.258546038743163e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30132877826690674,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5461.6,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 2.2445820433436534,
|
|
"grad_norm": 0.21483936544276544,
|
|
"learning_rate": 3.2528528928422746e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172188997268677,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5406.7,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 2.2523219814241484,
|
|
"grad_norm": 0.2090607792408986,
|
|
"learning_rate": 3.247142989295314e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2924061715602875,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5525.3,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.260061919504644,
|
|
"grad_norm": 0.20293147819430635,
|
|
"learning_rate": 3.241416404475604e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30801934003829956,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5494.2,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 2.2678018575851393,
|
|
"grad_norm": 0.20223727748723483,
|
|
"learning_rate": 3.235673214979596e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055727779865265,
|
|
"step": 1465,
|
|
"valid_targets_mean": 5719.7,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 2.2755417956656347,
|
|
"grad_norm": 0.2244427524352248,
|
|
"learning_rate": 3.229913497625834e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3200482130050659,
|
|
"step": 1470,
|
|
"valid_targets_mean": 5339.7,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 2.28328173374613,
|
|
"grad_norm": 0.20117483244600837,
|
|
"learning_rate": 3.2241373294539335e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31746959686279297,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5534.7,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 2.291021671826625,
|
|
"grad_norm": 0.19763289332027806,
|
|
"learning_rate": 3.218344787723549e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30094313621520996,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5558.2,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 2.2987616099071206,
|
|
"grad_norm": 0.22053499248067207,
|
|
"learning_rate": 3.212535949913342e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002566397190094,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5328.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 2.306501547987616,
|
|
"grad_norm": 0.2018240685164142,
|
|
"learning_rate": 3.206710893719941e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299068808555603,
|
|
"step": 1490,
|
|
"valid_targets_mean": 5696.6,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.3142414860681115,
|
|
"grad_norm": 0.19839043053686353,
|
|
"learning_rate": 3.200869697056909e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29391247034072876,
|
|
"step": 1495,
|
|
"valid_targets_mean": 5787.3,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 2.321981424148607,
|
|
"grad_norm": 0.2100964895221913,
|
|
"learning_rate": 3.195012438053694e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001542091369629,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5574.5,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 2.329721362229102,
|
|
"grad_norm": 0.19379539690676625,
|
|
"learning_rate": 3.189139195054589e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29790928959846497,
|
|
"step": 1505,
|
|
"valid_targets_mean": 5946.5,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 2.3374613003095974,
|
|
"grad_norm": 0.18323635931003598,
|
|
"learning_rate": 3.183250046617681e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967008352279663,
|
|
"step": 1510,
|
|
"valid_targets_mean": 6433.6,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 2.345201238390093,
|
|
"grad_norm": 1.5138239584186803,
|
|
"learning_rate": 3.177345071513802e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5346885919570923,
|
|
"step": 1515,
|
|
"valid_targets_mean": 1094.5,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 0.2210286473923357,
|
|
"learning_rate": 3.171424348725477e-05,
|
|
"loss": 0.3629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890721559524536,
|
|
"step": 1520,
|
|
"valid_targets_mean": 6291.4,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 2.3606811145510838,
|
|
"grad_norm": 0.5329824300505085,
|
|
"learning_rate": 3.165487957445862e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4229048490524292,
|
|
"step": 1525,
|
|
"valid_targets_mean": 1675.7,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 2.3684210526315788,
|
|
"grad_norm": 0.21744773050553187,
|
|
"learning_rate": 3.1595359770776916e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2937527000904083,
|
|
"step": 1530,
|
|
"valid_targets_mean": 6662.4,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 2.376160990712074,
|
|
"grad_norm": 0.345960698993302,
|
|
"learning_rate": 3.153568487232211e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3933253288269043,
|
|
"step": 1535,
|
|
"valid_targets_mean": 2726.5,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 2.3839009287925697,
|
|
"grad_norm": 0.21706802905185751,
|
|
"learning_rate": 3.1475855677281166e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28911012411117554,
|
|
"step": 1540,
|
|
"valid_targets_mean": 6219.8,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 2.391640866873065,
|
|
"grad_norm": 0.6388643057959812,
|
|
"learning_rate": 3.1415872985904834e-05,
|
|
"loss": 0.4203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5680261850357056,
|
|
"step": 1545,
|
|
"valid_targets_mean": 844.2,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 2.3993808049535605,
|
|
"grad_norm": 0.22217536868300833,
|
|
"learning_rate": 3.135573760049697e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30481141805648804,
|
|
"step": 1550,
|
|
"valid_targets_mean": 5912.4,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 2.4071207430340555,
|
|
"grad_norm": 0.4043310355740823,
|
|
"learning_rate": 3.1295450325403816e-05,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41852450370788574,
|
|
"step": 1555,
|
|
"valid_targets_mean": 1662.7,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.414860681114551,
|
|
"grad_norm": 0.21201836882611183,
|
|
"learning_rate": 3.123501196700321e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790394425392151,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6104.8,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 2.4226006191950464,
|
|
"grad_norm": 0.34613568571863146,
|
|
"learning_rate": 3.1174423333693834e-05,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.317615807056427,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2831.9,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 2.430340557275542,
|
|
"grad_norm": 0.22332188953827645,
|
|
"learning_rate": 3.111368523588438e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29257962107658386,
|
|
"step": 1570,
|
|
"valid_targets_mean": 6694.6,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 2.4380804953560373,
|
|
"grad_norm": 0.2971841129235766,
|
|
"learning_rate": 3.1052798485982704e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32863613963127136,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3286.7,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 2.4458204334365323,
|
|
"grad_norm": 0.24010556902131083,
|
|
"learning_rate": 3.099176389838499e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969801127910614,
|
|
"step": 1580,
|
|
"valid_targets_mean": 6532.3,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 2.4535603715170278,
|
|
"grad_norm": 0.8368878018350674,
|
|
"learning_rate": 3.0930582289464845e-05,
|
|
"loss": 0.467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5269646048545837,
|
|
"step": 1585,
|
|
"valid_targets_mean": 1694.4,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 2.461300309597523,
|
|
"grad_norm": 0.3852472209356564,
|
|
"learning_rate": 3.086925447756233e-05,
|
|
"loss": 0.5116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5252423286437988,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2654.1,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 2.4690402476780187,
|
|
"grad_norm": 0.4185079049384377,
|
|
"learning_rate": 3.0807781282973116e-05,
|
|
"loss": 0.5042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4889848530292511,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2251.2,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 2.476780185758514,
|
|
"grad_norm": 0.3251892587321371,
|
|
"learning_rate": 3.0746163527937394e-05,
|
|
"loss": 0.5057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5050521492958069,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3427.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 2.484520123839009,
|
|
"grad_norm": 0.27763604764013067,
|
|
"learning_rate": 3.068440203662897e-05,
|
|
"loss": 0.4862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48870787024497986,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3676.3,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 2.4922600619195046,
|
|
"grad_norm": 0.2978423856246572,
|
|
"learning_rate": 3.062249763514423e-05,
|
|
"loss": 0.4949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4745335876941681,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3749.4,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.28535884423577484,
|
|
"learning_rate": 3.0560451151491015e-05,
|
|
"loss": 0.4777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46801337599754333,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3321.1,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 2.5077399380804954,
|
|
"grad_norm": 0.25573524955054744,
|
|
"learning_rate": 3.049826341557762e-05,
|
|
"loss": 0.4907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46471384167671204,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3494.8,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.515479876160991,
|
|
"grad_norm": 0.2704098081694883,
|
|
"learning_rate": 3.0435935259201694e-05,
|
|
"loss": 0.4928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49206823110580444,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3470.5,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 2.523219814241486,
|
|
"grad_norm": 0.3613891949031483,
|
|
"learning_rate": 3.0373467516039064e-05,
|
|
"loss": 0.4827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.504246175289154,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2027.0,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 2.5309597523219813,
|
|
"grad_norm": 0.2993399465249272,
|
|
"learning_rate": 3.0310861021632623e-05,
|
|
"loss": 0.4876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47332870960235596,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2672.5,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 2.538699690402477,
|
|
"grad_norm": 0.28570519167825026,
|
|
"learning_rate": 3.0248116613381158e-05,
|
|
"loss": 0.4783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48622676730155945,
|
|
"step": 1640,
|
|
"valid_targets_mean": 3140.0,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 2.5464396284829722,
|
|
"grad_norm": 0.260459678455228,
|
|
"learning_rate": 3.0185235130528095e-05,
|
|
"loss": 0.4856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4773997664451599,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3984.5,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 2.5541795665634677,
|
|
"grad_norm": 0.2358431695714774,
|
|
"learning_rate": 3.012221741415036e-05,
|
|
"loss": 0.4695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.453387975692749,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3525.9,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 2.5619195046439627,
|
|
"grad_norm": 0.23437801931318308,
|
|
"learning_rate": 3.0059064307147062e-05,
|
|
"loss": 0.4822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4606804847717285,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4537.2,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 2.569659442724458,
|
|
"grad_norm": 0.37134303269159846,
|
|
"learning_rate": 2.9995776654228234e-05,
|
|
"loss": 0.6226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6754633188247681,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3944.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 2.5773993808049536,
|
|
"grad_norm": 0.3408131034808108,
|
|
"learning_rate": 2.993235530190354e-05,
|
|
"loss": 0.6146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7503311634063721,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3327.8,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 2.585139318885449,
|
|
"grad_norm": 0.2706082871505943,
|
|
"learning_rate": 2.9868801098470962e-05,
|
|
"loss": 0.6142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6656273007392883,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4198.4,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 2.5928792569659445,
|
|
"grad_norm": 0.27419658766605415,
|
|
"learning_rate": 2.9805114894005453e-05,
|
|
"loss": 0.5969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5934143662452698,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3893.0,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 2.6006191950464395,
|
|
"grad_norm": 0.2950157731334292,
|
|
"learning_rate": 2.9741297540347523e-05,
|
|
"loss": 0.6108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6103219985961914,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3704.1,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 2.608359133126935,
|
|
"grad_norm": 0.29152776400220365,
|
|
"learning_rate": 2.9677349891091914e-05,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5810167789459229,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3793.6,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 2.6160990712074303,
|
|
"grad_norm": 0.27885705902361735,
|
|
"learning_rate": 2.9613272801576135e-05,
|
|
"loss": 0.5971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5746978521347046,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3647.5,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 2.623839009287926,
|
|
"grad_norm": 0.27064381881971133,
|
|
"learning_rate": 2.954906712886903e-05,
|
|
"loss": 0.5939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6130411028862,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3801.7,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 2.6315789473684212,
|
|
"grad_norm": 0.23362859956976426,
|
|
"learning_rate": 2.9484733731759328e-05,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5757726430892944,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4259.5,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 2.6393188854489162,
|
|
"grad_norm": 0.24831392611460615,
|
|
"learning_rate": 2.9420273470744157e-05,
|
|
"loss": 0.595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.60444575548172,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4118.3,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 0.23418555573502448,
|
|
"learning_rate": 2.9355687208017487e-05,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5817404985427856,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4160.2,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 2.654798761609907,
|
|
"grad_norm": 0.22545564376461213,
|
|
"learning_rate": 2.9290975807458673e-05,
|
|
"loss": 0.6084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5721398591995239,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4790.0,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 2.6625386996904026,
|
|
"grad_norm": 0.21078992018367956,
|
|
"learning_rate": 2.9226140134620848e-05,
|
|
"loss": 0.5938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5598536729812622,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4677.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 2.670278637770898,
|
|
"grad_norm": 0.22657822343606848,
|
|
"learning_rate": 2.916118105671936e-05,
|
|
"loss": 0.6078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5606298446655273,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3947.1,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 2.678018575851393,
|
|
"grad_norm": 0.3844171308380163,
|
|
"learning_rate": 2.9096099442620175e-05,
|
|
"loss": 0.6036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6283884048461914,
|
|
"step": 1730,
|
|
"valid_targets_mean": 2494.4,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 2.6857585139318885,
|
|
"grad_norm": 0.41712002977487755,
|
|
"learning_rate": 2.9030896162828246e-05,
|
|
"loss": 0.5127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5305888652801514,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3803.9,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 2.693498452012384,
|
|
"grad_norm": 0.32166115956174823,
|
|
"learning_rate": 2.8965572089475884e-05,
|
|
"loss": 0.4985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5770869255065918,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2460.7,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 2.7012383900928794,
|
|
"grad_norm": 0.258440678973098,
|
|
"learning_rate": 2.8900128096311083e-05,
|
|
"loss": 0.5099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5238660573959351,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3748.4,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 2.708978328173375,
|
|
"grad_norm": 0.33202507463884656,
|
|
"learning_rate": 2.8834565058685835e-05,
|
|
"loss": 0.4997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6142587065696716,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2391.1,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 2.71671826625387,
|
|
"grad_norm": 0.27643948174422,
|
|
"learning_rate": 2.876888385354442e-05,
|
|
"loss": 0.4904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5044270753860474,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3659.7,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 2.7244582043343653,
|
|
"grad_norm": 0.2668274942477383,
|
|
"learning_rate": 2.870308535941168e-05,
|
|
"loss": 0.4903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5903304219245911,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3227.0,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 2.7321981424148607,
|
|
"grad_norm": 0.2701999564070377,
|
|
"learning_rate": 2.8637170456381273e-05,
|
|
"loss": 0.4978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5087469220161438,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3305.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.739938080495356,
|
|
"grad_norm": 0.26460285603564915,
|
|
"learning_rate": 2.857114002610388e-05,
|
|
"loss": 0.4918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.555443286895752,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3149.7,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 2.7476780185758516,
|
|
"grad_norm": 0.24516865825694797,
|
|
"learning_rate": 2.8504994951775428e-05,
|
|
"loss": 0.4832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5106723308563232,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3709.8,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 2.7554179566563466,
|
|
"grad_norm": 0.25820869540442093,
|
|
"learning_rate": 2.84387361181253e-05,
|
|
"loss": 0.4903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5207809209823608,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3436.5,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 2.763157894736842,
|
|
"grad_norm": 0.23167125014408785,
|
|
"learning_rate": 2.8372364411404446e-05,
|
|
"loss": 0.4991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5310456156730652,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4309.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.7708978328173375,
|
|
"grad_norm": 0.27139534885971545,
|
|
"learning_rate": 2.8305880719373588e-05,
|
|
"loss": 0.4901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5053688883781433,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3643.2,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 2.778637770897833,
|
|
"grad_norm": 0.27865068722857134,
|
|
"learning_rate": 2.8239285931291287e-05,
|
|
"loss": 0.4914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5141915678977966,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3911.8,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 2.7863777089783284,
|
|
"grad_norm": 0.23277472280633252,
|
|
"learning_rate": 2.8172580937902104e-05,
|
|
"loss": 0.4832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5116044878959656,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3986.0,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 2.7941176470588234,
|
|
"grad_norm": 0.2389418404705503,
|
|
"learning_rate": 2.810576663142465e-05,
|
|
"loss": 0.5022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.499719500541687,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4058.6,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 2.801857585139319,
|
|
"grad_norm": 0.24063241400005703,
|
|
"learning_rate": 2.8038843905539662e-05,
|
|
"loss": 0.487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47190621495246887,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3589.4,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 2.8095975232198143,
|
|
"grad_norm": 0.25893509435646783,
|
|
"learning_rate": 2.7971813655378056e-05,
|
|
"loss": 0.4911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4986731708049774,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3394.9,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 2.8173374613003097,
|
|
"grad_norm": 0.2707819972143254,
|
|
"learning_rate": 2.7904676777508952e-05,
|
|
"loss": 0.5619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5266321897506714,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3667.1,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 2.825077399380805,
|
|
"grad_norm": 0.2621859396364079,
|
|
"learning_rate": 2.7837434169927662e-05,
|
|
"loss": 0.4901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.509067952632904,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3378.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.8328173374613,
|
|
"grad_norm": 0.29154239057914144,
|
|
"learning_rate": 2.7770086732043714e-05,
|
|
"loss": 0.5769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.527146577835083,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2999.8,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.8405572755417956,
|
|
"grad_norm": 0.2662608665524818,
|
|
"learning_rate": 2.7702635364668792e-05,
|
|
"loss": 0.487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5003966093063354,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3465.6,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 2.848297213622291,
|
|
"grad_norm": 0.3577813591653784,
|
|
"learning_rate": 2.7635080970004707e-05,
|
|
"loss": 0.5317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5860791802406311,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2587.6,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 2.8560371517027865,
|
|
"grad_norm": 0.30949967622057983,
|
|
"learning_rate": 2.7567424451631305e-05,
|
|
"loss": 0.4842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5043954849243164,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3274.3,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 2.863777089783282,
|
|
"grad_norm": 0.34789083654937797,
|
|
"learning_rate": 2.7499666714494413e-05,
|
|
"loss": 0.5246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6251345276832581,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2213.1,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 2.871517027863777,
|
|
"grad_norm": 0.27157818345514895,
|
|
"learning_rate": 2.7431808664893717e-05,
|
|
"loss": 0.4875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48893094062805176,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3187.1,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 2.8792569659442724,
|
|
"grad_norm": 0.5298709464965203,
|
|
"learning_rate": 2.736385121047062e-05,
|
|
"loss": 0.5762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9096724390983582,
|
|
"step": 1860,
|
|
"valid_targets_mean": 1165.0,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 2.886996904024768,
|
|
"grad_norm": 0.2749016246602086,
|
|
"learning_rate": 2.7295795260196158e-05,
|
|
"loss": 0.4833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4703792631626129,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3309.6,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 2.8947368421052633,
|
|
"grad_norm": 0.567256773685926,
|
|
"learning_rate": 2.7227641724358784e-05,
|
|
"loss": 0.5513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9056364297866821,
|
|
"step": 1870,
|
|
"valid_targets_mean": 1005.5,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 2.9024767801857587,
|
|
"grad_norm": 0.256707672760419,
|
|
"learning_rate": 2.715939151455222e-05,
|
|
"loss": 0.4974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46790891885757446,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3382.6,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 2.9102167182662537,
|
|
"grad_norm": 0.4069069061124025,
|
|
"learning_rate": 2.709104554366325e-05,
|
|
"loss": 0.5021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.62578284740448,
|
|
"step": 1880,
|
|
"valid_targets_mean": 1660.4,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 2.917956656346749,
|
|
"grad_norm": 0.27219624417713606,
|
|
"learning_rate": 2.702260472585954e-05,
|
|
"loss": 0.4981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4624258279800415,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3356.6,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 2.9256965944272446,
|
|
"grad_norm": 0.3472972243010068,
|
|
"learning_rate": 2.6954069976577382e-05,
|
|
"loss": 0.4983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.583042323589325,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2242.9,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 2.93343653250774,
|
|
"grad_norm": 0.7399054747979198,
|
|
"learning_rate": 2.6885442212509433e-05,
|
|
"loss": 0.3424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29441481828689575,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4536.9,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 0.3893940609364208,
|
|
"learning_rate": 2.681672235159252e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24782559275627136,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4706.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 2.9489164086687305,
|
|
"grad_norm": 0.35358777717340467,
|
|
"learning_rate": 2.6747911312995288e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21790900826454163,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5179.4,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 2.956656346749226,
|
|
"grad_norm": 0.3383674658067145,
|
|
"learning_rate": 2.6679010017105943e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24186939001083374,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4519.4,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 2.9643962848297214,
|
|
"grad_norm": 0.3758217025273845,
|
|
"learning_rate": 2.6610019385519937e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24426902830600739,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4008.1,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 2.972136222910217,
|
|
"grad_norm": 0.39829654305760687,
|
|
"learning_rate": 2.654094034102766e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24655434489250183,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4820.1,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 2.9798761609907123,
|
|
"grad_norm": 0.586412451248822,
|
|
"learning_rate": 2.647177380760203e-05,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24943487346172333,
|
|
"step": 1925,
|
|
"valid_targets_mean": 2952.3,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 2.9876160990712073,
|
|
"grad_norm": 0.3992478671522089,
|
|
"learning_rate": 2.6402520710386236e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23647047579288483,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2867.5,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 2.9953560371517027,
|
|
"grad_norm": 0.2848537134364877,
|
|
"learning_rate": 2.633318197568127e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21147334575653076,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3687.5,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 3.003095975232198,
|
|
"grad_norm": 0.7434059358374174,
|
|
"learning_rate": 2.6263758530933586e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701603174209595,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3785.3,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 3.0108359133126936,
|
|
"grad_norm": 0.4718821721818314,
|
|
"learning_rate": 2.6194251304722688e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608640491962433,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3751.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 3.018575851393189,
|
|
"grad_norm": 0.3281933384271648,
|
|
"learning_rate": 2.6124661226748708e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24726948142051697,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3883.6,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 3.026315789473684,
|
|
"grad_norm": 0.2805179987080011,
|
|
"learning_rate": 2.6054989227819967e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24924269318580627,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3835.6,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 3.0340557275541795,
|
|
"grad_norm": 0.2676943300602903,
|
|
"learning_rate": 2.5985236239840525e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24992302060127258,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3575.0,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 3.041795665634675,
|
|
"grad_norm": 0.2238207530657882,
|
|
"learning_rate": 2.591540319579771e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24594703316688538,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3773.7,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 3.0495356037151704,
|
|
"grad_norm": 0.2435124161246869,
|
|
"learning_rate": 2.5845491029749678e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23646098375320435,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3777.3,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 3.057275541795666,
|
|
"grad_norm": 0.20981205870058817,
|
|
"learning_rate": 2.577550067681285e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23276078701019287,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3814.9,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 3.065015479876161,
|
|
"grad_norm": 0.21968601516898223,
|
|
"learning_rate": 2.5705433073149464e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2418629229068756,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3898.8,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 3.0727554179566563,
|
|
"grad_norm": 0.22126041507586686,
|
|
"learning_rate": 2.5635289155955034e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23148900270462036,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3729.5,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 3.0804953560371517,
|
|
"grad_norm": 0.21624805673651615,
|
|
"learning_rate": 2.5565069863445802e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24025356769561768,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3804.0,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 3.088235294117647,
|
|
"grad_norm": 0.37540814041745507,
|
|
"learning_rate": 2.54947761348462e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857537865638733,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4964.8,
|
|
"valid_targets_min": 2437
|
|
},
|
|
{
|
|
"epoch": 3.0959752321981426,
|
|
"grad_norm": 0.26180484065347887,
|
|
"learning_rate": 2.5424408910376296e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26855945587158203,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4907.5,
|
|
"valid_targets_min": 3098
|
|
},
|
|
{
|
|
"epoch": 3.1037151702786376,
|
|
"grad_norm": 0.5508976321829111,
|
|
"learning_rate": 2.5353969131239204e-05,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704641819000244,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4839.3,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 3.111455108359133,
|
|
"grad_norm": 0.27206305112570145,
|
|
"learning_rate": 2.5283457739608494e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743394076824188,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4763.2,
|
|
"valid_targets_min": 2304
|
|
},
|
|
{
|
|
"epoch": 3.1191950464396285,
|
|
"grad_norm": 0.3853955993345269,
|
|
"learning_rate": 2.5212875678615603e-05,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695494294166565,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4746.2,
|
|
"valid_targets_min": 2753
|
|
},
|
|
{
|
|
"epoch": 3.126934984520124,
|
|
"grad_norm": 0.2576410634917597,
|
|
"learning_rate": 2.5142223892337207e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25809624791145325,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4622.8,
|
|
"valid_targets_min": 2830
|
|
},
|
|
{
|
|
"epoch": 3.1346749226006194,
|
|
"grad_norm": 0.5319181678962628,
|
|
"learning_rate": 2.50715033257826e-05,
|
|
"loss": 0.4212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828100323677063,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4741.5,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 3.1424148606811144,
|
|
"grad_norm": 0.3082649435055225,
|
|
"learning_rate": 2.5000714924881056e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263888955116272,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4781.2,
|
|
"valid_targets_min": 3327
|
|
},
|
|
{
|
|
"epoch": 3.15015479876161,
|
|
"grad_norm": 0.48673364500410554,
|
|
"learning_rate": 2.492985963646917e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38943415880203247,
|
|
"step": 2035,
|
|
"valid_targets_mean": 1474.7,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 3.1578947368421053,
|
|
"grad_norm": 0.32308036400484774,
|
|
"learning_rate": 2.48589384082782e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2712411880493164,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4764.6,
|
|
"valid_targets_min": 2478
|
|
},
|
|
{
|
|
"epoch": 3.1656346749226008,
|
|
"grad_norm": 1.0188946811199884,
|
|
"learning_rate": 2.4787952188921384e-05,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5174533128738403,
|
|
"step": 2045,
|
|
"valid_targets_mean": 905.6,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 3.173374613003096,
|
|
"grad_norm": 0.3087682849097361,
|
|
"learning_rate": 2.4716901927881253e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25654029846191406,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4740.0,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 3.181114551083591,
|
|
"grad_norm": 0.6735138689902527,
|
|
"learning_rate": 2.464578857549694e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46948477625846863,
|
|
"step": 2055,
|
|
"valid_targets_mean": 878.2,
|
|
"valid_targets_min": 159
|
|
},
|
|
{
|
|
"epoch": 3.1888544891640866,
|
|
"grad_norm": 0.298237658260617,
|
|
"learning_rate": 2.4574613082951464e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622295618057251,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4672.1,
|
|
"valid_targets_min": 2558
|
|
},
|
|
{
|
|
"epoch": 3.196594427244582,
|
|
"grad_norm": 0.610108048223464,
|
|
"learning_rate": 2.4503376402258983e-05,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45512655377388,
|
|
"step": 2065,
|
|
"valid_targets_mean": 998.0,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 3.2043343653250775,
|
|
"grad_norm": 0.3010919949822854,
|
|
"learning_rate": 2.4432079486252123e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26082831621170044,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4944.3,
|
|
"valid_targets_min": 3172
|
|
},
|
|
{
|
|
"epoch": 3.212074303405573,
|
|
"grad_norm": 0.813400413191084,
|
|
"learning_rate": 2.4360723288569155e-05,
|
|
"loss": 0.3574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45499011874198914,
|
|
"step": 2075,
|
|
"valid_targets_mean": 983.4,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 3.219814241486068,
|
|
"grad_norm": 0.255438363674361,
|
|
"learning_rate": 2.4289308763641293e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29936984181404114,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5441.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 3.2275541795665634,
|
|
"grad_norm": 0.2466146241648484,
|
|
"learning_rate": 2.421783686667992e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31255465745925903,
|
|
"step": 2085,
|
|
"valid_targets_mean": 5756.2,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 3.235294117647059,
|
|
"grad_norm": 0.23160487038615185,
|
|
"learning_rate": 2.4146308553663782e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28331291675567627,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5889.1,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 3.2430340557275543,
|
|
"grad_norm": 0.2158686429382094,
|
|
"learning_rate": 2.4074724781326237e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3194703459739685,
|
|
"step": 2095,
|
|
"valid_targets_mean": 5961.3,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 3.2507739938080498,
|
|
"grad_norm": 0.19636858657804998,
|
|
"learning_rate": 2.4003086507142453e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29831090569496155,
|
|
"step": 2100,
|
|
"valid_targets_mean": 5714.0,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 3.2585139318885448,
|
|
"grad_norm": 0.2134245684220827,
|
|
"learning_rate": 2.3931394689316574e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30476224422454834,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5337.6,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 3.26625386996904,
|
|
"grad_norm": 0.21687039038000902,
|
|
"learning_rate": 2.3859650286768928e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968940734863281,
|
|
"step": 2110,
|
|
"valid_targets_mean": 5438.8,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 3.2739938080495357,
|
|
"grad_norm": 0.1992242859348089,
|
|
"learning_rate": 2.3787854259123205e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977619767189026,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5660.5,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 3.281733746130031,
|
|
"grad_norm": 0.20016910827872322,
|
|
"learning_rate": 2.37160075666936e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29510340094566345,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5774.7,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 3.2894736842105265,
|
|
"grad_norm": 0.1999478791141014,
|
|
"learning_rate": 2.364411117047198e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847527861595154,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5829.5,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 3.2972136222910216,
|
|
"grad_norm": 0.2011987204029175,
|
|
"learning_rate": 2.3572166032115032e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768564820289612,
|
|
"step": 2130,
|
|
"valid_targets_mean": 5642.9,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 3.304953560371517,
|
|
"grad_norm": 0.19052463472059494,
|
|
"learning_rate": 2.3500173113931402e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861713767051697,
|
|
"step": 2135,
|
|
"valid_targets_mean": 6218.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.3126934984520124,
|
|
"grad_norm": 0.22120912941066947,
|
|
"learning_rate": 2.342813337886881e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290706068277359,
|
|
"step": 2140,
|
|
"valid_targets_mean": 5509.6,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 3.320433436532508,
|
|
"grad_norm": 0.20261828824401432,
|
|
"learning_rate": 2.335604779050118e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736980617046356,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5951.1,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 3.3281733746130033,
|
|
"grad_norm": 0.1977503756556589,
|
|
"learning_rate": 2.3283917313015766e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27933841943740845,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5832.3,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 3.3359133126934983,
|
|
"grad_norm": 0.19985067530191788,
|
|
"learning_rate": 2.3211742911200225e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866722643375397,
|
|
"step": 2155,
|
|
"valid_targets_mean": 6418.6,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 3.343653250773994,
|
|
"grad_norm": 0.17992273660540387,
|
|
"learning_rate": 2.313952555042973e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856246829032898,
|
|
"step": 2160,
|
|
"valid_targets_mean": 6394.8,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 3.3513931888544892,
|
|
"grad_norm": 0.2368022920562518,
|
|
"learning_rate": 2.3067266196654056e-05,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28423428535461426,
|
|
"step": 2165,
|
|
"valid_targets_mean": 6349.0,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 3.3591331269349847,
|
|
"grad_norm": 0.18583384119467142,
|
|
"learning_rate": 2.2994965816384673e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751348614692688,
|
|
"step": 2170,
|
|
"valid_targets_mean": 6417.1,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 3.36687306501548,
|
|
"grad_norm": 0.19483535938648608,
|
|
"learning_rate": 2.2922625376681777e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2895943522453308,
|
|
"step": 2175,
|
|
"valid_targets_mean": 7150.5,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 3.374613003095975,
|
|
"grad_norm": 0.7654681516995816,
|
|
"learning_rate": 2.2850245845141404e-05,
|
|
"loss": 0.3429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5419661402702332,
|
|
"step": 2180,
|
|
"valid_targets_mean": 834.6,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 3.3823529411764706,
|
|
"grad_norm": 0.18790728727742792,
|
|
"learning_rate": 2.2777828189882466e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29097840189933777,
|
|
"step": 2185,
|
|
"valid_targets_mean": 6313.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 3.390092879256966,
|
|
"grad_norm": 0.834044396270399,
|
|
"learning_rate": 2.2705373379533787e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.551470160484314,
|
|
"step": 2190,
|
|
"valid_targets_mean": 796.4,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 3.3978328173374615,
|
|
"grad_norm": 0.19950524934444502,
|
|
"learning_rate": 2.263288238322118e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2748904824256897,
|
|
"step": 2195,
|
|
"valid_targets_mean": 6093.4,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.405572755417957,
|
|
"grad_norm": 0.7105021188330463,
|
|
"learning_rate": 2.256035617055446e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5855259895324707,
|
|
"step": 2200,
|
|
"valid_targets_mean": 830.1,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 3.413312693498452,
|
|
"grad_norm": 0.19169772199526086,
|
|
"learning_rate": 2.2487795711614468e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27826640009880066,
|
|
"step": 2205,
|
|
"valid_targets_mean": 6361.8,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 3.4210526315789473,
|
|
"grad_norm": 0.6382550672700259,
|
|
"learning_rate": 2.241520197694013e-05,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5455023050308228,
|
|
"step": 2210,
|
|
"valid_targets_mean": 802.3,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.428792569659443,
|
|
"grad_norm": 0.19658403930227733,
|
|
"learning_rate": 2.2342575937515446e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281864196062088,
|
|
"step": 2215,
|
|
"valid_targets_mean": 6045.7,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 3.4365325077399382,
|
|
"grad_norm": 0.6910818969014576,
|
|
"learning_rate": 2.2269918564756505e-05,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5533510446548462,
|
|
"step": 2220,
|
|
"valid_targets_mean": 778.7,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 3.4442724458204337,
|
|
"grad_norm": 0.20259746728155953,
|
|
"learning_rate": 2.219723083049851e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28305596113204956,
|
|
"step": 2225,
|
|
"valid_targets_mean": 6171.3,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 3.4520123839009287,
|
|
"grad_norm": 0.6559631141416314,
|
|
"learning_rate": 2.2124513706982755e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5851170420646667,
|
|
"step": 2230,
|
|
"valid_targets_mean": 944.8,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 3.459752321981424,
|
|
"grad_norm": 0.36908142151766193,
|
|
"learning_rate": 2.205176816684365e-05,
|
|
"loss": 0.4956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.488143652677536,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3094.3,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.4674922600619196,
|
|
"grad_norm": 0.4670807497096684,
|
|
"learning_rate": 2.1978995183095662e-05,
|
|
"loss": 0.4959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5342972874641418,
|
|
"step": 2240,
|
|
"valid_targets_mean": 1920.0,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 3.475232198142415,
|
|
"grad_norm": 0.3314464750256539,
|
|
"learning_rate": 2.190619572912037e-05,
|
|
"loss": 0.4874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49756696820259094,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2706.2,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 3.4829721362229105,
|
|
"grad_norm": 0.3580749731554559,
|
|
"learning_rate": 2.183337077865338e-05,
|
|
"loss": 0.4747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4797508716583252,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2330.8,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 3.4907120743034055,
|
|
"grad_norm": 0.28216176109838204,
|
|
"learning_rate": 2.1760521305771345e-05,
|
|
"loss": 0.4835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47524362802505493,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3534.0,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 3.498452012383901,
|
|
"grad_norm": 0.2813830030382484,
|
|
"learning_rate": 2.1687648284878912e-05,
|
|
"loss": 0.465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48308515548706055,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3620.2,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 3.5061919504643964,
|
|
"grad_norm": 0.2780766232232125,
|
|
"learning_rate": 2.1614752690695707e-05,
|
|
"loss": 0.4776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45722496509552,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3821.6,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 3.513931888544892,
|
|
"grad_norm": 0.315985804135193,
|
|
"learning_rate": 2.154183549824326e-05,
|
|
"loss": 0.4729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48328864574432373,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3024.7,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 3.5216718266253872,
|
|
"grad_norm": 0.31230075549133257,
|
|
"learning_rate": 2.1468897682832018e-05,
|
|
"loss": 0.4665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4611700177192688,
|
|
"step": 2275,
|
|
"valid_targets_mean": 2763.7,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 3.5294117647058822,
|
|
"grad_norm": 0.3132140616714211,
|
|
"learning_rate": 2.1395940220048276e-05,
|
|
"loss": 0.4797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4537048935890198,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3144.2,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 3.5371517027863777,
|
|
"grad_norm": 0.367466972477817,
|
|
"learning_rate": 2.132296408574109e-05,
|
|
"loss": 0.4617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47464239597320557,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2372.6,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 3.544891640866873,
|
|
"grad_norm": 0.29090930421289685,
|
|
"learning_rate": 2.1249970256009297e-05,
|
|
"loss": 0.4741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4417138695716858,
|
|
"step": 2290,
|
|
"valid_targets_mean": 2830.0,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 3.5526315789473686,
|
|
"grad_norm": 0.28413410844059256,
|
|
"learning_rate": 2.1176959707188396e-05,
|
|
"loss": 0.4605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46339908242225647,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2977.7,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 3.560371517027864,
|
|
"grad_norm": 0.24879077986706072,
|
|
"learning_rate": 2.1103933415837527e-05,
|
|
"loss": 0.4673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4672059118747711,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3759.2,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 3.568111455108359,
|
|
"grad_norm": 0.2948180473459102,
|
|
"learning_rate": 2.1030892358726378e-05,
|
|
"loss": 0.5684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5660414695739746,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4735.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 3.5758513931888545,
|
|
"grad_norm": 0.3189716166953591,
|
|
"learning_rate": 2.095783751282216e-05,
|
|
"loss": 0.5881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5425939559936523,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4437.0,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 3.58359133126935,
|
|
"grad_norm": 0.30510770929671405,
|
|
"learning_rate": 2.088476985527651e-05,
|
|
"loss": 0.6181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5556634068489075,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4264.1,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 3.5913312693498454,
|
|
"grad_norm": 0.27864610568546166,
|
|
"learning_rate": 2.0811690363412417e-05,
|
|
"loss": 0.5984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6041171550750732,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3503.9,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 3.599071207430341,
|
|
"grad_norm": 0.2433644608418967,
|
|
"learning_rate": 2.0738600014711175e-05,
|
|
"loss": 0.5942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6303431987762451,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4163.0,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 3.606811145510836,
|
|
"grad_norm": 0.2536236975453713,
|
|
"learning_rate": 2.0665499786799287e-05,
|
|
"loss": 0.5977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380211114883423,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3816.2,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 3.6145510835913313,
|
|
"grad_norm": 0.2463447157381407,
|
|
"learning_rate": 2.0592390657435395e-05,
|
|
"loss": 0.5855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6047422885894775,
|
|
"step": 2335,
|
|
"valid_targets_mean": 4045.8,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 3.6222910216718267,
|
|
"grad_norm": 0.2810296551323612,
|
|
"learning_rate": 2.0519273604497214e-05,
|
|
"loss": 0.573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5888134241104126,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3426.6,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 3.6300309597523217,
|
|
"grad_norm": 0.2822549978815102,
|
|
"learning_rate": 2.0446149605968426e-05,
|
|
"loss": 0.5879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.56917804479599,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3857.1,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 3.6377708978328176,
|
|
"grad_norm": 0.25038407469356005,
|
|
"learning_rate": 2.037301963992563e-05,
|
|
"loss": 0.5761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6057415008544922,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3702.2,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 3.6455108359133126,
|
|
"grad_norm": 0.2637385459068476,
|
|
"learning_rate": 2.029988468452523e-05,
|
|
"loss": 0.5852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5788705348968506,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4113.9,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 3.653250773993808,
|
|
"grad_norm": 0.23250932401234214,
|
|
"learning_rate": 2.0226745717990375e-05,
|
|
"loss": 0.5969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5905635356903076,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4190.3,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 3.6609907120743035,
|
|
"grad_norm": 0.22672230871354934,
|
|
"learning_rate": 2.0153603718597864e-05,
|
|
"loss": 0.584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5908460021018982,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4339.2,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.6687306501547985,
|
|
"grad_norm": 0.2264758690385569,
|
|
"learning_rate": 2.0080459664665063e-05,
|
|
"loss": 0.5949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5943804979324341,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4695.3,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 3.6764705882352944,
|
|
"grad_norm": 0.2216483472115662,
|
|
"learning_rate": 2.00073145345368e-05,
|
|
"loss": 0.5779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5562400221824646,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4633.9,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 3.6842105263157894,
|
|
"grad_norm": 0.2805903497771566,
|
|
"learning_rate": 1.9934169306572335e-05,
|
|
"loss": 0.5222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4724697470664978,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4319.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 3.691950464396285,
|
|
"grad_norm": 0.26022493906126765,
|
|
"learning_rate": 1.98610249591322e-05,
|
|
"loss": 0.48,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4419862627983093,
|
|
"step": 2385,
|
|
"valid_targets_mean": 4809.1,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 3.6996904024767803,
|
|
"grad_norm": 0.2916735939317325,
|
|
"learning_rate": 1.978788247056517e-05,
|
|
"loss": 0.5088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4868018627166748,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3717.0,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 3.7074303405572753,
|
|
"grad_norm": 0.2473675418923464,
|
|
"learning_rate": 1.9714742819195153e-05,
|
|
"loss": 0.471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40800565481185913,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4199.5,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 3.715170278637771,
|
|
"grad_norm": 0.2584806605970158,
|
|
"learning_rate": 1.964160698330811e-05,
|
|
"loss": 0.5009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4500221014022827,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3818.9,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 3.722910216718266,
|
|
"grad_norm": 0.2148138714487101,
|
|
"learning_rate": 1.9568475941138954e-05,
|
|
"loss": 0.4622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42796361446380615,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4357.0,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 3.7306501547987616,
|
|
"grad_norm": 0.25686193533511126,
|
|
"learning_rate": 1.9495350670858496e-05,
|
|
"loss": 0.5026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45598334074020386,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3918.5,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.738390092879257,
|
|
"grad_norm": 0.22488876299542093,
|
|
"learning_rate": 1.942223215056034e-05,
|
|
"loss": 0.4716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4457656741142273,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4063.1,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 3.746130030959752,
|
|
"grad_norm": 0.23762505494746253,
|
|
"learning_rate": 1.9349121358247792e-05,
|
|
"loss": 0.4808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43950557708740234,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3417.5,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 3.753869969040248,
|
|
"grad_norm": 0.2550271763615995,
|
|
"learning_rate": 1.9276019271820813e-05,
|
|
"loss": 0.4771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49705398082733154,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3247.6,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 3.761609907120743,
|
|
"grad_norm": 0.23962111299752886,
|
|
"learning_rate": 1.9202926869062905e-05,
|
|
"loss": 0.486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45085281133651733,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3815.3,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 3.7693498452012384,
|
|
"grad_norm": 0.2895378716989859,
|
|
"learning_rate": 1.9129845127628045e-05,
|
|
"loss": 0.4844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4883165955543518,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3516.8,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 3.777089783281734,
|
|
"grad_norm": 0.2492521288157181,
|
|
"learning_rate": 1.9056775025027606e-05,
|
|
"loss": 0.479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4766923189163208,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3548.4,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 3.784829721362229,
|
|
"grad_norm": 0.25876077568959965,
|
|
"learning_rate": 1.8983717538617303e-05,
|
|
"loss": 0.4731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4735720157623291,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3108.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 3.7925696594427247,
|
|
"grad_norm": 0.23370550185118139,
|
|
"learning_rate": 1.8910673645584075e-05,
|
|
"loss": 0.4938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5189200043678284,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3956.9,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 3.8003095975232197,
|
|
"grad_norm": 0.25506112483930443,
|
|
"learning_rate": 1.8837644322933064e-05,
|
|
"loss": 0.4817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5111297369003296,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3006.1,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 3.808049535603715,
|
|
"grad_norm": 0.27726916506453947,
|
|
"learning_rate": 1.8764630547474513e-05,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4804767668247223,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3219.2,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 3.8157894736842106,
|
|
"grad_norm": 0.5490727080634173,
|
|
"learning_rate": 1.8691633295810708e-05,
|
|
"loss": 0.5434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.900316596031189,
|
|
"step": 2465,
|
|
"valid_targets_mean": 1110.0,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 3.8235294117647056,
|
|
"grad_norm": 0.2553628403736818,
|
|
"learning_rate": 1.8618653544322938e-05,
|
|
"loss": 0.4831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4841254949569702,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3346.5,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 3.8312693498452015,
|
|
"grad_norm": 0.5562141431818495,
|
|
"learning_rate": 1.85456922691584e-05,
|
|
"loss": 0.5602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.931420087814331,
|
|
"step": 2475,
|
|
"valid_targets_mean": 1103.4,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 3.8390092879256965,
|
|
"grad_norm": 0.24114916406938958,
|
|
"learning_rate": 1.8472750446217164e-05,
|
|
"loss": 0.4815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4636272192001343,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3436.7,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.846749226006192,
|
|
"grad_norm": 0.4285222750948915,
|
|
"learning_rate": 1.839982905113911e-05,
|
|
"loss": 0.5031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6630858778953552,
|
|
"step": 2485,
|
|
"valid_targets_mean": 1655.0,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 3.8544891640866874,
|
|
"grad_norm": 0.2429759992882951,
|
|
"learning_rate": 1.8326929059290895e-05,
|
|
"loss": 0.4897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43966197967529297,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3273.7,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 3.8622291021671824,
|
|
"grad_norm": 0.32656088378879544,
|
|
"learning_rate": 1.825405144575289e-05,
|
|
"loss": 0.4893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5776164531707764,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2103.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 3.8699690402476783,
|
|
"grad_norm": 0.2596741895049066,
|
|
"learning_rate": 1.818119718530614e-05,
|
|
"loss": 0.503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4582991302013397,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3329.1,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.8777089783281733,
|
|
"grad_norm": 0.3964909687145568,
|
|
"learning_rate": 1.8108367252419326e-05,
|
|
"loss": 0.4816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5169562101364136,
|
|
"step": 2505,
|
|
"valid_targets_mean": 2073.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 3.8854489164086687,
|
|
"grad_norm": 0.23300570592396938,
|
|
"learning_rate": 1.8035562621235742e-05,
|
|
"loss": 0.5584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45323455333709717,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3753.7,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.893188854489164,
|
|
"grad_norm": 0.22409797603931678,
|
|
"learning_rate": 1.7962784265560255e-05,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43701744079589844,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3801.1,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 3.900928792569659,
|
|
"grad_norm": 0.24561928319735887,
|
|
"learning_rate": 1.7890033158846277e-05,
|
|
"loss": 0.5718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4819703996181488,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3548.1,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 3.9086687306501546,
|
|
"grad_norm": 0.21750688249553263,
|
|
"learning_rate": 1.781731027418275e-05,
|
|
"loss": 0.4605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43665897846221924,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4182.1,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 3.91640866873065,
|
|
"grad_norm": 0.27779304523972387,
|
|
"learning_rate": 1.7744616584281135e-05,
|
|
"loss": 0.5199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4716933071613312,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3701.5,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.9241486068111455,
|
|
"grad_norm": 0.23308170216085283,
|
|
"learning_rate": 1.7671953061462382e-05,
|
|
"loss": 0.4643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43994855880737305,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3829.9,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 3.931888544891641,
|
|
"grad_norm": 0.6285001891742295,
|
|
"learning_rate": 1.759932067764396e-05,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27027222514152527,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4718.9,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 3.939628482972136,
|
|
"grad_norm": 0.4064773651787613,
|
|
"learning_rate": 1.752672040432682e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23830467462539673,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5331.9,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 3.9473684210526314,
|
|
"grad_norm": 0.43418764139274585,
|
|
"learning_rate": 1.7454153212582418e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21572345495224,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5134.1,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 3.955108359133127,
|
|
"grad_norm": 0.3944825210574653,
|
|
"learning_rate": 1.7381620073039727e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22284483909606934,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4941.0,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 3.9628482972136223,
|
|
"grad_norm": 0.3515170790116526,
|
|
"learning_rate": 1.7309121955872253e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24947771430015564,
|
|
"step": 2560,
|
|
"valid_targets_mean": 4170.8,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 3.9705882352941178,
|
|
"grad_norm": 0.6034518443638968,
|
|
"learning_rate": 1.723665983078505e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25377264618873596,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4252.7,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.9783281733746128,
|
|
"grad_norm": 0.7355655244183138,
|
|
"learning_rate": 1.7164234667001763e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24411222338676453,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3253.8,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 3.986068111455108,
|
|
"grad_norm": 0.40006843347268484,
|
|
"learning_rate": 1.7091847433251664e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23060306906700134,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2460.5,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 3.9938080495356036,
|
|
"grad_norm": 0.33250695955184023,
|
|
"learning_rate": 1.7019499097756675e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22133678197860718,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3589.9,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 4.001547987616099,
|
|
"grad_norm": 0.7977562431553336,
|
|
"learning_rate": 1.694719062821843e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671439051628113,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3681.9,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 4.0092879256965945,
|
|
"grad_norm": 0.3859485702696889,
|
|
"learning_rate": 1.6874922991805352e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500758767127991,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3970.2,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 4.0170278637770895,
|
|
"grad_norm": 0.35065039732561026,
|
|
"learning_rate": 1.680269715513968e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24183142185211182,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3712.5,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 4.024767801857585,
|
|
"grad_norm": 0.3289219099040923,
|
|
"learning_rate": 1.6730514084284562e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24284854531288147,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3618.2,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 4.03250773993808,
|
|
"grad_norm": 0.28305743878864514,
|
|
"learning_rate": 1.665837474473112e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23493747413158417,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3822.5,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 4.040247678018575,
|
|
"grad_norm": 0.25995485998864215,
|
|
"learning_rate": 1.658628010138556e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2361966222524643,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3878.1,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 4.047987616099071,
|
|
"grad_norm": 0.21532620860244342,
|
|
"learning_rate": 1.6514231118556234e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23190075159072876,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3599.6,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.055727554179566,
|
|
"grad_norm": 0.2164763504692696,
|
|
"learning_rate": 1.6442228759940772e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23097014427185059,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3547.5,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 4.063467492260062,
|
|
"grad_norm": 0.23575143128771422,
|
|
"learning_rate": 1.637027398861316e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2218899130821228,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3543.9,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 4.071207430340557,
|
|
"grad_norm": 0.21247601660747695,
|
|
"learning_rate": 1.6298367767010895e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22882096469402313,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3743.6,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 4.078947368421052,
|
|
"grad_norm": 0.20428084086179712,
|
|
"learning_rate": 1.6226511056922076e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23036187887191772,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3927.4,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 4.086687306501548,
|
|
"grad_norm": 0.3557260783805851,
|
|
"learning_rate": 1.615470481947257e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25892725586891174,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4279.9,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 4.094427244582043,
|
|
"grad_norm": 0.2595897335410842,
|
|
"learning_rate": 1.6082950015113136e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26021724939346313,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4793.1,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 4.102167182662539,
|
|
"grad_norm": 0.6024187704389053,
|
|
"learning_rate": 1.6011247603606587e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49718737602233887,
|
|
"step": 2650,
|
|
"valid_targets_mean": 894.3,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 4.109907120743034,
|
|
"grad_norm": 0.2678170063327489,
|
|
"learning_rate": 1.5939598544014944e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269172728061676,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4972.6,
|
|
"valid_targets_min": 3047
|
|
},
|
|
{
|
|
"epoch": 4.117647058823529,
|
|
"grad_norm": 0.3375065116904596,
|
|
"learning_rate": 1.5868003794686626e-05,
|
|
"loss": 0.3573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2636796832084656,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4199.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 4.125386996904025,
|
|
"grad_norm": 0.23043695651882945,
|
|
"learning_rate": 1.579646431324362e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26492059230804443,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4869.9,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 4.13312693498452,
|
|
"grad_norm": 0.5743767716387614,
|
|
"learning_rate": 1.5724981056568652e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5218298435211182,
|
|
"step": 2670,
|
|
"valid_targets_mean": 865.0,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 4.140866873065016,
|
|
"grad_norm": 0.2580675271999514,
|
|
"learning_rate": 1.565355498079244e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26508790254592896,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4827.9,
|
|
"valid_targets_min": 3031
|
|
},
|
|
{
|
|
"epoch": 4.148606811145511,
|
|
"grad_norm": 0.601651296204302,
|
|
"learning_rate": 1.5582187041280848e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5127502679824829,
|
|
"step": 2680,
|
|
"valid_targets_mean": 951.7,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 4.156346749226006,
|
|
"grad_norm": 0.24088651589763943,
|
|
"learning_rate": 1.551087819262214e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26384443044662476,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4826.3,
|
|
"valid_targets_min": 2645
|
|
},
|
|
{
|
|
"epoch": 4.164086687306502,
|
|
"grad_norm": 0.7485855695105221,
|
|
"learning_rate": 1.5439629388614213e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5449026823043823,
|
|
"step": 2690,
|
|
"valid_targets_mean": 823.9,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 4.171826625386997,
|
|
"grad_norm": 0.21008142810133382,
|
|
"learning_rate": 1.5368441582251833e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618221640586853,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4833.1,
|
|
"valid_targets_min": 2940
|
|
},
|
|
{
|
|
"epoch": 4.179566563467493,
|
|
"grad_norm": 0.6764520598015543,
|
|
"learning_rate": 1.5297315725713863e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5113300085067749,
|
|
"step": 2700,
|
|
"valid_targets_mean": 755.9,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 4.187306501547988,
|
|
"grad_norm": 0.3087285418217133,
|
|
"learning_rate": 1.522625277035058e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583079934120178,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4627.0,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 4.195046439628483,
|
|
"grad_norm": 0.6822984966434357,
|
|
"learning_rate": 1.5155253666670907e-05,
|
|
"loss": 0.3469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5158101916313171,
|
|
"step": 2710,
|
|
"valid_targets_mean": 851.7,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 4.2027863777089784,
|
|
"grad_norm": 0.21511965313744666,
|
|
"learning_rate": 1.5084319364329705e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27163732051849365,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4954.3,
|
|
"valid_targets_min": 2940
|
|
},
|
|
{
|
|
"epoch": 4.2105263157894735,
|
|
"grad_norm": 0.7413890509173838,
|
|
"learning_rate": 1.5013450812115094e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4921557903289795,
|
|
"step": 2720,
|
|
"valid_targets_mean": 830.1,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 4.218266253869969,
|
|
"grad_norm": 0.2599821791835351,
|
|
"learning_rate": 1.4942648957935743e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802320718765259,
|
|
"step": 2725,
|
|
"valid_targets_mean": 5988.3,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.226006191950464,
|
|
"grad_norm": 0.23778348787535752,
|
|
"learning_rate": 1.4871914748808192e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3049447238445282,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5611.2,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 4.233746130030959,
|
|
"grad_norm": 0.22741641187285336,
|
|
"learning_rate": 1.4801249130844187e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29474881291389465,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5332.3,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 4.241486068111455,
|
|
"grad_norm": 0.22273130664774618,
|
|
"learning_rate": 1.4730653049238038e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074001669883728,
|
|
"step": 2740,
|
|
"valid_targets_mean": 5595.0,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 4.24922600619195,
|
|
"grad_norm": 0.3081228582429119,
|
|
"learning_rate": 1.4660127448253945e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849186062812805,
|
|
"step": 2745,
|
|
"valid_targets_mean": 5674.6,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 4.256965944272446,
|
|
"grad_norm": 0.21226065539426642,
|
|
"learning_rate": 1.4589673271213407e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772536873817444,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5872.0,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 4.264705882352941,
|
|
"grad_norm": 0.20641248827106964,
|
|
"learning_rate": 1.4519291460482583e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28913387656211853,
|
|
"step": 2755,
|
|
"valid_targets_mean": 5701.9,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 4.272445820433436,
|
|
"grad_norm": 0.2003252068091716,
|
|
"learning_rate": 1.4448982957459676e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26687729358673096,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5619.1,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 4.280185758513932,
|
|
"grad_norm": 0.2030330837787843,
|
|
"learning_rate": 1.4378748702562382e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27053672075271606,
|
|
"step": 2765,
|
|
"valid_targets_mean": 5753.0,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 4.287925696594427,
|
|
"grad_norm": 0.18745732720973687,
|
|
"learning_rate": 1.4308589635215246e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766991853713989,
|
|
"step": 2770,
|
|
"valid_targets_mean": 5811.5,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 4.295665634674923,
|
|
"grad_norm": 0.19643443495548177,
|
|
"learning_rate": 1.4238506693837165e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808050513267517,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5628.8,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 4.303405572755418,
|
|
"grad_norm": 0.2229172929117127,
|
|
"learning_rate": 1.4168500815828807e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27479100227355957,
|
|
"step": 2780,
|
|
"valid_targets_mean": 5455.2,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 4.311145510835913,
|
|
"grad_norm": 0.20382586510877573,
|
|
"learning_rate": 1.4098572937560048e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859298586845398,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5440.1,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 4.318885448916409,
|
|
"grad_norm": 0.2030964190788325,
|
|
"learning_rate": 1.4028723994357498e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27701088786125183,
|
|
"step": 2790,
|
|
"valid_targets_mean": 5518.4,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 4.326625386996904,
|
|
"grad_norm": 0.21030476442715113,
|
|
"learning_rate": 1.3958954920491951e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29448458552360535,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5511.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 4.3343653250774,
|
|
"grad_norm": 0.18727113103221418,
|
|
"learning_rate": 1.3889266649165896e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28205591440200806,
|
|
"step": 2800,
|
|
"valid_targets_mean": 6648.1,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 4.342105263157895,
|
|
"grad_norm": 0.17960744377587906,
|
|
"learning_rate": 1.3819660112501054e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28532010316848755,
|
|
"step": 2805,
|
|
"valid_targets_mean": 6246.2,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 4.34984520123839,
|
|
"grad_norm": 0.20280145814785858,
|
|
"learning_rate": 1.3750136241525893e-05,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27794745564460754,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6088.3,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.357585139318886,
|
|
"grad_norm": 0.18688721526373822,
|
|
"learning_rate": 1.3680695966163165e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674199640750885,
|
|
"step": 2815,
|
|
"valid_targets_mean": 6268.0,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.365325077399381,
|
|
"grad_norm": 0.17777141294133386,
|
|
"learning_rate": 1.3611340215217496e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832227349281311,
|
|
"step": 2820,
|
|
"valid_targets_mean": 7251.7,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 4.3730650154798765,
|
|
"grad_norm": 0.2663604074209158,
|
|
"learning_rate": 1.3542069916362954e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32068130373954773,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2685.1,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 4.3808049535603715,
|
|
"grad_norm": 0.1761442040782512,
|
|
"learning_rate": 1.3472885996130614e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28158140182495117,
|
|
"step": 2830,
|
|
"valid_targets_mean": 6766.1,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 4.3885448916408665,
|
|
"grad_norm": 0.23275774679712943,
|
|
"learning_rate": 1.3403789379896202e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3295462429523468,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.396284829721362,
|
|
"grad_norm": 0.23119813847436324,
|
|
"learning_rate": 1.3334780991867695e-05,
|
|
"loss": 0.3818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756391763687134,
|
|
"step": 2840,
|
|
"valid_targets_mean": 5790.3,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 4.404024767801857,
|
|
"grad_norm": 0.21326660655163374,
|
|
"learning_rate": 1.3265861755072968e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28569531440734863,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4450.3,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.411764705882353,
|
|
"grad_norm": 0.1853959824367411,
|
|
"learning_rate": 1.3197032591347445e-05,
|
|
"loss": 0.3618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269290566444397,
|
|
"step": 2850,
|
|
"valid_targets_mean": 6372.9,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 4.419504643962848,
|
|
"grad_norm": 0.580998888405872,
|
|
"learning_rate": 1.3128294421321772e-05,
|
|
"loss": 0.3335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5727855563163757,
|
|
"step": 2855,
|
|
"valid_targets_mean": 906.0,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 4.427244582043343,
|
|
"grad_norm": 0.16994000065156134,
|
|
"learning_rate": 1.3059648164409492e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26326072216033936,
|
|
"step": 2860,
|
|
"valid_targets_mean": 6534.2,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.434984520123839,
|
|
"grad_norm": 0.6211120181630739,
|
|
"learning_rate": 1.299109473879477e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5467883348464966,
|
|
"step": 2865,
|
|
"valid_targets_mean": 814.0,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 4.442724458204334,
|
|
"grad_norm": 0.18097366894776748,
|
|
"learning_rate": 1.2922635061420083e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607588768005371,
|
|
"step": 2870,
|
|
"valid_targets_mean": 6517.6,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 4.45046439628483,
|
|
"grad_norm": 0.7077291686252601,
|
|
"learning_rate": 1.2854270047973987e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5461915731430054,
|
|
"step": 2875,
|
|
"valid_targets_mean": 796.5,
|
|
"valid_targets_min": 171
|
|
},
|
|
{
|
|
"epoch": 4.458204334365325,
|
|
"grad_norm": 0.5180820741501877,
|
|
"learning_rate": 1.278600061287883e-05,
|
|
"loss": 0.5046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4599485397338867,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3823.5,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 4.46594427244582,
|
|
"grad_norm": 0.26371078519460023,
|
|
"learning_rate": 1.2717827669278564e-05,
|
|
"loss": 0.4775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4435957074165344,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4368.6,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.473684210526316,
|
|
"grad_norm": 0.3306088294780607,
|
|
"learning_rate": 1.26497521290265e-05,
|
|
"loss": 0.4856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5073356628417969,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3313.4,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 4.481424148606811,
|
|
"grad_norm": 0.32660692643431144,
|
|
"learning_rate": 1.2581774902673116e-05,
|
|
"loss": 0.4693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4961395859718323,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2229.5,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 4.489164086687307,
|
|
"grad_norm": 0.32283655139820105,
|
|
"learning_rate": 1.2513896899453902e-05,
|
|
"loss": 0.4753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49232175946235657,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2805.3,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 4.496904024767802,
|
|
"grad_norm": 0.27340357860137876,
|
|
"learning_rate": 1.2446119027277166e-05,
|
|
"loss": 0.4543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4842875003814697,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2612.6,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 4.504643962848297,
|
|
"grad_norm": 0.2283795898205354,
|
|
"learning_rate": 1.237844219271191e-05,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46368643641471863,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3845.4,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 4.512383900928793,
|
|
"grad_norm": 0.22715248927767198,
|
|
"learning_rate": 1.2310867300975686e-05,
|
|
"loss": 0.4594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4301972985267639,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3683.9,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 4.520123839009288,
|
|
"grad_norm": 0.244438049974717,
|
|
"learning_rate": 1.2243395255922514e-05,
|
|
"loss": 0.4624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45204979181289673,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3909.4,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 4.527863777089784,
|
|
"grad_norm": 0.2823571591220732,
|
|
"learning_rate": 1.2176026960030782e-05,
|
|
"loss": 0.4719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4788780212402344,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2976.2,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 4.535603715170279,
|
|
"grad_norm": 0.27907722675689434,
|
|
"learning_rate": 1.2108763314391156e-05,
|
|
"loss": 0.4487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4670989513397217,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2441.2,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 4.543343653250774,
|
|
"grad_norm": 0.262624441839696,
|
|
"learning_rate": 1.2041605218694561e-05,
|
|
"loss": 0.4719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46042945981025696,
|
|
"step": 2935,
|
|
"valid_targets_mean": 2866.4,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 4.5510835913312695,
|
|
"grad_norm": 0.2863942393667685,
|
|
"learning_rate": 1.1974553571220114e-05,
|
|
"loss": 0.4474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4525454044342041,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2253.2,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 4.5588235294117645,
|
|
"grad_norm": 0.2336291226025938,
|
|
"learning_rate": 1.1907609268823138e-05,
|
|
"loss": 0.4578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43466585874557495,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3370.6,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 4.56656346749226,
|
|
"grad_norm": 0.5024424796324376,
|
|
"learning_rate": 1.184077320692314e-05,
|
|
"loss": 0.5416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6064223647117615,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4023.8,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 4.574303405572755,
|
|
"grad_norm": 0.3308569673279954,
|
|
"learning_rate": 1.1774046279491848e-05,
|
|
"loss": 0.586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5931911468505859,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4263.8,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 4.58204334365325,
|
|
"grad_norm": 0.2751838838377659,
|
|
"learning_rate": 1.1707429379041269e-05,
|
|
"loss": 0.6075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5981575846672058,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4282.4,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 4.589783281733746,
|
|
"grad_norm": 0.40496548530711984,
|
|
"learning_rate": 1.1640923396611706e-05,
|
|
"loss": 0.5813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5559823513031006,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4329.7,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 4.597523219814241,
|
|
"grad_norm": 0.2515942801943693,
|
|
"learning_rate": 1.1574529221759895e-05,
|
|
"loss": 0.5808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5641961097717285,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4413.9,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 4.605263157894737,
|
|
"grad_norm": 0.27394645166999115,
|
|
"learning_rate": 1.1508247742547059e-05,
|
|
"loss": 0.5878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5969656705856323,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3331.2,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 4.613003095975232,
|
|
"grad_norm": 0.2738928217061669,
|
|
"learning_rate": 1.1442079845527068e-05,
|
|
"loss": 0.5838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6013756990432739,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 4.620743034055727,
|
|
"grad_norm": 1.963400098530233,
|
|
"learning_rate": 1.1376026415734548e-05,
|
|
"loss": 0.5679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6094938516616821,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3107.3,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 4.628482972136223,
|
|
"grad_norm": 0.23520546791878888,
|
|
"learning_rate": 1.1310088336673065e-05,
|
|
"loss": 0.5838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6291078329086304,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4157.3,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 4.636222910216718,
|
|
"grad_norm": 0.2477324114313013,
|
|
"learning_rate": 1.1244266490303306e-05,
|
|
"loss": 0.561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5861718654632568,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3665.2,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 4.643962848297214,
|
|
"grad_norm": 0.2418235487445107,
|
|
"learning_rate": 1.1178561757031272e-05,
|
|
"loss": 0.5824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5690067410469055,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3729.1,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 4.651702786377709,
|
|
"grad_norm": 0.2400013251431622,
|
|
"learning_rate": 1.1112975015696495e-05,
|
|
"loss": 0.5863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6029608249664307,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3811.2,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 4.659442724458204,
|
|
"grad_norm": 0.20997096334885615,
|
|
"learning_rate": 1.1047507143560327e-05,
|
|
"loss": 0.5763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5337103605270386,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4278.7,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 4.6671826625387,
|
|
"grad_norm": 0.2403723102288635,
|
|
"learning_rate": 1.0982159016294145e-05,
|
|
"loss": 0.586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6157784461975098,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3975.3,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 4.674922600619195,
|
|
"grad_norm": 0.22906247324578405,
|
|
"learning_rate": 1.091693150796767e-05,
|
|
"loss": 0.5778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5642900466918945,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4346.6,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 4.682662538699691,
|
|
"grad_norm": 0.4303482561535136,
|
|
"learning_rate": 1.0851825491037295e-05,
|
|
"loss": 0.5327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5001225471496582,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3941.8,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 4.690402476780186,
|
|
"grad_norm": 0.3075796941356459,
|
|
"learning_rate": 1.0786841836334376e-05,
|
|
"loss": 0.4802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4426044821739197,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4111.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.698142414860681,
|
|
"grad_norm": 0.26882565028361266,
|
|
"learning_rate": 1.0721981413053599e-05,
|
|
"loss": 0.4932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5008689761161804,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3929.4,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 4.705882352941177,
|
|
"grad_norm": 0.26150305593215656,
|
|
"learning_rate": 1.0657245088741379e-05,
|
|
"loss": 0.4803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4889753758907318,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4184.5,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 4.713622291021672,
|
|
"grad_norm": 0.23755330883299536,
|
|
"learning_rate": 1.059263372928421e-05,
|
|
"loss": 0.4851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4825485944747925,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4404.0,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 4.7213622291021675,
|
|
"grad_norm": 0.22647414855955983,
|
|
"learning_rate": 1.0528148198897119e-05,
|
|
"loss": 0.4603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4652065634727478,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4238.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 4.7291021671826625,
|
|
"grad_norm": 0.2023525972489863,
|
|
"learning_rate": 1.0463789360112091e-05,
|
|
"loss": 0.4898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47735369205474854,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4663.1,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 4.7368421052631575,
|
|
"grad_norm": 0.2193891048335965,
|
|
"learning_rate": 1.0399558073766532e-05,
|
|
"loss": 0.4666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46430617570877075,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4196.7,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 4.744582043343653,
|
|
"grad_norm": 0.21172796147478845,
|
|
"learning_rate": 1.0335455198991781e-05,
|
|
"loss": 0.4747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4615963399410248,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4613.0,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 4.752321981424148,
|
|
"grad_norm": 0.21765793267753142,
|
|
"learning_rate": 1.0271481593201574e-05,
|
|
"loss": 0.4586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4470646381378174,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4339.9,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 4.760061919504644,
|
|
"grad_norm": 0.21450254380274072,
|
|
"learning_rate": 1.0207638112080593e-05,
|
|
"loss": 0.4877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4822099208831787,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4427.8,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 4.767801857585139,
|
|
"grad_norm": 0.22664732896418327,
|
|
"learning_rate": 1.0143925609573056e-05,
|
|
"loss": 0.4701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4684290885925293,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4592.4,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 4.775541795665634,
|
|
"grad_norm": 0.21544868055097288,
|
|
"learning_rate": 1.0080344937871237e-05,
|
|
"loss": 0.4742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.458982914686203,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4412.0,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 4.78328173374613,
|
|
"grad_norm": 0.20682547082641345,
|
|
"learning_rate": 1.00168969474041e-05,
|
|
"loss": 0.467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4373583197593689,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4575.8,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 4.791021671826625,
|
|
"grad_norm": 0.20941747452955883,
|
|
"learning_rate": 9.953582486825935e-06,
|
|
"loss": 0.4776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47484564781188965,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4447.5,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 4.798761609907121,
|
|
"grad_norm": 0.21729507163708037,
|
|
"learning_rate": 9.890402403004974e-06,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4519065022468567,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4437.8,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 4.806501547987616,
|
|
"grad_norm": 0.2317241112551166,
|
|
"learning_rate": 9.82735754101208e-06,
|
|
"loss": 0.4736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4614100158214569,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3840.9,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 4.814241486068111,
|
|
"grad_norm": 0.22976992926725875,
|
|
"learning_rate": 9.764448744109468e-06,
|
|
"loss": 0.4526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4283045530319214,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3756.0,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 4.821981424148607,
|
|
"grad_norm": 0.24390435734998298,
|
|
"learning_rate": 9.70167685373937e-06,
|
|
"loss": 0.5576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4680092930793762,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3755.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 4.829721362229102,
|
|
"grad_norm": 0.23456322049605846,
|
|
"learning_rate": 9.639042709512829e-06,
|
|
"loss": 0.464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44104906916618347,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3807.7,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 4.837461300309598,
|
|
"grad_norm": 0.23784627639081926,
|
|
"learning_rate": 9.57654714919846e-06,
|
|
"loss": 0.5661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4585813283920288,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3717.1,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 4.845201238390093,
|
|
"grad_norm": 0.23733520783210843,
|
|
"learning_rate": 9.51419100871122e-06,
|
|
"loss": 0.4563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4274477958679199,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3834.6,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 4.852941176470588,
|
|
"grad_norm": 0.24370727071563394,
|
|
"learning_rate": 9.451975122101245e-06,
|
|
"loss": 0.5266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4658730924129486,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3716.2,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 4.860681114551084,
|
|
"grad_norm": 0.224584994390136,
|
|
"learning_rate": 9.389900321542714e-06,
|
|
"loss": 0.4548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4292837679386139,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3947.7,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 4.868421052631579,
|
|
"grad_norm": 0.2460725885184712,
|
|
"learning_rate": 9.32796743732267e-06,
|
|
"loss": 0.5192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45926839113235474,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3347.8,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.876160990712075,
|
|
"grad_norm": 0.22352593416077404,
|
|
"learning_rate": 9.266177297829949e-06,
|
|
"loss": 0.4631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4535781145095825,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3933.3,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 4.88390092879257,
|
|
"grad_norm": 0.2573130230570621,
|
|
"learning_rate": 9.204530729544107e-06,
|
|
"loss": 0.5628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46443650126457214,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3639.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 4.891640866873065,
|
|
"grad_norm": 0.2376929875968187,
|
|
"learning_rate": 9.143028557024337e-06,
|
|
"loss": 0.4508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4366801381111145,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3762.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 4.8993808049535605,
|
|
"grad_norm": 0.2637171270303312,
|
|
"learning_rate": 9.081671602898444e-06,
|
|
"loss": 0.5542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4761418104171753,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3373.5,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 4.9071207430340555,
|
|
"grad_norm": 0.40492871600107466,
|
|
"learning_rate": 9.020460687851877e-06,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4501515030860901,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3545.1,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 4.914860681114551,
|
|
"grad_norm": 0.266178456621172,
|
|
"learning_rate": 8.959396630616709e-06,
|
|
"loss": 0.5054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47557950019836426,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3247.4,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 4.922600619195046,
|
|
"grad_norm": 0.23955486844075902,
|
|
"learning_rate": 8.898480247960698e-06,
|
|
"loss": 0.4642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45473483204841614,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3632.9,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 4.930340557275541,
|
|
"grad_norm": 0.8173037613085806,
|
|
"learning_rate": 8.837712354676386e-06,
|
|
"loss": 0.411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819279134273529,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4943.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 4.938080495356037,
|
|
"grad_norm": 0.4146358862550602,
|
|
"learning_rate": 8.77709376357017e-06,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23529914021492004,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5463.8,
|
|
"valid_targets_min": 2499
|
|
},
|
|
{
|
|
"epoch": 4.945820433436532,
|
|
"grad_norm": 0.46971747550145976,
|
|
"learning_rate": 8.716625285451444e-06,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22410422563552856,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5126.4,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.953560371517028,
|
|
"grad_norm": 0.44220027373551557,
|
|
"learning_rate": 8.656307729121749e-06,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22196033596992493,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4619.5,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 4.961300309597523,
|
|
"grad_norm": 0.46026436457674913,
|
|
"learning_rate": 8.596141901363957e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23844443261623383,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4067.3,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 4.969040247678018,
|
|
"grad_norm": 0.340692373261966,
|
|
"learning_rate": 8.536128606931502e-06,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23860566318035126,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4208.2,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 4.976780185758514,
|
|
"grad_norm": 0.401607934059039,
|
|
"learning_rate": 8.476268648537567e-06,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22871974110603333,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4078.0,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 4.984520123839009,
|
|
"grad_norm": 0.459084972019712,
|
|
"learning_rate": 8.41656282684438e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22857078909873962,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3467.1,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 4.992260061919505,
|
|
"grad_norm": 0.3761005239500863,
|
|
"learning_rate": 8.357011940452524e-06,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22193747758865356,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2943.9,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5295741220591167,
|
|
"learning_rate": 8.297616785890203e-06,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22759687900543213,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2756.4,
|
|
"valid_targets_min": 182
|
|
},
|
|
{
|
|
"epoch": 5.007739938080495,
|
|
"grad_norm": 0.4700845436496503,
|
|
"learning_rate": 8.238378157602622e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24569155275821686,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3787.4,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 5.015479876160991,
|
|
"grad_norm": 0.3555865200718581,
|
|
"learning_rate": 8.179296847941372e-06,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23764678835868835,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3744.1,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 5.023219814241486,
|
|
"grad_norm": 0.3199043648546617,
|
|
"learning_rate": 8.120373647153792e-06,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23746223747730255,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3806.1,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 5.030959752321982,
|
|
"grad_norm": 0.26412273812856496,
|
|
"learning_rate": 8.061609343372427e-06,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2379157990217209,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3883.8,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 5.038699690402477,
|
|
"grad_norm": 0.2410617670577924,
|
|
"learning_rate": 8.003004722604497e-06,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23294153809547424,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3872.5,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 5.046439628482972,
|
|
"grad_norm": 0.2264347023306784,
|
|
"learning_rate": 7.944560568721354e-06,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23297041654586792,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3780.5,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 5.054179566563468,
|
|
"grad_norm": 0.22209984835490232,
|
|
"learning_rate": 7.886277663447998e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22890938818454742,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3663.5,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 5.061919504643963,
|
|
"grad_norm": 0.23272121580951877,
|
|
"learning_rate": 7.82815678635267e-06,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22157731652259827,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3721.1,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 5.069659442724459,
|
|
"grad_norm": 0.2101185871301259,
|
|
"learning_rate": 7.770198714836348e-06,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22460076212882996,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3723.2,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 5.077399380804954,
|
|
"grad_norm": 0.206433360079651,
|
|
"learning_rate": 7.712404224122421e-06,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22615480422973633,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3877.2,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 5.085139318885449,
|
|
"grad_norm": 0.22066024180632432,
|
|
"learning_rate": 7.654774087246273e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23022989928722382,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3845.5,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 5.0928792569659445,
|
|
"grad_norm": 0.3287281810820195,
|
|
"learning_rate": 7.597309075044952e-06,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643077075481415,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4920.5,
|
|
"valid_targets_min": 2219
|
|
},
|
|
{
|
|
"epoch": 5.1006191950464395,
|
|
"grad_norm": 0.625984396524783,
|
|
"learning_rate": 7.54000995614689e-06,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4386271834373474,
|
|
"step": 3295,
|
|
"valid_targets_mean": 965.5,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 5.108359133126935,
|
|
"grad_norm": 0.5835576136803072,
|
|
"learning_rate": 7.4828774969615735e-06,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568455636501312,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4698.2,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 5.11609907120743,
|
|
"grad_norm": 0.6113484919972076,
|
|
"learning_rate": 7.425912461669324e-06,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4669925272464752,
|
|
"step": 3305,
|
|
"valid_targets_mean": 877.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 5.123839009287925,
|
|
"grad_norm": 0.21352308520942498,
|
|
"learning_rate": 7.369115612211086e-06,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534993886947632,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4635.7,
|
|
"valid_targets_min": 3134
|
|
},
|
|
{
|
|
"epoch": 5.131578947368421,
|
|
"grad_norm": 0.563961833558715,
|
|
"learning_rate": 7.3124877082781976e-06,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.473026841878891,
|
|
"step": 3315,
|
|
"valid_targets_mean": 855.6,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 5.139318885448916,
|
|
"grad_norm": 0.20620873462156789,
|
|
"learning_rate": 7.256029507302254e-06,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26267993450164795,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4777.3,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 5.147058823529412,
|
|
"grad_norm": 0.6057875870401285,
|
|
"learning_rate": 7.1997417644449915e-06,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4821520149707794,
|
|
"step": 3325,
|
|
"valid_targets_mean": 898.8,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 5.154798761609907,
|
|
"grad_norm": 0.22983838070110416,
|
|
"learning_rate": 7.14362523258815e-06,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24655894935131073,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4618.4,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 5.162538699690402,
|
|
"grad_norm": 0.2349977477858853,
|
|
"learning_rate": 7.087680662323426e-06,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657468318939209,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3491.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 5.170278637770898,
|
|
"grad_norm": 0.20972523434122636,
|
|
"learning_rate": 7.031908801942429e-06,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2488928586244583,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4612.6,
|
|
"valid_targets_min": 3000
|
|
},
|
|
{
|
|
"epoch": 5.178018575851393,
|
|
"grad_norm": 0.20166225442844207,
|
|
"learning_rate": 6.976310397426676e-06,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563784122467041,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3892.6,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 5.185758513931889,
|
|
"grad_norm": 0.2719680440646201,
|
|
"learning_rate": 6.920886192437595e-06,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25556352734565735,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4844.6,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 5.193498452012384,
|
|
"grad_norm": 0.5852181726057455,
|
|
"learning_rate": 6.865636928306618e-06,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4378099739551544,
|
|
"step": 3355,
|
|
"valid_targets_mean": 984.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 5.201238390092879,
|
|
"grad_norm": 0.19780424134789956,
|
|
"learning_rate": 6.810563344025214e-06,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2589517831802368,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4991.7,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 5.208978328173375,
|
|
"grad_norm": 0.2261636215220509,
|
|
"learning_rate": 6.755666176235054e-06,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27702945470809937,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3466.8,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 5.21671826625387,
|
|
"grad_norm": 0.2862021607145266,
|
|
"learning_rate": 6.700946159218118e-06,
|
|
"loss": 0.3934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844895124435425,
|
|
"step": 3370,
|
|
"valid_targets_mean": 5756.4,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 5.224458204334366,
|
|
"grad_norm": 0.2325416184027991,
|
|
"learning_rate": 6.646404024886883e-06,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715683579444885,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5488.7,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 5.232198142414861,
|
|
"grad_norm": 0.20910098188579046,
|
|
"learning_rate": 6.592040502774568e-06,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28531405329704285,
|
|
"step": 3380,
|
|
"valid_targets_mean": 5880.8,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 5.239938080495356,
|
|
"grad_norm": 0.19262332500880985,
|
|
"learning_rate": 6.537856320025324e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27549439668655396,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5631.9,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 5.247678018575852,
|
|
"grad_norm": 0.1965475078253338,
|
|
"learning_rate": 6.483852201384535e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27211594581604004,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5617.9,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 5.255417956656347,
|
|
"grad_norm": 0.19113275836256458,
|
|
"learning_rate": 6.43002886918914e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27833759784698486,
|
|
"step": 3395,
|
|
"valid_targets_mean": 6436.5,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 5.2631578947368425,
|
|
"grad_norm": 0.21923195262698808,
|
|
"learning_rate": 6.37638704335793e-06,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2960706353187561,
|
|
"step": 3400,
|
|
"valid_targets_mean": 5309.5,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 5.2708978328173375,
|
|
"grad_norm": 0.19817651220500906,
|
|
"learning_rate": 6.322927441381945e-06,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27379852533340454,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5691.1,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 5.2786377708978325,
|
|
"grad_norm": 0.21559057040050258,
|
|
"learning_rate": 6.269650778314893e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28065264225006104,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5925.6,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 5.286377708978328,
|
|
"grad_norm": 0.195638801915978,
|
|
"learning_rate": 6.2165577667635445e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757023572921753,
|
|
"step": 3415,
|
|
"valid_targets_mean": 5658.9,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 5.294117647058823,
|
|
"grad_norm": 0.20530291760795438,
|
|
"learning_rate": 6.163649116878225e-06,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996949851512909,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5660.4,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 5.301857585139319,
|
|
"grad_norm": 0.1873481059749738,
|
|
"learning_rate": 6.110925536343335e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26079216599464417,
|
|
"step": 3425,
|
|
"valid_targets_mean": 6256.5,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 5.309597523219814,
|
|
"grad_norm": 0.21090363813991392,
|
|
"learning_rate": 6.058387730367836e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2864122688770294,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5626.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 5.317337461300309,
|
|
"grad_norm": 0.21223153196520655,
|
|
"learning_rate": 6.006036401675854e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27753371000289917,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5776.4,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 5.325077399380805,
|
|
"grad_norm": 0.19681108294792513,
|
|
"learning_rate": 5.953872250497288e-06,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27904945611953735,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5494.7,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 5.3328173374613,
|
|
"grad_norm": 0.18765774702077823,
|
|
"learning_rate": 5.901895974558405e-06,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753779888153076,
|
|
"step": 3445,
|
|
"valid_targets_mean": 6014.0,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 5.340557275541796,
|
|
"grad_norm": 0.1792203238973369,
|
|
"learning_rate": 5.850108269072532e-06,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28831207752227783,
|
|
"step": 3450,
|
|
"valid_targets_mean": 6489.6,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 5.348297213622291,
|
|
"grad_norm": 0.23403501837601848,
|
|
"learning_rate": 5.798509826730778e-06,
|
|
"loss": 0.3739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3104645013809204,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3702.6,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 5.356037151702786,
|
|
"grad_norm": 0.18114046054539248,
|
|
"learning_rate": 5.747101337692722e-06,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728242874145508,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5997.3,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 5.363777089783282,
|
|
"grad_norm": 0.20789785969522756,
|
|
"learning_rate": 5.695883489577208e-06,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29742875695228577,
|
|
"step": 3465,
|
|
"valid_targets_mean": 4160.1,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 5.371517027863777,
|
|
"grad_norm": 0.16771492967164323,
|
|
"learning_rate": 5.644856967453159e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26888614892959595,
|
|
"step": 3470,
|
|
"valid_targets_mean": 6533.9,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 5.379256965944273,
|
|
"grad_norm": 0.17143677820496292,
|
|
"learning_rate": 5.59402245383039e-06,
|
|
"loss": 0.3499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27564138174057007,
|
|
"step": 3475,
|
|
"valid_targets_mean": 6688.6,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.386996904024768,
|
|
"grad_norm": 0.18320577044853478,
|
|
"learning_rate": 5.543380628650488e-06,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952688932418823,
|
|
"step": 3480,
|
|
"valid_targets_mean": 6724.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 5.394736842105263,
|
|
"grad_norm": 0.18695286448133377,
|
|
"learning_rate": 5.492932169277721e-06,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2814313769340515,
|
|
"step": 3485,
|
|
"valid_targets_mean": 6117.0,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 5.402476780185759,
|
|
"grad_norm": 0.17440517134147482,
|
|
"learning_rate": 5.442677750489971e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26593172550201416,
|
|
"step": 3490,
|
|
"valid_targets_mean": 6469.6,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 5.410216718266254,
|
|
"grad_norm": 0.1806924760524341,
|
|
"learning_rate": 5.3926180444697285e-06,
|
|
"loss": 0.3594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29063111543655396,
|
|
"step": 3495,
|
|
"valid_targets_mean": 6510.8,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 5.41795665634675,
|
|
"grad_norm": 0.19105222681262465,
|
|
"learning_rate": 5.342753720795069e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27752232551574707,
|
|
"step": 3500,
|
|
"valid_targets_mean": 5183.3,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 5.425696594427245,
|
|
"grad_norm": 0.18298795064439732,
|
|
"learning_rate": 5.293085446430712e-06,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273588091135025,
|
|
"step": 3505,
|
|
"valid_targets_mean": 6249.5,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 5.43343653250774,
|
|
"grad_norm": 0.19779460820575848,
|
|
"learning_rate": 5.243613885719125e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806493043899536,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5410.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 5.4411764705882355,
|
|
"grad_norm": 0.18101824811322353,
|
|
"learning_rate": 5.194339700371591e-06,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708035707473755,
|
|
"step": 3515,
|
|
"valid_targets_mean": 6375.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 5.4489164086687305,
|
|
"grad_norm": 0.24003662889244576,
|
|
"learning_rate": 5.145263549459383e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3036267161369324,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3327.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 5.456656346749226,
|
|
"grad_norm": 0.7170332403708661,
|
|
"learning_rate": 5.0963860894049655e-06,
|
|
"loss": 0.5142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48753973841667175,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3046.5,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 5.464396284829721,
|
|
"grad_norm": 0.4206141730370214,
|
|
"learning_rate": 5.047707973973175e-06,
|
|
"loss": 0.4777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4659736156463623,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3748.6,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 5.472136222910216,
|
|
"grad_norm": 0.31226269508956145,
|
|
"learning_rate": 4.999229854262499e-06,
|
|
"loss": 0.4676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4414464235305786,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 5.479876160990712,
|
|
"grad_norm": 0.24783609212541963,
|
|
"learning_rate": 4.9509523786963834e-06,
|
|
"loss": 0.4664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42238691449165344,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4002.1,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 5.487616099071207,
|
|
"grad_norm": 0.4900660106042238,
|
|
"learning_rate": 4.902876193014519e-06,
|
|
"loss": 0.4713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5052697658538818,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3102.1,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 5.495356037151703,
|
|
"grad_norm": 0.3216191606364154,
|
|
"learning_rate": 4.855001940264226e-06,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4479552209377289,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2004.2,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 5.503095975232198,
|
|
"grad_norm": 0.3302208768470444,
|
|
"learning_rate": 4.807330260791874e-06,
|
|
"loss": 0.4731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4769449830055237,
|
|
"step": 3555,
|
|
"valid_targets_mean": 2664.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 5.510835913312693,
|
|
"grad_norm": 0.2620213606002208,
|
|
"learning_rate": 4.759861792234273e-06,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47559654712677,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3044.0,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.518575851393189,
|
|
"grad_norm": 0.21912979692260454,
|
|
"learning_rate": 4.712597169510169e-06,
|
|
"loss": 0.4528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4707758128643036,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3938.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 5.526315789473684,
|
|
"grad_norm": 0.2336401576044844,
|
|
"learning_rate": 4.66553702481177e-06,
|
|
"loss": 0.4609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.469599187374115,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3717.0,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 5.534055727554179,
|
|
"grad_norm": 0.21825298051270284,
|
|
"learning_rate": 4.61868198759625e-06,
|
|
"loss": 0.4461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4329976439476013,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4406.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 5.541795665634675,
|
|
"grad_norm": 0.24377513016770846,
|
|
"learning_rate": 4.57203268457735e-06,
|
|
"loss": 0.4679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5108462572097778,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3612.3,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 5.54953560371517,
|
|
"grad_norm": 0.28343129271115425,
|
|
"learning_rate": 4.525589739717011e-06,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44696491956710815,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2256.9,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 5.557275541795666,
|
|
"grad_norm": 0.2634902677381947,
|
|
"learning_rate": 4.479353774216997e-06,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46883776783943176,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2972.2,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 5.565015479876161,
|
|
"grad_norm": 0.6298060593569906,
|
|
"learning_rate": 4.433325406510598e-06,
|
|
"loss": 0.5033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5742595195770264,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3760.0,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 5.572755417956657,
|
|
"grad_norm": 0.44016284675067924,
|
|
"learning_rate": 4.38750525225438e-06,
|
|
"loss": 0.587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5365135669708252,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4103.2,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 5.580495356037152,
|
|
"grad_norm": 0.3067661947747438,
|
|
"learning_rate": 4.341893924319918e-06,
|
|
"loss": 0.6035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5996137857437134,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3301.9,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 0.28399430177216545,
|
|
"learning_rate": 4.296492032785602e-06,
|
|
"loss": 0.5865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5685585737228394,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4432.9,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 5.595975232198143,
|
|
"grad_norm": 0.24775958043412608,
|
|
"learning_rate": 4.251300184928515e-06,
|
|
"loss": 0.5757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5673257112503052,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4270.7,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 5.603715170278638,
|
|
"grad_norm": 0.2389772509919841,
|
|
"learning_rate": 4.2063189852162556e-06,
|
|
"loss": 0.5774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5860211253166199,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4288.7,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 5.611455108359133,
|
|
"grad_norm": 0.22859735942036888,
|
|
"learning_rate": 4.161549035298893e-06,
|
|
"loss": 0.5787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.556627094745636,
|
|
"step": 3625,
|
|
"valid_targets_mean": 4493.8,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 5.6191950464396285,
|
|
"grad_norm": 0.2065612771298074,
|
|
"learning_rate": 4.116990934000897e-06,
|
|
"loss": 0.5623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5249226689338684,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4781.4,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 5.6269349845201235,
|
|
"grad_norm": 0.22654687040021215,
|
|
"learning_rate": 4.0726452773131434e-06,
|
|
"loss": 0.5765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5509902238845825,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4028.6,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 5.634674922600619,
|
|
"grad_norm": 0.2702463775067581,
|
|
"learning_rate": 4.02851265838494e-06,
|
|
"loss": 0.5656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5888859033584595,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3116.2,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 5.642414860681114,
|
|
"grad_norm": 0.22797384834728007,
|
|
"learning_rate": 3.984593667516079e-06,
|
|
"loss": 0.5821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6064164042472839,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4106.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 5.65015479876161,
|
|
"grad_norm": 0.24289931931570444,
|
|
"learning_rate": 3.940888892148956e-06,
|
|
"loss": 0.5755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5621261596679688,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3609.7,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 5.657894736842105,
|
|
"grad_norm": 0.22029515118470755,
|
|
"learning_rate": 3.897398916860711e-06,
|
|
"loss": 0.5862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5754674077033997,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4175.3,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 5.6656346749226,
|
|
"grad_norm": 0.23212114028891812,
|
|
"learning_rate": 3.8541243233554035e-06,
|
|
"loss": 0.5658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5541927814483643,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3942.0,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 5.673374613003096,
|
|
"grad_norm": 0.24191373813602807,
|
|
"learning_rate": 3.811065690456228e-06,
|
|
"loss": 0.5841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5798615217208862,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3787.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.681114551083591,
|
|
"grad_norm": 0.6101589491962623,
|
|
"learning_rate": 3.768223594097795e-06,
|
|
"loss": 0.5419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5063816905021667,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3564.7,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 5.688854489164086,
|
|
"grad_norm": 0.3552734294095527,
|
|
"learning_rate": 3.725598607318397e-06,
|
|
"loss": 0.4897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46273231506347656,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4363.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 5.696594427244582,
|
|
"grad_norm": 0.27801406869372164,
|
|
"learning_rate": 3.6831913002523556e-06,
|
|
"loss": 0.4785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45543885231018066,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3516.3,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 5.704334365325077,
|
|
"grad_norm": 0.25240486544734686,
|
|
"learning_rate": 3.6410022401224175e-06,
|
|
"loss": 0.4795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4594934582710266,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4421.9,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 5.712074303405573,
|
|
"grad_norm": 0.2757068497440848,
|
|
"learning_rate": 3.599031991232127e-06,
|
|
"loss": 0.4827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4860798418521881,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3265.3,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 5.719814241486068,
|
|
"grad_norm": 0.21533570759700538,
|
|
"learning_rate": 3.557281114958302e-06,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42976775765419006,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4266.4,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 5.727554179566564,
|
|
"grad_norm": 0.24107940087397856,
|
|
"learning_rate": 3.5157501697435393e-06,
|
|
"loss": 0.4835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48572295904159546,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3498.2,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 5.735294117647059,
|
|
"grad_norm": 0.20742111952653958,
|
|
"learning_rate": 3.4744397110887105e-06,
|
|
"loss": 0.4659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46599340438842773,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4387.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.743034055727554,
|
|
"grad_norm": 0.24339168121245688,
|
|
"learning_rate": 3.4333502915455496e-06,
|
|
"loss": 0.4716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4663081169128418,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3467.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 5.75077399380805,
|
|
"grad_norm": 0.2030068764779058,
|
|
"learning_rate": 3.3924824607092766e-06,
|
|
"loss": 0.4579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46148619055747986,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4467.1,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 5.758513931888545,
|
|
"grad_norm": 0.23720225443473478,
|
|
"learning_rate": 3.3518367652112205e-06,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49928000569343567,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3536.6,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 5.76625386996904,
|
|
"grad_norm": 0.21388490791029582,
|
|
"learning_rate": 3.3114137487115162e-06,
|
|
"loss": 0.4691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4830447733402252,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4088.9,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 5.773993808049536,
|
|
"grad_norm": 0.24042350766690682,
|
|
"learning_rate": 3.2712139518918474e-06,
|
|
"loss": 0.4722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4742598831653595,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3560.9,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 5.781733746130031,
|
|
"grad_norm": 0.20045278236118882,
|
|
"learning_rate": 3.2312379124481973e-06,
|
|
"loss": 0.4676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4577088952064514,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4477.0,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 5.7894736842105265,
|
|
"grad_norm": 0.25309880380871713,
|
|
"learning_rate": 3.1914861650836525e-06,
|
|
"loss": 0.4664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4756113588809967,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3193.1,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 5.7972136222910216,
|
|
"grad_norm": 0.2201716814517496,
|
|
"learning_rate": 3.1519592415012833e-06,
|
|
"loss": 0.4777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46364396810531616,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4601.6,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 5.804953560371517,
|
|
"grad_norm": 0.23649926644101738,
|
|
"learning_rate": 3.1126576703969834e-06,
|
|
"loss": 0.4679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.482467383146286,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3430.9,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 5.812693498452012,
|
|
"grad_norm": 0.2214534868161132,
|
|
"learning_rate": 3.0735819774524467e-06,
|
|
"loss": 0.4554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4528377056121826,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3828.6,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 5.820433436532507,
|
|
"grad_norm": 0.24515891846005014,
|
|
"learning_rate": 3.034732685328101e-06,
|
|
"loss": 0.5454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46225646138191223,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3390.7,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 5.828173374613003,
|
|
"grad_norm": 0.22072791719945636,
|
|
"learning_rate": 2.9961103136561334e-06,
|
|
"loss": 0.4656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45086175203323364,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3718.5,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 5.835913312693498,
|
|
"grad_norm": 0.25238421634676017,
|
|
"learning_rate": 2.95771537903353e-06,
|
|
"loss": 0.5577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4524438977241516,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3193.4,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 5.843653250773993,
|
|
"grad_norm": 0.23195850076574098,
|
|
"learning_rate": 2.9195483950151836e-06,
|
|
"loss": 0.4588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45001834630966187,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3690.3,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 5.851393188854489,
|
|
"grad_norm": 0.2487896013000018,
|
|
"learning_rate": 2.8816098721069984e-06,
|
|
"loss": 0.5147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.477507621049881,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3144.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 5.859133126934984,
|
|
"grad_norm": 0.2283108946369156,
|
|
"learning_rate": 2.8439003177590963e-06,
|
|
"loss": 0.4585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4684329628944397,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3871.4,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 5.86687306501548,
|
|
"grad_norm": 0.25539787648745926,
|
|
"learning_rate": 2.8064202363589934e-06,
|
|
"loss": 0.5089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46779173612594604,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3256.5,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 5.874613003095975,
|
|
"grad_norm": 0.23033187572477296,
|
|
"learning_rate": 2.7691701292248716e-06,
|
|
"loss": 0.4605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47206708788871765,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3665.0,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 5.882352941176471,
|
|
"grad_norm": 0.2505828414321148,
|
|
"learning_rate": 2.732150494598882e-06,
|
|
"loss": 0.5563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4846688210964203,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3457.3,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 5.890092879256966,
|
|
"grad_norm": 0.25343046197650915,
|
|
"learning_rate": 2.6953618276404613e-06,
|
|
"loss": 0.4528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44189953804016113,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3141.0,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 5.897832817337461,
|
|
"grad_norm": 0.23744120898748566,
|
|
"learning_rate": 2.658804620419715e-06,
|
|
"loss": 0.5418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46508336067199707,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3509.7,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 5.905572755417957,
|
|
"grad_norm": 0.24804173873195717,
|
|
"learning_rate": 2.622479361910848e-06,
|
|
"loss": 0.4644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4441119432449341,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3106.4,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 5.913312693498452,
|
|
"grad_norm": 0.2665451371246972,
|
|
"learning_rate": 2.5863865379856055e-06,
|
|
"loss": 0.4964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4510113596916199,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3530.0,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.921052631578947,
|
|
"grad_norm": 0.24440748143275612,
|
|
"learning_rate": 2.550526631406782e-06,
|
|
"loss": 0.4649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4752050042152405,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3219.6,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 5.928792569659443,
|
|
"grad_norm": 0.7821932084740958,
|
|
"learning_rate": 2.5149001218217793e-06,
|
|
"loss": 0.4397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791755795478821,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4948.2,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 5.936532507739938,
|
|
"grad_norm": 0.689274326323386,
|
|
"learning_rate": 2.4795074857561587e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24057920277118683,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4927.9,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 5.944272445820434,
|
|
"grad_norm": 0.6098355124995979,
|
|
"learning_rate": 2.444349196607301e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26120248436927795,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4602.5,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 5.952012383900929,
|
|
"grad_norm": 0.3585836508498647,
|
|
"learning_rate": 2.409425724638048e-06,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22006279230117798,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4983.0,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 5.959752321981425,
|
|
"grad_norm": 0.3295021839597865,
|
|
"learning_rate": 2.3747375369704216e-06,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23593170940876007,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4184.5,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.96749226006192,
|
|
"grad_norm": 0.3026012608189533,
|
|
"learning_rate": 2.3402850975793933e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22847211360931396,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4258.2,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 5.975232198142415,
|
|
"grad_norm": 0.3595328387325092,
|
|
"learning_rate": 2.3060688672866436e-06,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22959434986114502,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4959.4,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 5.9829721362229105,
|
|
"grad_norm": 0.7295808308908135,
|
|
"learning_rate": 2.2720893037544256e-06,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23389963805675507,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3119.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 5.9907120743034055,
|
|
"grad_norm": 0.5650390704002861,
|
|
"learning_rate": 2.238346861479439e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21749567985534668,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2866.9,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 5.9984520123839005,
|
|
"grad_norm": 0.40816923827924784,
|
|
"learning_rate": 2.204841991786739e-06,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23147590458393097,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3248.7,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 6.006191950464396,
|
|
"grad_norm": 0.7291156903672835,
|
|
"learning_rate": 2.17157514282371e-06,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24925114214420319,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3799.7,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 6.013931888544891,
|
|
"grad_norm": 0.5260919366108272,
|
|
"learning_rate": 2.138546759554072e-06,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24600772559642792,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3723.8,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 6.021671826625387,
|
|
"grad_norm": 0.3723112864888638,
|
|
"learning_rate": 2.105757283751926e-06,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24158604443073273,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3488.4,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 6.029411764705882,
|
|
"grad_norm": 0.31028876289795915,
|
|
"learning_rate": 2.0732071539958333e-06,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264404296875,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3832.2,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 6.037151702786378,
|
|
"grad_norm": 0.28034887895883914,
|
|
"learning_rate": 2.04089680566298e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2235153615474701,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3689.5,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 6.044891640866873,
|
|
"grad_norm": 0.24488025943334588,
|
|
"learning_rate": 2.0088266709233185e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2193290889263153,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3592.6,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 6.052631578947368,
|
|
"grad_norm": 0.2339134884248038,
|
|
"learning_rate": 1.9769971787338105e-06,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2263258993625641,
|
|
"step": 3910,
|
|
"valid_targets_mean": 3646.3,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 6.060371517027864,
|
|
"grad_norm": 0.21428445248705047,
|
|
"learning_rate": 1.945408754832676e-06,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22564566135406494,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3811.8,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 6.068111455108359,
|
|
"grad_norm": 0.22452809167389198,
|
|
"learning_rate": 1.9140618217337083e-06,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23167350888252258,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3824.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 6.075851393188855,
|
|
"grad_norm": 0.2463111946693812,
|
|
"learning_rate": 1.8829567987206232e-06,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21592099964618683,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3658.0,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 6.08359133126935,
|
|
"grad_norm": 0.2065926156962835,
|
|
"learning_rate": 1.8520941018414374e-06,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2253035306930542,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3583.1,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 6.091331269349845,
|
|
"grad_norm": 0.4700973971020121,
|
|
"learning_rate": 1.8214741439029148e-06,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2691056430339813,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4880.6,
|
|
"valid_targets_min": 3320
|
|
},
|
|
{
|
|
"epoch": 6.099071207430341,
|
|
"grad_norm": 0.8850257104525197,
|
|
"learning_rate": 1.7910973344650551e-06,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4290119409561157,
|
|
"step": 3940,
|
|
"valid_targets_mean": 978.9,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 6.106811145510836,
|
|
"grad_norm": 0.28346540950291677,
|
|
"learning_rate": 1.7609640798355832e-06,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25688430666923523,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4807.5,
|
|
"valid_targets_min": 2887
|
|
},
|
|
{
|
|
"epoch": 6.114551083591332,
|
|
"grad_norm": 0.6378797144431667,
|
|
"learning_rate": 1.731074783064548e-06,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46209287643432617,
|
|
"step": 3950,
|
|
"valid_targets_mean": 870.8,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 6.122291021671827,
|
|
"grad_norm": 0.2755630344354851,
|
|
"learning_rate": 1.7014298439389198e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551175355911255,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4695.3,
|
|
"valid_targets_min": 3169
|
|
},
|
|
{
|
|
"epoch": 6.130030959752322,
|
|
"grad_norm": 0.5465239623940196,
|
|
"learning_rate": 1.6720296589772323e-06,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44647932052612305,
|
|
"step": 3960,
|
|
"valid_targets_mean": 994.2,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 6.137770897832818,
|
|
"grad_norm": 0.1963243596923511,
|
|
"learning_rate": 1.6428746214242908e-06,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24854975938796997,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4788.1,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 6.145510835913313,
|
|
"grad_norm": 0.23925681970801946,
|
|
"learning_rate": 1.6139651212459173e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27736037969589233,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3206.7,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 6.153250773993808,
|
|
"grad_norm": 0.19526821239590325,
|
|
"learning_rate": 1.5853015451237164e-06,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2648370862007141,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4860.4,
|
|
"valid_targets_min": 3021
|
|
},
|
|
{
|
|
"epoch": 6.1609907120743035,
|
|
"grad_norm": 0.17901003628384335,
|
|
"learning_rate": 1.5568842764499149e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25796425342559814,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4770.2,
|
|
"valid_targets_min": 2896
|
|
},
|
|
{
|
|
"epoch": 6.1687306501547985,
|
|
"grad_norm": 0.18355680383922257,
|
|
"learning_rate": 1.5287136953222436e-06,
|
|
"loss": 0.3601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2625921964645386,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4724.9,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 6.176470588235294,
|
|
"grad_norm": 0.20634320415097784,
|
|
"learning_rate": 1.500790178538829e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26189669966697693,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4654.0,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 6.184210526315789,
|
|
"grad_norm": 0.18320060274020078,
|
|
"learning_rate": 1.4731140995931648e-06,
|
|
"loss": 0.3776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25498661398887634,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4692.8,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 6.191950464396285,
|
|
"grad_norm": 0.17383132846140184,
|
|
"learning_rate": 1.4456858286691321e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579241096973419,
|
|
"step": 4000,
|
|
"valid_targets_mean": 5052.9,
|
|
"valid_targets_min": 2772
|
|
},
|
|
{
|
|
"epoch": 6.19969040247678,
|
|
"grad_norm": 0.18053492970170026,
|
|
"learning_rate": 1.418505732636024e-06,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26143360137939453,
|
|
"step": 4005,
|
|
"valid_targets_mean": 5030.0,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 6.207430340557275,
|
|
"grad_norm": 0.16757443091222204,
|
|
"learning_rate": 1.3915741750436419e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25423550605773926,
|
|
"step": 4010,
|
|
"valid_targets_mean": 5112.5,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 6.215170278637771,
|
|
"grad_norm": 0.2658597423513511,
|
|
"learning_rate": 1.3648915161174547e-06,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27926871180534363,
|
|
"step": 4015,
|
|
"valid_targets_mean": 5787.9,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 6.222910216718266,
|
|
"grad_norm": 0.226772644460831,
|
|
"learning_rate": 1.3384581127537554e-06,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29047223925590515,
|
|
"step": 4020,
|
|
"valid_targets_mean": 6091.0,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 6.230650154798761,
|
|
"grad_norm": 0.23155465259221597,
|
|
"learning_rate": 1.31227431851489e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898438572883606,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5457.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 6.238390092879257,
|
|
"grad_norm": 0.21580118203410323,
|
|
"learning_rate": 1.2863404836245552e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27747899293899536,
|
|
"step": 4030,
|
|
"valid_targets_mean": 5283.4,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 6.246130030959752,
|
|
"grad_norm": 0.20179198181754435,
|
|
"learning_rate": 1.2606569549630754e-06,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699848413467407,
|
|
"step": 4035,
|
|
"valid_targets_mean": 6079.6,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 6.253869969040248,
|
|
"grad_norm": 0.20780526327723273,
|
|
"learning_rate": 1.2352240760627865e-06,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023836016654968,
|
|
"step": 4040,
|
|
"valid_targets_mean": 5938.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 6.261609907120743,
|
|
"grad_norm": 0.2358042418220001,
|
|
"learning_rate": 1.210042187103444e-06,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776799201965332,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5991.7,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.269349845201239,
|
|
"grad_norm": 0.21389084088630184,
|
|
"learning_rate": 1.1851116249076554e-06,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743082046508789,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5768.7,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 6.277089783281734,
|
|
"grad_norm": 0.2526903154129935,
|
|
"learning_rate": 1.160432722936391e-06,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764449119567871,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5973.1,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.284829721362229,
|
|
"grad_norm": 0.21257688091062354,
|
|
"learning_rate": 1.1360058112845084e-06,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28229254484176636,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5782.9,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 6.292569659442725,
|
|
"grad_norm": 0.20899291602873515,
|
|
"learning_rate": 1.1118312166763556e-06,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762507498264313,
|
|
"step": 4065,
|
|
"valid_targets_mean": 5733.2,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 6.30030959752322,
|
|
"grad_norm": 0.21713612838525795,
|
|
"learning_rate": 1.0879092624613906e-06,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842179238796234,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5674.5,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.308049535603715,
|
|
"grad_norm": 0.2039910928113309,
|
|
"learning_rate": 1.0642402686098507e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28843218088150024,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5429.0,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 6.315789473684211,
|
|
"grad_norm": 0.19224365218790374,
|
|
"learning_rate": 1.0408245517084813e-06,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682873606681824,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5852.8,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 6.323529411764706,
|
|
"grad_norm": 0.2065527692821168,
|
|
"learning_rate": 1.0176624249563073e-06,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2906603217124939,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5466.6,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 6.3312693498452015,
|
|
"grad_norm": 0.19515211355143153,
|
|
"learning_rate": 9.947541981604258e-07,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659899592399597,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5432.9,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 6.3390092879256965,
|
|
"grad_norm": 0.1913097879398444,
|
|
"learning_rate": 9.721001777318738e-07,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871350049972534,
|
|
"step": 4095,
|
|
"valid_targets_mean": 6355.9,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 6.346749226006192,
|
|
"grad_norm": 0.6303827656733666,
|
|
"learning_rate": 9.497006666815345e-07,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5261931419372559,
|
|
"step": 4100,
|
|
"valid_targets_mean": 805.4,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 6.354489164086687,
|
|
"grad_norm": 0.1791982041781785,
|
|
"learning_rate": 9.275559646160737e-07,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2809305787086487,
|
|
"step": 4105,
|
|
"valid_targets_mean": 6133.8,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 6.362229102167182,
|
|
"grad_norm": 0.6487679004490194,
|
|
"learning_rate": 9.056663677339306e-07,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.536277174949646,
|
|
"step": 4110,
|
|
"valid_targets_mean": 817.7,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 6.369969040247678,
|
|
"grad_norm": 0.16900952991743548,
|
|
"learning_rate": 8.840321688213783e-07,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26960551738739014,
|
|
"step": 4115,
|
|
"valid_targets_mean": 6547.3,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 6.377708978328173,
|
|
"grad_norm": 0.16426895963647536,
|
|
"learning_rate": 8.626536572485777e-07,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27813348174095154,
|
|
"step": 4120,
|
|
"valid_targets_mean": 6934.5,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 6.385448916408668,
|
|
"grad_norm": 0.17654840470496103,
|
|
"learning_rate": 8.415311189657193e-07,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760644555091858,
|
|
"step": 4125,
|
|
"valid_targets_mean": 6416.0,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 6.393188854489164,
|
|
"grad_norm": 0.17398066108702462,
|
|
"learning_rate": 8.206648364992165e-07,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28320106863975525,
|
|
"step": 4130,
|
|
"valid_targets_mean": 6050.4,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 6.400928792569659,
|
|
"grad_norm": 0.1889308588041788,
|
|
"learning_rate": 8.000550889478931e-07,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26654744148254395,
|
|
"step": 4135,
|
|
"valid_targets_mean": 5863.9,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 6.408668730650155,
|
|
"grad_norm": 0.18610631361250526,
|
|
"learning_rate": 7.797021519792713e-07,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283447265625,
|
|
"step": 4140,
|
|
"valid_targets_mean": 6129.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 6.41640866873065,
|
|
"grad_norm": 0.17612060388117373,
|
|
"learning_rate": 7.596062978258878e-07,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270918071269989,
|
|
"step": 4145,
|
|
"valid_targets_mean": 6139.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 6.424148606811146,
|
|
"grad_norm": 0.19144618179300954,
|
|
"learning_rate": 7.397677952816362e-07,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28544291853904724,
|
|
"step": 4150,
|
|
"valid_targets_mean": 6069.3,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 6.431888544891641,
|
|
"grad_norm": 0.18631608355027618,
|
|
"learning_rate": 7.201869096981839e-07,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27224114537239075,
|
|
"step": 4155,
|
|
"valid_targets_mean": 6344.3,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 6.439628482972136,
|
|
"grad_norm": 0.19702195585961055,
|
|
"learning_rate": 7.008639029814212e-07,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27386119961738586,
|
|
"step": 4160,
|
|
"valid_targets_mean": 6771.4,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 6.447368421052632,
|
|
"grad_norm": 0.18187091943265118,
|
|
"learning_rate": 6.817990335879532e-07,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707570791244507,
|
|
"step": 4165,
|
|
"valid_targets_mean": 6569.9,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 6.455108359133127,
|
|
"grad_norm": 0.8528880455241485,
|
|
"learning_rate": 6.629925565216489e-07,
|
|
"loss": 0.4733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49345722794532776,
|
|
"step": 4170,
|
|
"valid_targets_mean": 2007.4,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 6.462848297213622,
|
|
"grad_norm": 0.699488015653727,
|
|
"learning_rate": 6.444447233302287e-07,
|
|
"loss": 0.4807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49539169669151306,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3351.7,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 6.470588235294118,
|
|
"grad_norm": 0.6270608704495003,
|
|
"learning_rate": 6.261557821018938e-07,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4838091731071472,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3195.7,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 6.478328173374613,
|
|
"grad_norm": 0.5387591896066183,
|
|
"learning_rate": 6.08125977462013e-07,
|
|
"loss": 0.4717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44912034273147583,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3282.1,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 6.486068111455109,
|
|
"grad_norm": 0.4499016313928773,
|
|
"learning_rate": 5.903555505698588e-07,
|
|
"loss": 0.4553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4348664879798889,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3273.5,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 6.493808049535604,
|
|
"grad_norm": 0.3663129451282317,
|
|
"learning_rate": 5.728447391153679e-07,
|
|
"loss": 0.4627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4080617427825928,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3904.3,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 6.5015479876160995,
|
|
"grad_norm": 0.36106339613529814,
|
|
"learning_rate": 5.555937773159614e-07,
|
|
"loss": 0.4671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4984878897666931,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3417.9,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 6.5092879256965945,
|
|
"grad_norm": 0.38104294266011846,
|
|
"learning_rate": 5.386028959134293e-07,
|
|
"loss": 0.4605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4836174547672272,
|
|
"step": 4205,
|
|
"valid_targets_mean": 2151.9,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 6.5170278637770895,
|
|
"grad_norm": 0.315701416188697,
|
|
"learning_rate": 5.2187232217082e-07,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43265655636787415,
|
|
"step": 4210,
|
|
"valid_targets_mean": 2757.0,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 6.524767801857585,
|
|
"grad_norm": 0.3241514534299132,
|
|
"learning_rate": 5.054022798694313e-07,
|
|
"loss": 0.4602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.470084011554718,
|
|
"step": 4215,
|
|
"valid_targets_mean": 2699.5,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 6.53250773993808,
|
|
"grad_norm": 0.26159511654460443,
|
|
"learning_rate": 4.8919298930578e-07,
|
|
"loss": 0.4523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4371451735496521,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3419.5,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 6.540247678018575,
|
|
"grad_norm": 0.2504235833919837,
|
|
"learning_rate": 4.732446672886881e-07,
|
|
"loss": 0.4512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4491339921951294,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3438.5,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 6.547987616099071,
|
|
"grad_norm": 0.22720432460548975,
|
|
"learning_rate": 4.575575271363675e-07,
|
|
"loss": 0.4554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4360339641571045,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4330.6,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 6.555727554179566,
|
|
"grad_norm": 0.28294511225608837,
|
|
"learning_rate": 4.421317786735624e-07,
|
|
"loss": 0.4502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4861607551574707,
|
|
"step": 4235,
|
|
"valid_targets_mean": 2712.3,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 6.563467492260062,
|
|
"grad_norm": 0.6707297493719596,
|
|
"learning_rate": 4.269676282287494e-07,
|
|
"loss": 0.4812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6213827729225159,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3619.0,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 6.571207430340557,
|
|
"grad_norm": 0.6214045190705632,
|
|
"learning_rate": 4.1206527863139057e-07,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5824170708656311,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3834.9,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 6.578947368421053,
|
|
"grad_norm": 0.6112074026597619,
|
|
"learning_rate": 3.974249292091892e-07,
|
|
"loss": 0.594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5758994817733765,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3756.7,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 6.586687306501548,
|
|
"grad_norm": 0.5413229105025136,
|
|
"learning_rate": 3.830467757854539e-07,
|
|
"loss": 0.5967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5864338278770447,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3851.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 6.594427244582043,
|
|
"grad_norm": 0.5819276533348205,
|
|
"learning_rate": 3.689310106764632e-07,
|
|
"loss": 0.5799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6016057729721069,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3610.4,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 6.602167182662539,
|
|
"grad_norm": 0.43939760248910475,
|
|
"learning_rate": 3.550778226888985e-07,
|
|
"loss": 0.5774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5410871505737305,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4324.1,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 6.609907120743034,
|
|
"grad_norm": 0.4414282999708507,
|
|
"learning_rate": 3.414873971173127e-07,
|
|
"loss": 0.5881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5685605406761169,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3782.9,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 6.617647058823529,
|
|
"grad_norm": 0.37817173468347903,
|
|
"learning_rate": 3.2815991574166375e-07,
|
|
"loss": 0.572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5508712530136108,
|
|
"step": 4275,
|
|
"valid_targets_mean": 4237.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 6.625386996904025,
|
|
"grad_norm": 0.35494446629614074,
|
|
"learning_rate": 3.150955568248715e-07,
|
|
"loss": 0.5742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5618966817855835,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4379.6,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 6.63312693498452,
|
|
"grad_norm": 0.32391073129637493,
|
|
"learning_rate": 3.022944951104356e-07,
|
|
"loss": 0.5608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5062550902366638,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4701.0,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 6.640866873065016,
|
|
"grad_norm": 0.3204909549379454,
|
|
"learning_rate": 2.8975690182011075e-07,
|
|
"loss": 0.5814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5509536266326904,
|
|
"step": 4290,
|
|
"valid_targets_mean": 4621.8,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 6.648606811145511,
|
|
"grad_norm": 0.3647704275879708,
|
|
"learning_rate": 2.7748294465159254e-07,
|
|
"loss": 0.5869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6267940998077393,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3181.1,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 6.656346749226007,
|
|
"grad_norm": 0.31426260151044333,
|
|
"learning_rate": 2.6547278777629527e-07,
|
|
"loss": 0.5858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6311694383621216,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4021.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 6.664086687306502,
|
|
"grad_norm": 0.33599295900021175,
|
|
"learning_rate": 2.53726591837149e-07,
|
|
"loss": 0.5722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6311732530593872,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3290.7,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 6.671826625386997,
|
|
"grad_norm": 0.3475426941422704,
|
|
"learning_rate": 2.42244513946448e-07,
|
|
"loss": 0.5813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6119974851608276,
|
|
"step": 4310,
|
|
"valid_targets_mean": 4297.4,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 6.679566563467493,
|
|
"grad_norm": 0.45164109717828255,
|
|
"learning_rate": 2.310267076837569e-07,
|
|
"loss": 0.5566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4921790063381195,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3676.8,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 6.687306501547988,
|
|
"grad_norm": 0.4723806465235738,
|
|
"learning_rate": 2.2007332309384566e-07,
|
|
"loss": 0.4951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5073189735412598,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3877.6,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 6.695046439628483,
|
|
"grad_norm": 0.46646701615227143,
|
|
"learning_rate": 2.0938450668469112e-07,
|
|
"loss": 0.4788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5121541619300842,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3730.8,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 6.7027863777089784,
|
|
"grad_norm": 0.43132502426183134,
|
|
"learning_rate": 1.989604014255142e-07,
|
|
"loss": 0.4785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4603354036808014,
|
|
"step": 4330,
|
|
"valid_targets_mean": 3764.0,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 6.7105263157894735,
|
|
"grad_norm": 0.40909818551913196,
|
|
"learning_rate": 1.8880114674486804e-07,
|
|
"loss": 0.4773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4582119882106781,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3619.3,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 6.718266253869969,
|
|
"grad_norm": 0.4004083105802111,
|
|
"learning_rate": 1.789068785287751e-07,
|
|
"loss": 0.4722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4720708727836609,
|
|
"step": 4340,
|
|
"valid_targets_mean": 3383.6,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 6.726006191950464,
|
|
"grad_norm": 0.38058963339368435,
|
|
"learning_rate": 1.6927772911890406e-07,
|
|
"loss": 0.4725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4877032935619354,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3617.2,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 6.73374613003096,
|
|
"grad_norm": 0.3597926802536988,
|
|
"learning_rate": 1.5991382731080473e-07,
|
|
"loss": 0.4701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45819124579429626,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3628.0,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 6.741486068111455,
|
|
"grad_norm": 0.3487027232074482,
|
|
"learning_rate": 1.508152983521871e-07,
|
|
"loss": 0.4717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4666120707988739,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3666.4,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 6.74922600619195,
|
|
"grad_norm": 0.3404120701144106,
|
|
"learning_rate": 1.4198226394123827e-07,
|
|
"loss": 0.459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4533484876155853,
|
|
"step": 4360,
|
|
"valid_targets_mean": 3281.1,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 6.756965944272446,
|
|
"grad_norm": 0.3094167758395204,
|
|
"learning_rate": 1.3341484222500368e-07,
|
|
"loss": 0.4692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45932602882385254,
|
|
"step": 4365,
|
|
"valid_targets_mean": 3577.5,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 6.764705882352941,
|
|
"grad_norm": 0.35010452061820874,
|
|
"learning_rate": 1.2511314779779515e-07,
|
|
"loss": 0.4725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4337707757949829,
|
|
"step": 4370,
|
|
"valid_targets_mean": 3314.5,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.772445820433436,
|
|
"grad_norm": 0.3121231927805868,
|
|
"learning_rate": 1.170772916996743e-07,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4616796374320984,
|
|
"step": 4375,
|
|
"valid_targets_mean": 3562.3,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 6.780185758513932,
|
|
"grad_norm": 0.3089208769196132,
|
|
"learning_rate": 1.0930738141495145e-07,
|
|
"loss": 0.4709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46547895669937134,
|
|
"step": 4380,
|
|
"valid_targets_mean": 3536.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 6.787925696594427,
|
|
"grad_norm": 0.29875151854857207,
|
|
"learning_rate": 1.0180352087075573e-07,
|
|
"loss": 0.4628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4715871214866638,
|
|
"step": 4385,
|
|
"valid_targets_mean": 3570.4,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 6.795665634674923,
|
|
"grad_norm": 0.2970536821128796,
|
|
"learning_rate": 9.456581043565172e-08,
|
|
"loss": 0.4801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4667375981807709,
|
|
"step": 4390,
|
|
"valid_targets_mean": 3647.4,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 6.803405572755418,
|
|
"grad_norm": 0.2928285252990381,
|
|
"learning_rate": 8.759434691828051e-08,
|
|
"loss": 0.4641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46019408106803894,
|
|
"step": 4395,
|
|
"valid_targets_mean": 3439.6,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 6.811145510835914,
|
|
"grad_norm": 0.30341035170663566,
|
|
"learning_rate": 8.08892235660741e-08,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42000389099121094,
|
|
"step": 4400,
|
|
"valid_targets_mean": 3415.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.818885448916409,
|
|
"grad_norm": 0.295876982601661,
|
|
"learning_rate": 7.445053006401637e-08,
|
|
"loss": 0.5433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45638465881347656,
|
|
"step": 4405,
|
|
"valid_targets_mean": 3413.9,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 6.826625386996904,
|
|
"grad_norm": 0.2663084765881604,
|
|
"learning_rate": 6.827835253342185e-08,
|
|
"loss": 0.4678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4524911046028137,
|
|
"step": 4410,
|
|
"valid_targets_mean": 3740.7,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 6.8343653250774,
|
|
"grad_norm": 0.27849026600740684,
|
|
"learning_rate": 6.237277353080995e-08,
|
|
"loss": 0.5571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4894692897796631,
|
|
"step": 4415,
|
|
"valid_targets_mean": 3384.0,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 6.842105263157895,
|
|
"grad_norm": 0.284407912697405,
|
|
"learning_rate": 5.673387204677694e-08,
|
|
"loss": 0.4592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45752373337745667,
|
|
"step": 4420,
|
|
"valid_targets_mean": 3341.3,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 6.84984520123839,
|
|
"grad_norm": 0.264041596375113,
|
|
"learning_rate": 5.136172350495683e-08,
|
|
"loss": 0.509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4639412760734558,
|
|
"step": 4425,
|
|
"valid_targets_mean": 3684.2,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 6.857585139318886,
|
|
"grad_norm": 0.29186245871808064,
|
|
"learning_rate": 4.62563997610066e-08,
|
|
"loss": 0.4603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44907432794570923,
|
|
"step": 4430,
|
|
"valid_targets_mean": 3157.4,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 6.865325077399381,
|
|
"grad_norm": 0.29347563396442583,
|
|
"learning_rate": 4.141796910163587e-08,
|
|
"loss": 0.5089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49080267548561096,
|
|
"step": 4435,
|
|
"valid_targets_mean": 3240.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 6.8730650154798765,
|
|
"grad_norm": 0.27314426926013186,
|
|
"learning_rate": 3.684649624370762e-08,
|
|
"loss": 0.4595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4597470164299011,
|
|
"step": 4440,
|
|
"valid_targets_mean": 3223.1,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 6.8808049535603715,
|
|
"grad_norm": 0.2478970727415698,
|
|
"learning_rate": 3.2542042333369994e-08,
|
|
"loss": 0.5537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4760324954986572,
|
|
"step": 4445,
|
|
"valid_targets_mean": 3842.8,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 6.888544891640867,
|
|
"grad_norm": 0.27597667107207946,
|
|
"learning_rate": 2.85046649452303e-08,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46712955832481384,
|
|
"step": 4450,
|
|
"valid_targets_mean": 3401.7,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 6.896284829721362,
|
|
"grad_norm": 0.27249460486331994,
|
|
"learning_rate": 2.473441808159116e-08,
|
|
"loss": 0.537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5251383781433105,
|
|
"step": 4455,
|
|
"valid_targets_mean": 3138.4,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 6.904024767801857,
|
|
"grad_norm": 0.27420775593874513,
|
|
"learning_rate": 2.1231352171726672e-08,
|
|
"loss": 0.4684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4861498475074768,
|
|
"step": 4460,
|
|
"valid_targets_mean": 3528.3,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 6.911764705882353,
|
|
"grad_norm": 0.28902087786383834,
|
|
"learning_rate": 1.7995514071209585e-08,
|
|
"loss": 0.4947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.560650646686554,
|
|
"step": 4465,
|
|
"valid_targets_mean": 2719.7,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 6.919504643962848,
|
|
"grad_norm": 0.26124140734602724,
|
|
"learning_rate": 1.502694706128294e-08,
|
|
"loss": 0.4598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4720116853713989,
|
|
"step": 4470,
|
|
"valid_targets_mean": 3570.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.927244582043343,
|
|
"grad_norm": 0.6990116106430034,
|
|
"learning_rate": 1.2325690848278282e-08,
|
|
"loss": 0.4774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47434142231941223,
|
|
"step": 4475,
|
|
"valid_targets_mean": 2341.9,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 6.934984520123839,
|
|
"grad_norm": 0.7575214019309366,
|
|
"learning_rate": 9.891781563091674e-09,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2614922821521759,
|
|
"step": 4480,
|
|
"valid_targets_mean": 4556.8,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 6.942724458204334,
|
|
"grad_norm": 0.9048364639257506,
|
|
"learning_rate": 7.725251760692942e-09,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26444995403289795,
|
|
"step": 4485,
|
|
"valid_targets_mean": 4626.2,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 6.95046439628483,
|
|
"grad_norm": 0.7851901560353346,
|
|
"learning_rate": 5.826130419697151e-09,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2303996980190277,
|
|
"step": 4490,
|
|
"valid_targets_mean": 4938.1,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 6.958204334365325,
|
|
"grad_norm": 0.8215220803030948,
|
|
"learning_rate": 4.194442941971577e-09,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24498389661312103,
|
|
"step": 4495,
|
|
"valid_targets_mean": 4161.5,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 6.965944272445821,
|
|
"grad_norm": 0.9564577194889915,
|
|
"learning_rate": 2.830211152300422e-09,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25270316004753113,
|
|
"step": 4500,
|
|
"valid_targets_mean": 4068.4,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 6.973684210526316,
|
|
"grad_norm": 0.8774719453894075,
|
|
"learning_rate": 1.7334532980828322e-09,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2449713945388794,
|
|
"step": 4505,
|
|
"valid_targets_mean": 4988.2,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 6.981424148606811,
|
|
"grad_norm": 0.9111507855531771,
|
|
"learning_rate": 9.041840491064157e-10,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22209803760051727,
|
|
"step": 4510,
|
|
"valid_targets_mean": 2636.5,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 6.989164086687307,
|
|
"grad_norm": 0.9332673204479095,
|
|
"learning_rate": 3.4241449733407593e-10,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24705694615840912,
|
|
"step": 4515,
|
|
"valid_targets_mean": 2712.8,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 6.996904024767802,
|
|
"grad_norm": 0.8470468387956538,
|
|
"learning_rate": 4.815215676634566e-11,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2318914830684662,
|
|
"step": 4520,
|
|
"valid_targets_mean": 3597.4,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 4522,
|
|
"total_flos": 1.326951254196224e+16,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 1.1333,
|
|
"train_samples_per_second": 510432.869,
|
|
"train_steps_per_second": 3990.218
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4522,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.326951254196224e+16,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|