9673 lines
258 KiB
JSON
9673 lines
258 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4375,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008,
|
|
"grad_norm": 30.676280988429856,
|
|
"learning_rate": 3.6529680365296803e-07,
|
|
"loss": 1.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.2006312608718872,
|
|
"step": 5,
|
|
"valid_targets_mean": 951.6,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.016,
|
|
"grad_norm": 28.668825375269876,
|
|
"learning_rate": 8.219178082191781e-07,
|
|
"loss": 1.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.1689717769622803,
|
|
"step": 10,
|
|
"valid_targets_mean": 1206.2,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.024,
|
|
"grad_norm": 27.32055513464657,
|
|
"learning_rate": 1.278538812785388e-06,
|
|
"loss": 1.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.1466727256774902,
|
|
"step": 15,
|
|
"valid_targets_mean": 1020.4,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 0.032,
|
|
"grad_norm": 18.542333827031467,
|
|
"learning_rate": 1.7351598173515982e-06,
|
|
"loss": 1.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.0319907665252686,
|
|
"step": 20,
|
|
"valid_targets_mean": 1490.5,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"grad_norm": 13.73025573384511,
|
|
"learning_rate": 2.191780821917808e-06,
|
|
"loss": 0.9848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9434970617294312,
|
|
"step": 25,
|
|
"valid_targets_mean": 1219.0,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 0.048,
|
|
"grad_norm": 8.749307176557625,
|
|
"learning_rate": 2.6484018264840183e-06,
|
|
"loss": 0.9195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8712356090545654,
|
|
"step": 30,
|
|
"valid_targets_mean": 1210.1,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 0.056,
|
|
"grad_norm": 5.332093995936805,
|
|
"learning_rate": 3.1050228310502285e-06,
|
|
"loss": 0.8342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8116576671600342,
|
|
"step": 35,
|
|
"valid_targets_mean": 1220.6,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 0.064,
|
|
"grad_norm": 3.9778437535139344,
|
|
"learning_rate": 3.5616438356164386e-06,
|
|
"loss": 0.804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7886539697647095,
|
|
"step": 40,
|
|
"valid_targets_mean": 1257.5,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.072,
|
|
"grad_norm": 2.986796861837671,
|
|
"learning_rate": 4.018264840182649e-06,
|
|
"loss": 0.7447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7322535514831543,
|
|
"step": 45,
|
|
"valid_targets_mean": 1358.4,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"grad_norm": 2.546722831935672,
|
|
"learning_rate": 4.4748858447488585e-06,
|
|
"loss": 0.7098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7030981183052063,
|
|
"step": 50,
|
|
"valid_targets_mean": 1263.6,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.088,
|
|
"grad_norm": 1.7508144838707125,
|
|
"learning_rate": 4.931506849315069e-06,
|
|
"loss": 0.6694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6731879711151123,
|
|
"step": 55,
|
|
"valid_targets_mean": 1503.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.096,
|
|
"grad_norm": 1.5242265574216027,
|
|
"learning_rate": 5.388127853881279e-06,
|
|
"loss": 0.6502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6159995794296265,
|
|
"step": 60,
|
|
"valid_targets_mean": 1466.4,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.104,
|
|
"grad_norm": 1.4447113510821288,
|
|
"learning_rate": 5.8447488584474885e-06,
|
|
"loss": 0.6233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6157872080802917,
|
|
"step": 65,
|
|
"valid_targets_mean": 1387.8,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.112,
|
|
"grad_norm": 1.426452768045559,
|
|
"learning_rate": 6.301369863013699e-06,
|
|
"loss": 0.5799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.548506498336792,
|
|
"step": 70,
|
|
"valid_targets_mean": 1143.8,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"grad_norm": 1.0954690115469343,
|
|
"learning_rate": 6.757990867579909e-06,
|
|
"loss": 0.5348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.537449061870575,
|
|
"step": 75,
|
|
"valid_targets_mean": 1934.2,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.128,
|
|
"grad_norm": 1.2186364833784389,
|
|
"learning_rate": 7.214611872146119e-06,
|
|
"loss": 0.5315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5077548027038574,
|
|
"step": 80,
|
|
"valid_targets_mean": 1318.4,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 0.136,
|
|
"grad_norm": 1.27110809798807,
|
|
"learning_rate": 7.671232876712329e-06,
|
|
"loss": 0.5072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4585062861442566,
|
|
"step": 85,
|
|
"valid_targets_mean": 1065.2,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.144,
|
|
"grad_norm": 1.6016902184695048,
|
|
"learning_rate": 8.127853881278539e-06,
|
|
"loss": 0.4862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5078126192092896,
|
|
"step": 90,
|
|
"valid_targets_mean": 1534.9,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 0.152,
|
|
"grad_norm": 1.126314469543395,
|
|
"learning_rate": 8.584474885844748e-06,
|
|
"loss": 0.4827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48283830285072327,
|
|
"step": 95,
|
|
"valid_targets_mean": 1328.8,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"grad_norm": 1.066076729253292,
|
|
"learning_rate": 9.04109589041096e-06,
|
|
"loss": 0.4715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49698948860168457,
|
|
"step": 100,
|
|
"valid_targets_mean": 1410.3,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.168,
|
|
"grad_norm": 1.0873690440463042,
|
|
"learning_rate": 9.49771689497717e-06,
|
|
"loss": 0.4692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4536080062389374,
|
|
"step": 105,
|
|
"valid_targets_mean": 1470.3,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.176,
|
|
"grad_norm": 1.190811289716915,
|
|
"learning_rate": 9.95433789954338e-06,
|
|
"loss": 0.4509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42390650510787964,
|
|
"step": 110,
|
|
"valid_targets_mean": 1168.8,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 0.184,
|
|
"grad_norm": 1.1027813156026953,
|
|
"learning_rate": 1.0410958904109589e-05,
|
|
"loss": 0.4325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41239020228385925,
|
|
"step": 115,
|
|
"valid_targets_mean": 1155.3,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 0.192,
|
|
"grad_norm": 1.2884372860034496,
|
|
"learning_rate": 1.08675799086758e-05,
|
|
"loss": 0.4353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4341617226600647,
|
|
"step": 120,
|
|
"valid_targets_mean": 1123.0,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"grad_norm": 1.1521542398828246,
|
|
"learning_rate": 1.132420091324201e-05,
|
|
"loss": 0.4403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4319363534450531,
|
|
"step": 125,
|
|
"valid_targets_mean": 1377.9,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.208,
|
|
"grad_norm": 1.1144075003169183,
|
|
"learning_rate": 1.178082191780822e-05,
|
|
"loss": 0.4268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4117821455001831,
|
|
"step": 130,
|
|
"valid_targets_mean": 1375.7,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 0.216,
|
|
"grad_norm": 1.1031916397714963,
|
|
"learning_rate": 1.223744292237443e-05,
|
|
"loss": 0.4143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40220510959625244,
|
|
"step": 135,
|
|
"valid_targets_mean": 1243.5,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 0.224,
|
|
"grad_norm": 1.0065622552904367,
|
|
"learning_rate": 1.2694063926940641e-05,
|
|
"loss": 0.4261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4083831310272217,
|
|
"step": 140,
|
|
"valid_targets_mean": 1500.2,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 0.232,
|
|
"grad_norm": 0.9986406195274659,
|
|
"learning_rate": 1.3150684931506849e-05,
|
|
"loss": 0.4216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42009252309799194,
|
|
"step": 145,
|
|
"valid_targets_mean": 1411.9,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"grad_norm": 1.2543356555280438,
|
|
"learning_rate": 1.360730593607306e-05,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37381511926651,
|
|
"step": 150,
|
|
"valid_targets_mean": 1028.4,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 0.248,
|
|
"grad_norm": 1.1653563844488948,
|
|
"learning_rate": 1.406392694063927e-05,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36115968227386475,
|
|
"step": 155,
|
|
"valid_targets_mean": 1156.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.256,
|
|
"grad_norm": 1.0427237157806477,
|
|
"learning_rate": 1.4520547945205482e-05,
|
|
"loss": 0.4029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4561612010002136,
|
|
"step": 160,
|
|
"valid_targets_mean": 1831.2,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.264,
|
|
"grad_norm": 1.1103428296200726,
|
|
"learning_rate": 1.497716894977169e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35467714071273804,
|
|
"step": 165,
|
|
"valid_targets_mean": 1220.5,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.272,
|
|
"grad_norm": 1.0134380798236724,
|
|
"learning_rate": 1.54337899543379e-05,
|
|
"loss": 0.3846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37798207998275757,
|
|
"step": 170,
|
|
"valid_targets_mean": 1357.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"grad_norm": 1.188194228153094,
|
|
"learning_rate": 1.589041095890411e-05,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3618233799934387,
|
|
"step": 175,
|
|
"valid_targets_mean": 1276.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 0.288,
|
|
"grad_norm": 1.1325964075232517,
|
|
"learning_rate": 1.634703196347032e-05,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3638894259929657,
|
|
"step": 180,
|
|
"valid_targets_mean": 1366.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.296,
|
|
"grad_norm": 1.1171545226371478,
|
|
"learning_rate": 1.680365296803653e-05,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3856915831565857,
|
|
"step": 185,
|
|
"valid_targets_mean": 1364.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.304,
|
|
"grad_norm": 1.0749702594218136,
|
|
"learning_rate": 1.726027397260274e-05,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37205952405929565,
|
|
"step": 190,
|
|
"valid_targets_mean": 1324.5,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.312,
|
|
"grad_norm": 1.0662860161996275,
|
|
"learning_rate": 1.771689497716895e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3517031669616699,
|
|
"step": 195,
|
|
"valid_targets_mean": 1234.9,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"grad_norm": 1.2085712282369836,
|
|
"learning_rate": 1.8173515981735163e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4051426649093628,
|
|
"step": 200,
|
|
"valid_targets_mean": 1392.9,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 0.328,
|
|
"grad_norm": 1.2690306456225258,
|
|
"learning_rate": 1.863013698630137e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34325483441352844,
|
|
"step": 205,
|
|
"valid_targets_mean": 1064.9,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 0.336,
|
|
"grad_norm": 1.2789180948236798,
|
|
"learning_rate": 1.9086757990867582e-05,
|
|
"loss": 0.3564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3269120752811432,
|
|
"step": 210,
|
|
"valid_targets_mean": 1157.7,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 0.344,
|
|
"grad_norm": 1.0830660085924877,
|
|
"learning_rate": 1.954337899543379e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35428619384765625,
|
|
"step": 215,
|
|
"valid_targets_mean": 1344.7,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 0.352,
|
|
"grad_norm": 1.1297540434254691,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.3518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3363034725189209,
|
|
"step": 220,
|
|
"valid_targets_mean": 1197.5,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"grad_norm": 1.115096384358228,
|
|
"learning_rate": 2.045662100456621e-05,
|
|
"loss": 0.367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39095526933670044,
|
|
"step": 225,
|
|
"valid_targets_mean": 1276.4,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.368,
|
|
"grad_norm": 1.0291266181577463,
|
|
"learning_rate": 2.0913242009132424e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.388106107711792,
|
|
"step": 230,
|
|
"valid_targets_mean": 1621.1,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.376,
|
|
"grad_norm": 1.131467992318446,
|
|
"learning_rate": 2.1369863013698632e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3532012403011322,
|
|
"step": 235,
|
|
"valid_targets_mean": 1264.2,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 0.384,
|
|
"grad_norm": 1.211132776375921,
|
|
"learning_rate": 2.182648401826484e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.342357873916626,
|
|
"step": 240,
|
|
"valid_targets_mean": 1127.9,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 0.392,
|
|
"grad_norm": 1.089779623297599,
|
|
"learning_rate": 2.2283105022831052e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33647504448890686,
|
|
"step": 245,
|
|
"valid_targets_mean": 1239.2,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"grad_norm": 1.2506285362410074,
|
|
"learning_rate": 2.2739726027397263e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3412613868713379,
|
|
"step": 250,
|
|
"valid_targets_mean": 1024.0,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.408,
|
|
"grad_norm": 1.138753817013314,
|
|
"learning_rate": 2.3196347031963475e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3673658072948456,
|
|
"step": 255,
|
|
"valid_targets_mean": 1379.1,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.416,
|
|
"grad_norm": 1.119658092498723,
|
|
"learning_rate": 2.3652968036529683e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3925490975379944,
|
|
"step": 260,
|
|
"valid_targets_mean": 1617.1,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 0.424,
|
|
"grad_norm": 1.1131783785545433,
|
|
"learning_rate": 2.410958904109589e-05,
|
|
"loss": 0.3537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3820924162864685,
|
|
"step": 265,
|
|
"valid_targets_mean": 1503.8,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.432,
|
|
"grad_norm": 1.276323415884899,
|
|
"learning_rate": 2.4566210045662106e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3495078682899475,
|
|
"step": 270,
|
|
"valid_targets_mean": 1111.9,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"grad_norm": 1.253788936646263,
|
|
"learning_rate": 2.5022831050228314e-05,
|
|
"loss": 0.3424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32746508717536926,
|
|
"step": 275,
|
|
"valid_targets_mean": 1136.0,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.448,
|
|
"grad_norm": 1.0685455529089938,
|
|
"learning_rate": 2.547945205479452e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3598126173019409,
|
|
"step": 280,
|
|
"valid_targets_mean": 1404.4,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 0.456,
|
|
"grad_norm": 1.1760395626637696,
|
|
"learning_rate": 2.593607305936073e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34221047163009644,
|
|
"step": 285,
|
|
"valid_targets_mean": 1207.6,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.464,
|
|
"grad_norm": 1.1557547084376616,
|
|
"learning_rate": 2.6392694063926944e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38470280170440674,
|
|
"step": 290,
|
|
"valid_targets_mean": 1442.3,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.472,
|
|
"grad_norm": 1.116099194214949,
|
|
"learning_rate": 2.6849315068493153e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3685324788093567,
|
|
"step": 295,
|
|
"valid_targets_mean": 1458.4,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"grad_norm": 1.0644461217348944,
|
|
"learning_rate": 2.7305936073059364e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3277217149734497,
|
|
"step": 300,
|
|
"valid_targets_mean": 1380.9,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.488,
|
|
"grad_norm": 1.1278228000427657,
|
|
"learning_rate": 2.7762557077625572e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33293601870536804,
|
|
"step": 305,
|
|
"valid_targets_mean": 1165.2,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 0.496,
|
|
"grad_norm": 1.1879645693244656,
|
|
"learning_rate": 2.8219178082191783e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3275354504585266,
|
|
"step": 310,
|
|
"valid_targets_mean": 1084.9,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 0.504,
|
|
"grad_norm": 1.178677690659829,
|
|
"learning_rate": 2.8675799086757995e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36384332180023193,
|
|
"step": 315,
|
|
"valid_targets_mean": 1182.4,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.512,
|
|
"grad_norm": 1.1069804367470855,
|
|
"learning_rate": 2.9132420091324203e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3884104788303375,
|
|
"step": 320,
|
|
"valid_targets_mean": 1247.0,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"grad_norm": 1.3884427429074675,
|
|
"learning_rate": 2.958904109589041e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33950918912887573,
|
|
"step": 325,
|
|
"valid_targets_mean": 1259.5,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.528,
|
|
"grad_norm": 1.0533306331581007,
|
|
"learning_rate": 3.0045662100456626e-05,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34938758611679077,
|
|
"step": 330,
|
|
"valid_targets_mean": 1470.4,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 0.536,
|
|
"grad_norm": 1.1001559398062153,
|
|
"learning_rate": 3.0502283105022834e-05,
|
|
"loss": 0.3435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3409516215324402,
|
|
"step": 335,
|
|
"valid_targets_mean": 1378.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 0.544,
|
|
"grad_norm": 1.1204070258784675,
|
|
"learning_rate": 3.0958904109589045e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32774031162261963,
|
|
"step": 340,
|
|
"valid_targets_mean": 1106.7,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.552,
|
|
"grad_norm": 1.079938472966612,
|
|
"learning_rate": 3.141552511415525e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3414173722267151,
|
|
"step": 345,
|
|
"valid_targets_mean": 1214.8,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"grad_norm": 1.1104641644677964,
|
|
"learning_rate": 3.187214611872147e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3549253046512604,
|
|
"step": 350,
|
|
"valid_targets_mean": 1330.7,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.568,
|
|
"grad_norm": 1.0396639559240595,
|
|
"learning_rate": 3.2328767123287676e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34784120321273804,
|
|
"step": 355,
|
|
"valid_targets_mean": 1442.7,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.9758896432889843,
|
|
"learning_rate": 3.2785388127853884e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34021425247192383,
|
|
"step": 360,
|
|
"valid_targets_mean": 1532.7,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.584,
|
|
"grad_norm": 1.1046582286537374,
|
|
"learning_rate": 3.324200913242009e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3345939517021179,
|
|
"step": 365,
|
|
"valid_targets_mean": 1117.5,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 0.592,
|
|
"grad_norm": 1.2910518819027812,
|
|
"learning_rate": 3.369863013698631e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3394467234611511,
|
|
"step": 370,
|
|
"valid_targets_mean": 1115.8,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"grad_norm": 1.0686053967629128,
|
|
"learning_rate": 3.4155251141552515e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29126405715942383,
|
|
"step": 375,
|
|
"valid_targets_mean": 1021.5,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.608,
|
|
"grad_norm": 1.0458942720797793,
|
|
"learning_rate": 3.461187214611872e-05,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34782874584198,
|
|
"step": 380,
|
|
"valid_targets_mean": 1366.5,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.616,
|
|
"grad_norm": 1.0250125433306818,
|
|
"learning_rate": 3.506849315068493e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098771572113037,
|
|
"step": 385,
|
|
"valid_targets_mean": 1124.1,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.624,
|
|
"grad_norm": 1.0984535195837828,
|
|
"learning_rate": 3.5525114155251146e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33517172932624817,
|
|
"step": 390,
|
|
"valid_targets_mean": 1333.1,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 0.632,
|
|
"grad_norm": 1.0005578466871246,
|
|
"learning_rate": 3.5981735159817354e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33485180139541626,
|
|
"step": 395,
|
|
"valid_targets_mean": 1376.8,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"grad_norm": 1.0707061782584946,
|
|
"learning_rate": 3.643835616438356e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33706289529800415,
|
|
"step": 400,
|
|
"valid_targets_mean": 1375.6,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 0.648,
|
|
"grad_norm": 0.9762363220414574,
|
|
"learning_rate": 3.689497716894977e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.309683620929718,
|
|
"step": 405,
|
|
"valid_targets_mean": 1231.2,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.9717413529655256,
|
|
"learning_rate": 3.7351598173515985e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3161115348339081,
|
|
"step": 410,
|
|
"valid_targets_mean": 1381.8,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.664,
|
|
"grad_norm": 1.092051256210721,
|
|
"learning_rate": 3.780821917808219e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969529926776886,
|
|
"step": 415,
|
|
"valid_targets_mean": 977.5,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.9181658398145421,
|
|
"learning_rate": 3.82648401826484e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32871338725090027,
|
|
"step": 420,
|
|
"valid_targets_mean": 1396.4,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"grad_norm": 0.9233546926162756,
|
|
"learning_rate": 3.8721461187214615e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3489411473274231,
|
|
"step": 425,
|
|
"valid_targets_mean": 1612.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.688,
|
|
"grad_norm": 1.1078909777088182,
|
|
"learning_rate": 3.9178082191780823e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32206326723098755,
|
|
"step": 430,
|
|
"valid_targets_mean": 1341.3,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.696,
|
|
"grad_norm": 1.099304328945864,
|
|
"learning_rate": 3.963470319634704e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072618842124939,
|
|
"step": 435,
|
|
"valid_targets_mean": 1075.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.704,
|
|
"grad_norm": 1.062786414719623,
|
|
"learning_rate": 3.99999936325009e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31028956174850464,
|
|
"step": 440,
|
|
"valid_targets_mean": 1222.6,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 0.712,
|
|
"grad_norm": 1.088316343699649,
|
|
"learning_rate": 3.9999770770457856e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111687898635864,
|
|
"step": 445,
|
|
"valid_targets_mean": 1200.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.9933986177091898,
|
|
"learning_rate": 3.9999229537513936e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2991103529930115,
|
|
"step": 450,
|
|
"valid_targets_mean": 1151.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 0.728,
|
|
"grad_norm": 0.9351884510394403,
|
|
"learning_rate": 3.999836994228487e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3496304452419281,
|
|
"step": 455,
|
|
"valid_targets_mean": 1435.6,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.964106683925264,
|
|
"learning_rate": 3.999719199845432e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34364449977874756,
|
|
"step": 460,
|
|
"valid_targets_mean": 1464.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 0.744,
|
|
"grad_norm": 1.031399213174084,
|
|
"learning_rate": 3.999569572477366e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31331491470336914,
|
|
"step": 465,
|
|
"valid_targets_mean": 1342.7,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.9762505778424657,
|
|
"learning_rate": 3.999388114506166e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30002090334892273,
|
|
"step": 470,
|
|
"valid_targets_mean": 1254.4,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"grad_norm": 0.9663768393821034,
|
|
"learning_rate": 3.999174828820413e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32793402671813965,
|
|
"step": 475,
|
|
"valid_targets_mean": 1172.2,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.768,
|
|
"grad_norm": 1.0589511922652461,
|
|
"learning_rate": 3.998929718815341e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34346258640289307,
|
|
"step": 480,
|
|
"valid_targets_mean": 1256.6,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 0.776,
|
|
"grad_norm": 0.9639901143221346,
|
|
"learning_rate": 3.998652788392792e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2934371829032898,
|
|
"step": 485,
|
|
"valid_targets_mean": 1240.6,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.861769599950944,
|
|
"learning_rate": 3.9983440419611445e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35812270641326904,
|
|
"step": 490,
|
|
"valid_targets_mean": 1735.1,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 0.792,
|
|
"grad_norm": 0.8871455336785263,
|
|
"learning_rate": 3.9980034844352494e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3529864251613617,
|
|
"step": 495,
|
|
"valid_targets_mean": 1523.9,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.8462569839095343,
|
|
"learning_rate": 3.9976311212363495e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3257628381252289,
|
|
"step": 500,
|
|
"valid_targets_mean": 1544.8,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.808,
|
|
"grad_norm": 0.918223745761247,
|
|
"learning_rate": 3.997226958291992e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32465624809265137,
|
|
"step": 505,
|
|
"valid_targets_mean": 1365.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.9250482266832194,
|
|
"learning_rate": 3.996791002035937e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33923643827438354,
|
|
"step": 510,
|
|
"valid_targets_mean": 1479.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 0.824,
|
|
"grad_norm": 0.8736221716116702,
|
|
"learning_rate": 3.996323259408055e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3201742172241211,
|
|
"step": 515,
|
|
"valid_targets_mean": 1319.2,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.9087112414925389,
|
|
"learning_rate": 3.995823737854211e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31116774678230286,
|
|
"step": 520,
|
|
"valid_targets_mean": 1253.6,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"grad_norm": 0.9403703226793279,
|
|
"learning_rate": 3.9952924453261534e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3131970763206482,
|
|
"step": 525,
|
|
"valid_targets_mean": 1391.1,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.979294325913878,
|
|
"learning_rate": 3.994729390281384e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33393606543540955,
|
|
"step": 530,
|
|
"valid_targets_mean": 1301.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.856,
|
|
"grad_norm": 0.821635154530391,
|
|
"learning_rate": 3.994134581683021e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30839449167251587,
|
|
"step": 535,
|
|
"valid_targets_mean": 1302.1,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.864,
|
|
"grad_norm": 1.1158741722798897,
|
|
"learning_rate": 3.9935080289996626e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3146037459373474,
|
|
"step": 540,
|
|
"valid_targets_mean": 1173.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.872,
|
|
"grad_norm": 0.8888303089988165,
|
|
"learning_rate": 3.992849742205228e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33424657583236694,
|
|
"step": 545,
|
|
"valid_targets_mean": 1298.0,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.8360103117229875,
|
|
"learning_rate": 3.9921597317788065e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30955833196640015,
|
|
"step": 550,
|
|
"valid_targets_mean": 1328.2,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.888,
|
|
"grad_norm": 0.8963947947895279,
|
|
"learning_rate": 3.991438008704486e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33186841011047363,
|
|
"step": 555,
|
|
"valid_targets_mean": 1348.9,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.8876565985251219,
|
|
"learning_rate": 3.990684584471179e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.313508540391922,
|
|
"step": 560,
|
|
"valid_targets_mean": 1162.4,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.904,
|
|
"grad_norm": 0.9185805336574387,
|
|
"learning_rate": 3.989899471072441e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3414420485496521,
|
|
"step": 565,
|
|
"valid_targets_mean": 1422.4,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.9120200113833303,
|
|
"learning_rate": 3.9890826810062784e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31678134202957153,
|
|
"step": 570,
|
|
"valid_targets_mean": 1182.6,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"grad_norm": 0.9035027395869127,
|
|
"learning_rate": 3.988234227274949e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3197858929634094,
|
|
"step": 575,
|
|
"valid_targets_mean": 1258.3,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.8889284552338094,
|
|
"learning_rate": 3.987354123384757e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33336976170539856,
|
|
"step": 580,
|
|
"valid_targets_mean": 1243.1,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.936,
|
|
"grad_norm": 0.8956576926336909,
|
|
"learning_rate": 3.9864423833458364e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34339746832847595,
|
|
"step": 585,
|
|
"valid_targets_mean": 1442.1,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.9228356287760607,
|
|
"learning_rate": 3.9854990216719285e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32474130392074585,
|
|
"step": 590,
|
|
"valid_targets_mean": 1467.6,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 0.952,
|
|
"grad_norm": 0.9294265625886918,
|
|
"learning_rate": 3.98452405338015e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32510697841644287,
|
|
"step": 595,
|
|
"valid_targets_mean": 1157.6,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.8362261962186146,
|
|
"learning_rate": 3.983517493990756e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3371143937110901,
|
|
"step": 600,
|
|
"valid_targets_mean": 1438.3,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 0.968,
|
|
"grad_norm": 0.906331819142472,
|
|
"learning_rate": 3.982479359526892e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34150224924087524,
|
|
"step": 605,
|
|
"valid_targets_mean": 1385.9,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.857467326122182,
|
|
"learning_rate": 3.981409666514336e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32590699195861816,
|
|
"step": 610,
|
|
"valid_targets_mean": 1353.9,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.984,
|
|
"grad_norm": 0.843889457852941,
|
|
"learning_rate": 3.98030843198124e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31436774134635925,
|
|
"step": 615,
|
|
"valid_targets_mean": 1365.0,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.8703166417259782,
|
|
"learning_rate": 3.979175673457858e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.305080771446228,
|
|
"step": 620,
|
|
"valid_targets_mean": 1303.7,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.8748106044800781,
|
|
"learning_rate": 3.9780114089762616e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3189717233181,
|
|
"step": 625,
|
|
"valid_targets_mean": 1533.6,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.008,
|
|
"grad_norm": 0.8686199640913247,
|
|
"learning_rate": 3.976815657070062e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791053056716919,
|
|
"step": 630,
|
|
"valid_targets_mean": 1215.8,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.016,
|
|
"grad_norm": 0.87220477003548,
|
|
"learning_rate": 3.975588436774107e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30328992009162903,
|
|
"step": 635,
|
|
"valid_targets_mean": 1342.2,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 1.024,
|
|
"grad_norm": 0.7886097399935514,
|
|
"learning_rate": 3.9743297676241826e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978276014328003,
|
|
"step": 640,
|
|
"valid_targets_mean": 1223.8,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 1.032,
|
|
"grad_norm": 0.9475732751586867,
|
|
"learning_rate": 3.9730396696566994e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3531752824783325,
|
|
"step": 645,
|
|
"valid_targets_mean": 1183.5,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"grad_norm": 0.8125467690582494,
|
|
"learning_rate": 3.971718163408375e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3053149878978729,
|
|
"step": 650,
|
|
"valid_targets_mean": 1391.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 1.048,
|
|
"grad_norm": 0.9165420855166346,
|
|
"learning_rate": 3.9703652699159093e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3108670115470886,
|
|
"step": 655,
|
|
"valid_targets_mean": 1159.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.056,
|
|
"grad_norm": 0.9188514738020321,
|
|
"learning_rate": 3.9689810107156425e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885199189186096,
|
|
"step": 660,
|
|
"valid_targets_mean": 1139.2,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 1.064,
|
|
"grad_norm": 0.931571164199066,
|
|
"learning_rate": 3.967565407843222e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3238086700439453,
|
|
"step": 665,
|
|
"valid_targets_mean": 1203.3,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.072,
|
|
"grad_norm": 0.7919022710545551,
|
|
"learning_rate": 3.966118483833242e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869601845741272,
|
|
"step": 670,
|
|
"valid_targets_mean": 1179.0,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"grad_norm": 0.8408998183277714,
|
|
"learning_rate": 3.964640261718893e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114621341228485,
|
|
"step": 675,
|
|
"valid_targets_mean": 1206.4,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 1.088,
|
|
"grad_norm": 0.7769883940888999,
|
|
"learning_rate": 3.963130765031589e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27698951959609985,
|
|
"step": 680,
|
|
"valid_targets_mean": 1176.9,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 1.096,
|
|
"grad_norm": 0.9299925549945266,
|
|
"learning_rate": 3.961590017800598e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29162150621414185,
|
|
"step": 685,
|
|
"valid_targets_mean": 1153.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.104,
|
|
"grad_norm": 0.8372397681564182,
|
|
"learning_rate": 3.960018044552653e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34847480058670044,
|
|
"step": 690,
|
|
"valid_targets_mean": 1432.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.112,
|
|
"grad_norm": 0.8502517217252336,
|
|
"learning_rate": 3.9584148703115704e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3113204836845398,
|
|
"step": 695,
|
|
"valid_targets_mean": 1313.7,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"grad_norm": 0.841247177937042,
|
|
"learning_rate": 3.956780520597842e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986765205860138,
|
|
"step": 700,
|
|
"valid_targets_mean": 1288.4,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 1.1280000000000001,
|
|
"grad_norm": 0.7881816585443338,
|
|
"learning_rate": 3.955115021428236e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29124367237091064,
|
|
"step": 705,
|
|
"valid_targets_mean": 1491.0,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.1360000000000001,
|
|
"grad_norm": 0.7197102750722484,
|
|
"learning_rate": 3.95341839931538e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3113114833831787,
|
|
"step": 710,
|
|
"valid_targets_mean": 1674.3,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.144,
|
|
"grad_norm": 0.8546495981513146,
|
|
"learning_rate": 3.95169068126734e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30420535802841187,
|
|
"step": 715,
|
|
"valid_targets_mean": 1138.0,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 1.152,
|
|
"grad_norm": 0.7550371239366739,
|
|
"learning_rate": 3.949931894787187e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30246156454086304,
|
|
"step": 720,
|
|
"valid_targets_mean": 1333.4,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"grad_norm": 0.8647094289744588,
|
|
"learning_rate": 3.948142067872565e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2825793921947479,
|
|
"step": 725,
|
|
"valid_targets_mean": 1064.0,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 1.168,
|
|
"grad_norm": 0.8178268203469544,
|
|
"learning_rate": 3.946321229015241e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27427273988723755,
|
|
"step": 730,
|
|
"valid_targets_mean": 1159.8,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.176,
|
|
"grad_norm": 0.8933549978067479,
|
|
"learning_rate": 3.944469407200652e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28902918100357056,
|
|
"step": 735,
|
|
"valid_targets_mean": 1059.1,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.184,
|
|
"grad_norm": 0.8259321366550869,
|
|
"learning_rate": 3.942586631907444e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757384777069092,
|
|
"step": 740,
|
|
"valid_targets_mean": 1192.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.192,
|
|
"grad_norm": 0.73881015487271,
|
|
"learning_rate": 3.9406729331070054e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119145929813385,
|
|
"step": 745,
|
|
"valid_targets_mean": 1419.1,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"grad_norm": 0.7070178515879408,
|
|
"learning_rate": 3.938728341262985e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26624324917793274,
|
|
"step": 750,
|
|
"valid_targets_mean": 1331.1,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 1.208,
|
|
"grad_norm": 0.8070451737059322,
|
|
"learning_rate": 3.936752887330812e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35609763860702515,
|
|
"step": 755,
|
|
"valid_targets_mean": 1303.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 1.216,
|
|
"grad_norm": 0.8889406308506986,
|
|
"learning_rate": 3.9347466027571975e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2960336208343506,
|
|
"step": 760,
|
|
"valid_targets_mean": 1031.7,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 1.224,
|
|
"grad_norm": 0.8144153123433202,
|
|
"learning_rate": 3.932709519479639e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3228992521762848,
|
|
"step": 765,
|
|
"valid_targets_mean": 1530.6,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 1.232,
|
|
"grad_norm": 0.8571750938132287,
|
|
"learning_rate": 3.930641669925911e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299662709236145,
|
|
"step": 770,
|
|
"valid_targets_mean": 1156.6,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"grad_norm": 0.8436215547235222,
|
|
"learning_rate": 3.928543087013546e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29546940326690674,
|
|
"step": 775,
|
|
"valid_targets_mean": 1226.7,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 1.248,
|
|
"grad_norm": 0.6961314824916215,
|
|
"learning_rate": 3.926413804149315e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3882099986076355,
|
|
"step": 780,
|
|
"valid_targets_mean": 2216.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.256,
|
|
"grad_norm": 0.7357698835438257,
|
|
"learning_rate": 3.9242538552286894e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2973392605781555,
|
|
"step": 785,
|
|
"valid_targets_mean": 1348.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.264,
|
|
"grad_norm": 0.8233144555871087,
|
|
"learning_rate": 3.9220632746353096e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836835980415344,
|
|
"step": 790,
|
|
"valid_targets_mean": 1238.9,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 1.272,
|
|
"grad_norm": 0.7236518989379042,
|
|
"learning_rate": 3.91984209724043e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2910689115524292,
|
|
"step": 795,
|
|
"valid_targets_mean": 1360.2,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"grad_norm": 0.8937376369327467,
|
|
"learning_rate": 3.917590358402369e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30869483947753906,
|
|
"step": 800,
|
|
"valid_targets_mean": 1176.1,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.288,
|
|
"grad_norm": 0.7316920210336465,
|
|
"learning_rate": 3.915308093965943e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911234200000763,
|
|
"step": 805,
|
|
"valid_targets_mean": 1407.2,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 1.296,
|
|
"grad_norm": 0.7566986564663638,
|
|
"learning_rate": 3.9129953402618976e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29875844717025757,
|
|
"step": 810,
|
|
"valid_targets_mean": 1385.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.304,
|
|
"grad_norm": 0.7795431793780845,
|
|
"learning_rate": 3.91065213410633e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908490002155304,
|
|
"step": 815,
|
|
"valid_targets_mean": 1256.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 1.312,
|
|
"grad_norm": 0.752672578188729,
|
|
"learning_rate": 3.908278512800098e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285500705242157,
|
|
"step": 820,
|
|
"valid_targets_mean": 1246.1,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"grad_norm": 0.8106445949661707,
|
|
"learning_rate": 3.905874514128235e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28430575132369995,
|
|
"step": 825,
|
|
"valid_targets_mean": 1200.1,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 1.328,
|
|
"grad_norm": 0.746666311508585,
|
|
"learning_rate": 3.903440176359338e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25772130489349365,
|
|
"step": 830,
|
|
"valid_targets_mean": 1143.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.336,
|
|
"grad_norm": 0.8235257180383451,
|
|
"learning_rate": 3.90097553824497e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30878543853759766,
|
|
"step": 835,
|
|
"valid_targets_mean": 1257.2,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 1.3439999999999999,
|
|
"grad_norm": 0.7616453145657558,
|
|
"learning_rate": 3.8984806390190304e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30609777569770813,
|
|
"step": 840,
|
|
"valid_targets_mean": 1321.9,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 1.3519999999999999,
|
|
"grad_norm": 0.7198851154803535,
|
|
"learning_rate": 3.895955518397141e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945796847343445,
|
|
"step": 845,
|
|
"valid_targets_mean": 1306.9,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.3599999999999999,
|
|
"grad_norm": 0.7683521421273168,
|
|
"learning_rate": 3.893400216576011e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899963855743408,
|
|
"step": 850,
|
|
"valid_targets_mean": 1374.3,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.3679999999999999,
|
|
"grad_norm": 0.7710425191530541,
|
|
"learning_rate": 3.89081477423279e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28380054235458374,
|
|
"step": 855,
|
|
"valid_targets_mean": 1074.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.376,
|
|
"grad_norm": 0.7368913339430357,
|
|
"learning_rate": 3.888199232524434e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30734187364578247,
|
|
"step": 860,
|
|
"valid_targets_mean": 1381.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.384,
|
|
"grad_norm": 0.804010595195896,
|
|
"learning_rate": 3.8855536330870354e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29496869444847107,
|
|
"step": 865,
|
|
"valid_targets_mean": 1208.3,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 1.392,
|
|
"grad_norm": 0.762373396835519,
|
|
"learning_rate": 3.882878018035173e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857309579849243,
|
|
"step": 870,
|
|
"valid_targets_mean": 1151.9,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"grad_norm": 0.7651274271107054,
|
|
"learning_rate": 3.880172429961232e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29999279975891113,
|
|
"step": 875,
|
|
"valid_targets_mean": 1285.8,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.408,
|
|
"grad_norm": 0.6897364740751468,
|
|
"learning_rate": 3.877436911934733e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32874733209609985,
|
|
"step": 880,
|
|
"valid_targets_mean": 1723.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 1.416,
|
|
"grad_norm": 0.7795755978500145,
|
|
"learning_rate": 3.874671507501641e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28680315613746643,
|
|
"step": 885,
|
|
"valid_targets_mean": 1166.5,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.424,
|
|
"grad_norm": 0.6864374428935822,
|
|
"learning_rate": 3.871876260683677e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3177869915962219,
|
|
"step": 890,
|
|
"valid_targets_mean": 1774.6,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 1.432,
|
|
"grad_norm": 0.7431217527814288,
|
|
"learning_rate": 3.869051215977612e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025135397911072,
|
|
"step": 895,
|
|
"valid_targets_mean": 1240.1,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"grad_norm": 0.7910849835735785,
|
|
"learning_rate": 3.8661964183545634e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877320647239685,
|
|
"step": 900,
|
|
"valid_targets_mean": 1096.7,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.448,
|
|
"grad_norm": 0.7841688616892095,
|
|
"learning_rate": 3.863311913259276e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953312397003174,
|
|
"step": 905,
|
|
"valid_targets_mean": 1108.2,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.456,
|
|
"grad_norm": 0.7599579297412898,
|
|
"learning_rate": 3.860397746609402e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751244306564331,
|
|
"step": 910,
|
|
"valid_targets_mean": 1068.0,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.464,
|
|
"grad_norm": 0.7154123370506722,
|
|
"learning_rate": 3.857453964794764e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932862341403961,
|
|
"step": 915,
|
|
"valid_targets_mean": 1356.9,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.472,
|
|
"grad_norm": 0.7277914468632183,
|
|
"learning_rate": 3.854480614676624e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092343807220459,
|
|
"step": 920,
|
|
"valid_targets_mean": 1339.7,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"grad_norm": 0.7540460602215802,
|
|
"learning_rate": 3.851477743586932e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741050720214844,
|
|
"step": 925,
|
|
"valid_targets_mean": 1119.2,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 1.488,
|
|
"grad_norm": 0.7595359090443424,
|
|
"learning_rate": 3.8484453993275746e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899656295776367,
|
|
"step": 930,
|
|
"valid_targets_mean": 1257.2,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.496,
|
|
"grad_norm": 0.7590745473892713,
|
|
"learning_rate": 3.8453836301696134e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897772192955017,
|
|
"step": 935,
|
|
"valid_targets_mean": 1229.2,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.504,
|
|
"grad_norm": 0.710069952174475,
|
|
"learning_rate": 3.842292484852518e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932877242565155,
|
|
"step": 940,
|
|
"valid_targets_mean": 1435.0,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.512,
|
|
"grad_norm": 0.7009972321756676,
|
|
"learning_rate": 3.8391720125833875e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30670666694641113,
|
|
"step": 945,
|
|
"valid_targets_mean": 1417.7,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"grad_norm": 0.6789807306525544,
|
|
"learning_rate": 3.83602226303617e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999956011772156,
|
|
"step": 950,
|
|
"valid_targets_mean": 1421.0,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 1.528,
|
|
"grad_norm": 0.7020069132425687,
|
|
"learning_rate": 3.83284328635087e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30725589394569397,
|
|
"step": 955,
|
|
"valid_targets_mean": 1395.6,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 1.536,
|
|
"grad_norm": 0.7366351750334832,
|
|
"learning_rate": 3.829635133132751e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072003126144409,
|
|
"step": 960,
|
|
"valid_targets_mean": 1372.3,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.544,
|
|
"grad_norm": 0.7704937657240408,
|
|
"learning_rate": 3.8263978544515304e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861347198486328,
|
|
"step": 965,
|
|
"valid_targets_mean": 1105.9,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 1.552,
|
|
"grad_norm": 0.6876978894230894,
|
|
"learning_rate": 3.823131501840565e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916574776172638,
|
|
"step": 970,
|
|
"valid_targets_mean": 1315.4,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"grad_norm": 0.7553575513575888,
|
|
"learning_rate": 3.819836127296032e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585899829864502,
|
|
"step": 975,
|
|
"valid_targets_mean": 1035.9,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.568,
|
|
"grad_norm": 0.8113490752447253,
|
|
"learning_rate": 3.8165117832761016e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31396710872650146,
|
|
"step": 980,
|
|
"valid_targets_mean": 1153.5,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.576,
|
|
"grad_norm": 0.7429188094560545,
|
|
"learning_rate": 3.813158522700098e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726953625679016,
|
|
"step": 985,
|
|
"valid_targets_mean": 1244.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.584,
|
|
"grad_norm": 0.7289581631194304,
|
|
"learning_rate": 3.809776398947665e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31393730640411377,
|
|
"step": 990,
|
|
"valid_targets_mean": 1264.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 1.592,
|
|
"grad_norm": 0.7854134451506058,
|
|
"learning_rate": 3.806365465857908e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28533270955085754,
|
|
"step": 995,
|
|
"valid_targets_mean": 1199.4,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"grad_norm": 0.6936454805043971,
|
|
"learning_rate": 3.802925777728541e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32617443799972534,
|
|
"step": 1000,
|
|
"valid_targets_mean": 1473.9,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 1.608,
|
|
"grad_norm": 0.7435484022878768,
|
|
"learning_rate": 3.799457389315023e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3141123652458191,
|
|
"step": 1005,
|
|
"valid_targets_mean": 1251.2,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 1.616,
|
|
"grad_norm": 0.8305200673965024,
|
|
"learning_rate": 3.795960355829683e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33428719639778137,
|
|
"step": 1010,
|
|
"valid_targets_mean": 1370.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.624,
|
|
"grad_norm": 0.7471744233230212,
|
|
"learning_rate": 3.7924347329408444e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27914416790008545,
|
|
"step": 1015,
|
|
"valid_targets_mean": 1043.6,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 1.6320000000000001,
|
|
"grad_norm": 0.7580690835724073,
|
|
"learning_rate": 3.788880576771937e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27981412410736084,
|
|
"step": 1020,
|
|
"valid_targets_mean": 1073.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.6400000000000001,
|
|
"grad_norm": 0.7284957348491666,
|
|
"learning_rate": 3.785297943900605e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908656597137451,
|
|
"step": 1025,
|
|
"valid_targets_mean": 1295.8,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.6480000000000001,
|
|
"grad_norm": 0.7136260670544277,
|
|
"learning_rate": 3.7816868913578044e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2881999611854553,
|
|
"step": 1030,
|
|
"valid_targets_mean": 1141.8,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.6560000000000001,
|
|
"grad_norm": 0.73310608920792,
|
|
"learning_rate": 3.778047476626897e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849404513835907,
|
|
"step": 1035,
|
|
"valid_targets_mean": 1174.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.6640000000000001,
|
|
"grad_norm": 0.6888340857303676,
|
|
"learning_rate": 3.7743797576427335e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3199218511581421,
|
|
"step": 1040,
|
|
"valid_targets_mean": 1471.3,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 1.6720000000000002,
|
|
"grad_norm": 0.6862789571807757,
|
|
"learning_rate": 3.770683792790733e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3052081763744354,
|
|
"step": 1045,
|
|
"valid_targets_mean": 1445.1,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.6800000000000002,
|
|
"grad_norm": 0.7199939514453941,
|
|
"learning_rate": 3.766959640905954e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30268457531929016,
|
|
"step": 1050,
|
|
"valid_targets_mean": 1268.7,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 1.688,
|
|
"grad_norm": 0.7329339336593718,
|
|
"learning_rate": 3.763207361272153e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28731805086135864,
|
|
"step": 1055,
|
|
"valid_targets_mean": 1210.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 1.696,
|
|
"grad_norm": 0.6637047907799619,
|
|
"learning_rate": 3.759427013620849e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30762284994125366,
|
|
"step": 1060,
|
|
"valid_targets_mean": 1642.5,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 1.704,
|
|
"grad_norm": 0.7417630099561096,
|
|
"learning_rate": 3.755618658130366e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31815579533576965,
|
|
"step": 1065,
|
|
"valid_targets_mean": 1622.2,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.712,
|
|
"grad_norm": 0.6965362227437693,
|
|
"learning_rate": 3.751782355424877e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128035068511963,
|
|
"step": 1070,
|
|
"valid_targets_mean": 1542.0,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"grad_norm": 0.6923502593943535,
|
|
"learning_rate": 3.7479181665734395e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299746572971344,
|
|
"step": 1075,
|
|
"valid_targets_mean": 1333.4,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 1.728,
|
|
"grad_norm": 0.7222683463238148,
|
|
"learning_rate": 3.7440261530890213e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33654940128326416,
|
|
"step": 1080,
|
|
"valid_targets_mean": 1401.9,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 1.736,
|
|
"grad_norm": 0.7289839437870484,
|
|
"learning_rate": 3.740106376927527e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3044643998146057,
|
|
"step": 1085,
|
|
"valid_targets_mean": 1263.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 1.744,
|
|
"grad_norm": 0.7237455815264212,
|
|
"learning_rate": 3.7361589004868035e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279565691947937,
|
|
"step": 1090,
|
|
"valid_targets_mean": 1167.2,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.752,
|
|
"grad_norm": 0.7408302371848148,
|
|
"learning_rate": 3.7321837866056535e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30340689420700073,
|
|
"step": 1095,
|
|
"valid_targets_mean": 1328.9,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"grad_norm": 0.6934904963953936,
|
|
"learning_rate": 3.728181098562831e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3032642900943756,
|
|
"step": 1100,
|
|
"valid_targets_mean": 1367.6,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 1.768,
|
|
"grad_norm": 0.7642787998076341,
|
|
"learning_rate": 3.7241509000760355e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29110094904899597,
|
|
"step": 1105,
|
|
"valid_targets_mean": 1158.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.776,
|
|
"grad_norm": 0.7169191571702284,
|
|
"learning_rate": 3.720093255300899e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3033268451690674,
|
|
"step": 1110,
|
|
"valid_targets_mean": 1237.6,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 1.784,
|
|
"grad_norm": 0.6977557375681197,
|
|
"learning_rate": 3.7160082288299645e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30440837144851685,
|
|
"step": 1115,
|
|
"valid_targets_mean": 1385.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 1.792,
|
|
"grad_norm": 0.7508030358080197,
|
|
"learning_rate": 3.7118958856916534e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950218617916107,
|
|
"step": 1120,
|
|
"valid_targets_mean": 1169.1,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"grad_norm": 0.7128171035336155,
|
|
"learning_rate": 3.707756291349237e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3089803159236908,
|
|
"step": 1125,
|
|
"valid_targets_mean": 1405.9,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.808,
|
|
"grad_norm": 0.6642775746517514,
|
|
"learning_rate": 3.703589511699787e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27773725986480713,
|
|
"step": 1130,
|
|
"valid_targets_mean": 1166.2,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.8159999999999998,
|
|
"grad_norm": 0.7021082167955099,
|
|
"learning_rate": 3.6993956130731355e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29418376088142395,
|
|
"step": 1135,
|
|
"valid_targets_mean": 1374.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.8239999999999998,
|
|
"grad_norm": 0.7280588850000739,
|
|
"learning_rate": 3.6951746622308106e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929180860519409,
|
|
"step": 1140,
|
|
"valid_targets_mean": 1123.7,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 1.8319999999999999,
|
|
"grad_norm": 0.6655113502610764,
|
|
"learning_rate": 3.69092672636498e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28471970558166504,
|
|
"step": 1145,
|
|
"valid_targets_mean": 1335.5,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.8399999999999999,
|
|
"grad_norm": 0.7194648413956043,
|
|
"learning_rate": 3.686651873097375e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005855083465576,
|
|
"step": 1150,
|
|
"valid_targets_mean": 1320.1,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.8479999999999999,
|
|
"grad_norm": 0.6515294832198358,
|
|
"learning_rate": 3.682350170478223e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29663175344467163,
|
|
"step": 1155,
|
|
"valid_targets_mean": 1309.3,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.8559999999999999,
|
|
"grad_norm": 0.7458254019885102,
|
|
"learning_rate": 3.678021686985153e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26135438680648804,
|
|
"step": 1160,
|
|
"valid_targets_mean": 1049.6,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.8639999999999999,
|
|
"grad_norm": 0.7729043298762871,
|
|
"learning_rate": 3.6736664915221144e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061363697052002,
|
|
"step": 1165,
|
|
"valid_targets_mean": 1242.2,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 1.8719999999999999,
|
|
"grad_norm": 0.6511360638897764,
|
|
"learning_rate": 3.669284653418278e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29013341665267944,
|
|
"step": 1170,
|
|
"valid_targets_mean": 1520.2,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"grad_norm": 0.7455951070931932,
|
|
"learning_rate": 3.6648762424269306e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28163498640060425,
|
|
"step": 1175,
|
|
"valid_targets_mean": 1072.2,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.888,
|
|
"grad_norm": 0.6633680755133979,
|
|
"learning_rate": 3.660441328724365e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.287434458732605,
|
|
"step": 1180,
|
|
"valid_targets_mean": 1369.0,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.896,
|
|
"grad_norm": 0.7379333087440816,
|
|
"learning_rate": 3.655979982908764e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908487915992737,
|
|
"step": 1185,
|
|
"valid_targets_mean": 1313.7,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.904,
|
|
"grad_norm": 0.726006883052574,
|
|
"learning_rate": 3.6514922759990756e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002976179122925,
|
|
"step": 1190,
|
|
"valid_targets_mean": 1185.8,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.912,
|
|
"grad_norm": 0.6832975779906103,
|
|
"learning_rate": 3.646978279433883e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047093152999878,
|
|
"step": 1195,
|
|
"valid_targets_mean": 1359.9,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"grad_norm": 0.7395903246622791,
|
|
"learning_rate": 3.6424380650702685e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2698495388031006,
|
|
"step": 1200,
|
|
"valid_targets_mean": 1180.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.928,
|
|
"grad_norm": 0.7593360203543278,
|
|
"learning_rate": 3.637871705182667e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953850030899048,
|
|
"step": 1205,
|
|
"valid_targets_mean": 1233.9,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 1.936,
|
|
"grad_norm": 0.7255522515618682,
|
|
"learning_rate": 3.633279272461717e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29483166337013245,
|
|
"step": 1210,
|
|
"valid_targets_mean": 1323.7,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.944,
|
|
"grad_norm": 0.714636546127886,
|
|
"learning_rate": 3.628660840013102e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2759806513786316,
|
|
"step": 1215,
|
|
"valid_targets_mean": 1213.4,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.952,
|
|
"grad_norm": 0.6795729910423134,
|
|
"learning_rate": 3.624016481356392e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986353039741516,
|
|
"step": 1220,
|
|
"valid_targets_mean": 1346.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"grad_norm": 0.7017966196444858,
|
|
"learning_rate": 3.619346270423866e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3066234588623047,
|
|
"step": 1225,
|
|
"valid_targets_mean": 1181.2,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 1.968,
|
|
"grad_norm": 0.6692708150516412,
|
|
"learning_rate": 3.6146502815593384e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31543323397636414,
|
|
"step": 1230,
|
|
"valid_targets_mean": 1399.3,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 1.976,
|
|
"grad_norm": 0.6607542113963208,
|
|
"learning_rate": 3.609928589516977e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3146621584892273,
|
|
"step": 1235,
|
|
"valid_targets_mean": 1441.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.984,
|
|
"grad_norm": 0.7377505823319686,
|
|
"learning_rate": 3.6051812694601114e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721008062362671,
|
|
"step": 1240,
|
|
"valid_targets_mean": 953.9,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.992,
|
|
"grad_norm": 0.6900560685652031,
|
|
"learning_rate": 3.6004083969600346e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294763445854187,
|
|
"step": 1245,
|
|
"valid_targets_mean": 1420.9,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.7237844280261507,
|
|
"learning_rate": 3.595610047994804e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25826698541641235,
|
|
"step": 1250,
|
|
"valid_targets_mean": 1096.3,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.008,
|
|
"grad_norm": 0.7225144968296001,
|
|
"learning_rate": 3.5907862989480285e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946006953716278,
|
|
"step": 1255,
|
|
"valid_targets_mean": 1221.0,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.016,
|
|
"grad_norm": 0.7609268219577915,
|
|
"learning_rate": 3.585937226607656e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747316360473633,
|
|
"step": 1260,
|
|
"valid_targets_mean": 1179.3,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 2.024,
|
|
"grad_norm": 0.7006324809775086,
|
|
"learning_rate": 3.5810629081647476e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290524959564209,
|
|
"step": 1265,
|
|
"valid_targets_mean": 1562.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.032,
|
|
"grad_norm": 0.7250265995978956,
|
|
"learning_rate": 3.576163421212249e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26366108655929565,
|
|
"step": 1270,
|
|
"valid_targets_mean": 1265.8,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"grad_norm": 0.8139371315853822,
|
|
"learning_rate": 3.5712388437437576e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26561683416366577,
|
|
"step": 1275,
|
|
"valid_targets_mean": 1069.7,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.048,
|
|
"grad_norm": 0.752173363235469,
|
|
"learning_rate": 3.566289254152283e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002524971961975,
|
|
"step": 1280,
|
|
"valid_targets_mean": 1271.1,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 2.056,
|
|
"grad_norm": 0.6992503235151014,
|
|
"learning_rate": 3.56131473122899e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271355003118515,
|
|
"step": 1285,
|
|
"valid_targets_mean": 1267.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.064,
|
|
"grad_norm": 0.7257496576640303,
|
|
"learning_rate": 3.556315354161955e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26641058921813965,
|
|
"step": 1290,
|
|
"valid_targets_mean": 1088.1,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 2.072,
|
|
"grad_norm": 0.7197406276703954,
|
|
"learning_rate": 3.551291202534899e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657165825366974,
|
|
"step": 1295,
|
|
"valid_targets_mean": 1077.0,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"grad_norm": 0.704209132513246,
|
|
"learning_rate": 3.546242356325922e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918708324432373,
|
|
"step": 1300,
|
|
"valid_targets_mean": 1468.3,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.088,
|
|
"grad_norm": 0.6992019999809073,
|
|
"learning_rate": 3.5411688959062323e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602587938308716,
|
|
"step": 1305,
|
|
"valid_targets_mean": 1111.9,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.096,
|
|
"grad_norm": 0.693762635914624,
|
|
"learning_rate": 3.5360709020388625e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289661705493927,
|
|
"step": 1310,
|
|
"valid_targets_mean": 1530.2,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 2.104,
|
|
"grad_norm": 0.6693253093979602,
|
|
"learning_rate": 3.530948455877388e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899070680141449,
|
|
"step": 1315,
|
|
"valid_targets_mean": 1423.2,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 2.112,
|
|
"grad_norm": 0.7192853920945097,
|
|
"learning_rate": 3.525801638964634e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624514400959015,
|
|
"step": 1320,
|
|
"valid_targets_mean": 1191.1,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"grad_norm": 0.6575821173527254,
|
|
"learning_rate": 3.520630533231376e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31122565269470215,
|
|
"step": 1325,
|
|
"valid_targets_mean": 1598.9,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 2.128,
|
|
"grad_norm": 0.6711022677702406,
|
|
"learning_rate": 3.5154352209950376e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285232275724411,
|
|
"step": 1330,
|
|
"valid_targets_mean": 1547.2,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 2.136,
|
|
"grad_norm": 0.6896250773217073,
|
|
"learning_rate": 3.510215784958376e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29398879408836365,
|
|
"step": 1335,
|
|
"valid_targets_mean": 1271.2,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.144,
|
|
"grad_norm": 0.6834518236829222,
|
|
"learning_rate": 3.5049723082081755e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3116733133792877,
|
|
"step": 1340,
|
|
"valid_targets_mean": 1414.6,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.152,
|
|
"grad_norm": 0.6810510101707653,
|
|
"learning_rate": 3.49970487421391e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28676432371139526,
|
|
"step": 1345,
|
|
"valid_targets_mean": 1364.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"grad_norm": 0.7499164603241426,
|
|
"learning_rate": 3.494413566826427e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703482508659363,
|
|
"step": 1350,
|
|
"valid_targets_mean": 1134.5,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.168,
|
|
"grad_norm": 0.7433088270079251,
|
|
"learning_rate": 3.489098470276608e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24965405464172363,
|
|
"step": 1355,
|
|
"valid_targets_mean": 1064.0,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 2.176,
|
|
"grad_norm": 0.6901334093689532,
|
|
"learning_rate": 3.483759669174024e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27521902322769165,
|
|
"step": 1360,
|
|
"valid_targets_mean": 1208.4,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 2.184,
|
|
"grad_norm": 0.7187690004692653,
|
|
"learning_rate": 3.478397248505598e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603776156902313,
|
|
"step": 1365,
|
|
"valid_targets_mean": 1319.8,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.192,
|
|
"grad_norm": 0.6962638783183357,
|
|
"learning_rate": 3.473011293634241e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28224700689315796,
|
|
"step": 1370,
|
|
"valid_targets_mean": 1257.2,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"grad_norm": 0.758322973763713,
|
|
"learning_rate": 3.467601890297502e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2698427438735962,
|
|
"step": 1375,
|
|
"valid_targets_mean": 1172.2,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 2.208,
|
|
"grad_norm": 0.6437126875135689,
|
|
"learning_rate": 3.4621691246061976e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868633270263672,
|
|
"step": 1380,
|
|
"valid_targets_mean": 1483.9,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 2.216,
|
|
"grad_norm": 0.6576070099221175,
|
|
"learning_rate": 3.456713083043046e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2580168545246124,
|
|
"step": 1385,
|
|
"valid_targets_mean": 1241.0,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 2.224,
|
|
"grad_norm": 0.6421329310961442,
|
|
"learning_rate": 3.451233852461285e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29286086559295654,
|
|
"step": 1390,
|
|
"valid_targets_mean": 1435.0,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 2.232,
|
|
"grad_norm": 0.6922702222825998,
|
|
"learning_rate": 3.4457315200832935e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29231929779052734,
|
|
"step": 1395,
|
|
"valid_targets_mean": 1306.4,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"grad_norm": 0.6957166936230352,
|
|
"learning_rate": 3.440206173499201e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27849268913269043,
|
|
"step": 1400,
|
|
"valid_targets_mean": 1264.4,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 2.248,
|
|
"grad_norm": 0.6504488463923631,
|
|
"learning_rate": 3.4346579006654945e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894713878631592,
|
|
"step": 1405,
|
|
"valid_targets_mean": 1498.7,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 2.2560000000000002,
|
|
"grad_norm": 0.6747819942932284,
|
|
"learning_rate": 3.4290867899036166e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264851838350296,
|
|
"step": 1410,
|
|
"valid_targets_mean": 1207.8,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 2.2640000000000002,
|
|
"grad_norm": 0.6678290925781037,
|
|
"learning_rate": 3.4234929298985614e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273662269115448,
|
|
"step": 1415,
|
|
"valid_targets_mean": 1327.2,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 2.2720000000000002,
|
|
"grad_norm": 0.6769732705565137,
|
|
"learning_rate": 3.417876409697463e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27540674805641174,
|
|
"step": 1420,
|
|
"valid_targets_mean": 1174.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 2.2800000000000002,
|
|
"grad_norm": 0.695964532887264,
|
|
"learning_rate": 3.412237318708175e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607964277267456,
|
|
"step": 1425,
|
|
"valid_targets_mean": 1143.1,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 2.288,
|
|
"grad_norm": 0.706678518213352,
|
|
"learning_rate": 3.4065757466978504e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663518488407135,
|
|
"step": 1430,
|
|
"valid_targets_mean": 1163.8,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 2.296,
|
|
"grad_norm": 0.7050335679984683,
|
|
"learning_rate": 3.400891783791511e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760336399078369,
|
|
"step": 1435,
|
|
"valid_targets_mean": 1149.5,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 2.304,
|
|
"grad_norm": 0.6639657337260036,
|
|
"learning_rate": 3.395185520470614e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26671597361564636,
|
|
"step": 1440,
|
|
"valid_targets_mean": 1104.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 2.312,
|
|
"grad_norm": 0.6541765951023386,
|
|
"learning_rate": 3.38945704757161e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751718759536743,
|
|
"step": 1445,
|
|
"valid_targets_mean": 1466.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"grad_norm": 0.6830759386228623,
|
|
"learning_rate": 3.383706456284498e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2775251269340515,
|
|
"step": 1450,
|
|
"valid_targets_mean": 1275.9,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 2.328,
|
|
"grad_norm": 0.6503158392839788,
|
|
"learning_rate": 3.377933838151374e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26958006620407104,
|
|
"step": 1455,
|
|
"valid_targets_mean": 1281.1,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.336,
|
|
"grad_norm": 0.6406862619550551,
|
|
"learning_rate": 3.3721392850649714e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27200114727020264,
|
|
"step": 1460,
|
|
"valid_targets_mean": 1535.9,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.344,
|
|
"grad_norm": 0.6974138922917019,
|
|
"learning_rate": 3.3663228892672034e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914842963218689,
|
|
"step": 1465,
|
|
"valid_targets_mean": 1253.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.352,
|
|
"grad_norm": 0.7329331917845247,
|
|
"learning_rate": 3.36048474334769e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27479422092437744,
|
|
"step": 1470,
|
|
"valid_targets_mean": 1226.4,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"grad_norm": 0.7128518334531693,
|
|
"learning_rate": 3.3546249402422834e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725678086280823,
|
|
"step": 1475,
|
|
"valid_targets_mean": 1185.9,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 2.368,
|
|
"grad_norm": 0.6900987679538318,
|
|
"learning_rate": 3.3487435732315944e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021412193775177,
|
|
"step": 1480,
|
|
"valid_targets_mean": 1554.4,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 2.376,
|
|
"grad_norm": 0.7706306252209298,
|
|
"learning_rate": 3.342840735939501e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717379331588745,
|
|
"step": 1485,
|
|
"valid_targets_mean": 1090.4,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 2.384,
|
|
"grad_norm": 0.6414924279954658,
|
|
"learning_rate": 3.33691652233166e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29157620668411255,
|
|
"step": 1490,
|
|
"valid_targets_mean": 1423.0,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 2.392,
|
|
"grad_norm": 0.6406573915545201,
|
|
"learning_rate": 3.330971026714016e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630154490470886,
|
|
"step": 1495,
|
|
"valid_targets_mean": 1417.2,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"grad_norm": 0.6869192075333184,
|
|
"learning_rate": 3.325004343731292e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30006977915763855,
|
|
"step": 1500,
|
|
"valid_targets_mean": 1471.4,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 2.408,
|
|
"grad_norm": 0.684747767483085,
|
|
"learning_rate": 3.3190165683654885e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811269462108612,
|
|
"step": 1505,
|
|
"valid_targets_mean": 1381.1,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 2.416,
|
|
"grad_norm": 0.6854153302531673,
|
|
"learning_rate": 3.31300779593437e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671985924243927,
|
|
"step": 1510,
|
|
"valid_targets_mean": 1329.5,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.424,
|
|
"grad_norm": 0.6958488938812165,
|
|
"learning_rate": 3.306978122089948e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27929019927978516,
|
|
"step": 1515,
|
|
"valid_targets_mean": 1156.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 2.432,
|
|
"grad_norm": 0.6789366540088919,
|
|
"learning_rate": 3.300927642816957e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786034047603607,
|
|
"step": 1520,
|
|
"valid_targets_mean": 1317.5,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"grad_norm": 0.6179578940825363,
|
|
"learning_rate": 3.294856454431328e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789478003978729,
|
|
"step": 1525,
|
|
"valid_targets_mean": 1450.9,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 2.448,
|
|
"grad_norm": 0.643402499027211,
|
|
"learning_rate": 3.288764653578653e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620701789855957,
|
|
"step": 1530,
|
|
"valid_targets_mean": 1208.7,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 2.456,
|
|
"grad_norm": 0.6550530054627135,
|
|
"learning_rate": 3.2826523372326516e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778134346008301,
|
|
"step": 1535,
|
|
"valid_targets_mean": 1272.6,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.464,
|
|
"grad_norm": 0.664344203793034,
|
|
"learning_rate": 3.276519602693621e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841399908065796,
|
|
"step": 1540,
|
|
"valid_targets_mean": 1507.1,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 2.472,
|
|
"grad_norm": 0.645378599333798,
|
|
"learning_rate": 3.270366547586892e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27312642335891724,
|
|
"step": 1545,
|
|
"valid_targets_mean": 1263.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"grad_norm": 0.6297324593231745,
|
|
"learning_rate": 3.2641932698612715e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29611048102378845,
|
|
"step": 1550,
|
|
"valid_targets_mean": 1320.3,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.488,
|
|
"grad_norm": 0.6777799607334879,
|
|
"learning_rate": 3.2579998677874855e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28705888986587524,
|
|
"step": 1555,
|
|
"valid_targets_mean": 1333.3,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 2.496,
|
|
"grad_norm": 0.7103503726671799,
|
|
"learning_rate": 3.251786439956614e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803288400173187,
|
|
"step": 1560,
|
|
"valid_targets_mean": 1199.4,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 2.504,
|
|
"grad_norm": 0.6844525741962038,
|
|
"learning_rate": 3.2455530852785206e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624068856239319,
|
|
"step": 1565,
|
|
"valid_targets_mean": 1214.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 2.512,
|
|
"grad_norm": 0.6637194102935352,
|
|
"learning_rate": 3.239299902980281e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2625361680984497,
|
|
"step": 1570,
|
|
"valid_targets_mean": 1255.9,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"grad_norm": 0.7217981173404302,
|
|
"learning_rate": 3.2330269926046e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29314571619033813,
|
|
"step": 1575,
|
|
"valid_targets_mean": 1097.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 2.528,
|
|
"grad_norm": 0.6135221917499928,
|
|
"learning_rate": 3.2267344540082284e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735772728919983,
|
|
"step": 1580,
|
|
"valid_targets_mean": 1516.1,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 2.536,
|
|
"grad_norm": 0.9418666995673906,
|
|
"learning_rate": 3.220422387360373e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2693006098270416,
|
|
"step": 1585,
|
|
"valid_targets_mean": 1245.2,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 2.544,
|
|
"grad_norm": 0.6745277832762189,
|
|
"learning_rate": 3.2140908931411026e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28518372774124146,
|
|
"step": 1590,
|
|
"valid_targets_mean": 1442.8,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.552,
|
|
"grad_norm": 0.6528174512724153,
|
|
"learning_rate": 3.207740072139748e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29156622290611267,
|
|
"step": 1595,
|
|
"valid_targets_mean": 1321.4,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"grad_norm": 0.6963648615821638,
|
|
"learning_rate": 3.2013700254532996e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679610848426819,
|
|
"step": 1600,
|
|
"valid_targets_mean": 1094.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 2.568,
|
|
"grad_norm": 0.6240685918879156,
|
|
"learning_rate": 3.194980854484794e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2774415612220764,
|
|
"step": 1605,
|
|
"valid_targets_mean": 1344.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 2.576,
|
|
"grad_norm": 0.6247981463950897,
|
|
"learning_rate": 3.188572660941702e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30891597270965576,
|
|
"step": 1610,
|
|
"valid_targets_mean": 1606.8,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 2.584,
|
|
"grad_norm": 0.6644512063662843,
|
|
"learning_rate": 3.182145546834311e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28088533878326416,
|
|
"step": 1615,
|
|
"valid_targets_mean": 1087.8,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 2.592,
|
|
"grad_norm": 0.7351582546356388,
|
|
"learning_rate": 3.1756996144740994e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751609981060028,
|
|
"step": 1620,
|
|
"valid_targets_mean": 1192.9,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"grad_norm": 0.7001069305876476,
|
|
"learning_rate": 3.1692349664721074e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253386914730072,
|
|
"step": 1625,
|
|
"valid_targets_mean": 1074.0,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 2.608,
|
|
"grad_norm": 0.7071201673452803,
|
|
"learning_rate": 3.1627517057373046e-05,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28095006942749023,
|
|
"step": 1630,
|
|
"valid_targets_mean": 1147.3,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 2.616,
|
|
"grad_norm": 0.6526011604089567,
|
|
"learning_rate": 3.156249935474953e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25191718339920044,
|
|
"step": 1635,
|
|
"valid_targets_mean": 1192.1,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 2.624,
|
|
"grad_norm": 0.618042630852169,
|
|
"learning_rate": 3.1497297591849614e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28645265102386475,
|
|
"step": 1640,
|
|
"valid_targets_mean": 1445.8,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.632,
|
|
"grad_norm": 0.6367040544935965,
|
|
"learning_rate": 3.143191280660238e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29418519139289856,
|
|
"step": 1645,
|
|
"valid_targets_mean": 1404.3,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"grad_norm": 0.7068096922213519,
|
|
"learning_rate": 3.1366346039850424e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2647915780544281,
|
|
"step": 1650,
|
|
"valid_targets_mean": 1209.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 2.648,
|
|
"grad_norm": 0.5964425482144414,
|
|
"learning_rate": 3.130059833533323e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799581289291382,
|
|
"step": 1655,
|
|
"valid_targets_mean": 1549.2,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 2.656,
|
|
"grad_norm": 0.6861467791598059,
|
|
"learning_rate": 3.123467073967059e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27958473563194275,
|
|
"step": 1660,
|
|
"valid_targets_mean": 1171.5,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 2.664,
|
|
"grad_norm": 0.6785126755518206,
|
|
"learning_rate": 3.116856430234594e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756059467792511,
|
|
"step": 1665,
|
|
"valid_targets_mean": 1348.6,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 2.672,
|
|
"grad_norm": 0.6868021955639556,
|
|
"learning_rate": 3.110228007568963e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699786126613617,
|
|
"step": 1670,
|
|
"valid_targets_mean": 1075.9,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"grad_norm": 0.6837558009659767,
|
|
"learning_rate": 3.103581911486221e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663053870201111,
|
|
"step": 1675,
|
|
"valid_targets_mean": 1203.6,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 2.6879999999999997,
|
|
"grad_norm": 0.7077642406028208,
|
|
"learning_rate": 3.0969182477837604e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27322307229042053,
|
|
"step": 1680,
|
|
"valid_targets_mean": 1136.6,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 2.6959999999999997,
|
|
"grad_norm": 0.626285425605735,
|
|
"learning_rate": 3.090237122538628e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884223759174347,
|
|
"step": 1685,
|
|
"valid_targets_mean": 1551.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 2.7039999999999997,
|
|
"grad_norm": 0.6552637498349907,
|
|
"learning_rate": 3.0835386421058345e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23552396893501282,
|
|
"step": 1690,
|
|
"valid_targets_mean": 1040.1,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 2.7119999999999997,
|
|
"grad_norm": 0.6125922304724731,
|
|
"learning_rate": 3.0768229131166664e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2881890535354614,
|
|
"step": 1695,
|
|
"valid_targets_mean": 1627.1,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 2.7199999999999998,
|
|
"grad_norm": 0.6110399420543144,
|
|
"learning_rate": 3.070090042476983e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738461196422577,
|
|
"step": 1700,
|
|
"valid_targets_mean": 1326.9,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.7279999999999998,
|
|
"grad_norm": 0.6878103190035995,
|
|
"learning_rate": 3.063340137365517e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33009785413742065,
|
|
"step": 1705,
|
|
"valid_targets_mean": 1681.4,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 2.7359999999999998,
|
|
"grad_norm": 0.6446580606361303,
|
|
"learning_rate": 3.0565733052321674e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816465497016907,
|
|
"step": 1710,
|
|
"valid_targets_mean": 1395.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 2.7439999999999998,
|
|
"grad_norm": 0.6886131254289206,
|
|
"learning_rate": 3.0497896537962924e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29708582162857056,
|
|
"step": 1715,
|
|
"valid_targets_mean": 1441.6,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.752,
|
|
"grad_norm": 0.7010048933902271,
|
|
"learning_rate": 3.042989291044991e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690610885620117,
|
|
"step": 1720,
|
|
"valid_targets_mean": 1039.3,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"grad_norm": 0.6547515362671856,
|
|
"learning_rate": 3.036172325231383e-05,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983243465423584,
|
|
"step": 1725,
|
|
"valid_targets_mean": 1371.3,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 2.768,
|
|
"grad_norm": 0.687999306733564,
|
|
"learning_rate": 3.0293388648728908e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736377418041229,
|
|
"step": 1730,
|
|
"valid_targets_mean": 1228.2,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.776,
|
|
"grad_norm": 0.6614346299517412,
|
|
"learning_rate": 3.022489018749508e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815403640270233,
|
|
"step": 1735,
|
|
"valid_targets_mean": 1426.5,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 2.784,
|
|
"grad_norm": 0.6751430029646093,
|
|
"learning_rate": 3.015622895902068e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28339284658432007,
|
|
"step": 1740,
|
|
"valid_targets_mean": 1558.9,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 2.792,
|
|
"grad_norm": 0.7204212927807165,
|
|
"learning_rate": 3.008740605630508e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608069181442261,
|
|
"step": 1745,
|
|
"valid_targets_mean": 1208.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"grad_norm": 0.7081200251754021,
|
|
"learning_rate": 3.0018422574921337e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26226675510406494,
|
|
"step": 1750,
|
|
"valid_targets_mean": 1096.6,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.808,
|
|
"grad_norm": 0.6737275577448616,
|
|
"learning_rate": 2.9949279612998673e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26400458812713623,
|
|
"step": 1755,
|
|
"valid_targets_mean": 1113.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 2.816,
|
|
"grad_norm": 0.7286685160057882,
|
|
"learning_rate": 2.9879978271205064e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29918211698532104,
|
|
"step": 1760,
|
|
"valid_targets_mean": 1236.9,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 2.824,
|
|
"grad_norm": 0.7059035574944392,
|
|
"learning_rate": 2.9810519652729692e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27685168385505676,
|
|
"step": 1765,
|
|
"valid_targets_mean": 1191.6,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 2.832,
|
|
"grad_norm": 0.646364351741693,
|
|
"learning_rate": 2.9740904863265378e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25256067514419556,
|
|
"step": 1770,
|
|
"valid_targets_mean": 1210.2,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"grad_norm": 0.6551795311268278,
|
|
"learning_rate": 2.967113501099097e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27837198972702026,
|
|
"step": 1775,
|
|
"valid_targets_mean": 1231.1,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.848,
|
|
"grad_norm": 0.6264280236892904,
|
|
"learning_rate": 2.9601211206553745e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2940847873687744,
|
|
"step": 1780,
|
|
"valid_targets_mean": 1444.4,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 2.856,
|
|
"grad_norm": 0.6260668123180814,
|
|
"learning_rate": 2.9531134563051686e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27554386854171753,
|
|
"step": 1785,
|
|
"valid_targets_mean": 1309.7,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 2.864,
|
|
"grad_norm": 0.7073164777138209,
|
|
"learning_rate": 2.946090619601579e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30758726596832275,
|
|
"step": 1790,
|
|
"valid_targets_mean": 1245.4,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 2.872,
|
|
"grad_norm": 0.6959252209235993,
|
|
"learning_rate": 2.9390527223392292e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541739046573639,
|
|
"step": 1795,
|
|
"valid_targets_mean": 1061.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"grad_norm": 0.6059233814177686,
|
|
"learning_rate": 2.931999876552488e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897215485572815,
|
|
"step": 1800,
|
|
"valid_targets_mean": 1444.6,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 2.888,
|
|
"grad_norm": 0.647514191812558,
|
|
"learning_rate": 2.9249321945136854e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672356963157654,
|
|
"step": 1805,
|
|
"valid_targets_mean": 1240.6,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 2.896,
|
|
"grad_norm": 0.7101378692707876,
|
|
"learning_rate": 2.9178497887313257e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725386619567871,
|
|
"step": 1810,
|
|
"valid_targets_mean": 1095.4,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 2.904,
|
|
"grad_norm": 0.6295473163851605,
|
|
"learning_rate": 2.9107527719482968e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805788218975067,
|
|
"step": 1815,
|
|
"valid_targets_mean": 1265.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 2.912,
|
|
"grad_norm": 0.6952187241894342,
|
|
"learning_rate": 2.9036412571400747e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28346186876296997,
|
|
"step": 1820,
|
|
"valid_targets_mean": 1113.0,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"grad_norm": 0.6574768531799914,
|
|
"learning_rate": 2.8965153575129255e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29339683055877686,
|
|
"step": 1825,
|
|
"valid_targets_mean": 1275.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 2.928,
|
|
"grad_norm": 0.6741740470948383,
|
|
"learning_rate": 2.8893751865021044e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27708783745765686,
|
|
"step": 1830,
|
|
"valid_targets_mean": 1175.4,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 2.936,
|
|
"grad_norm": 0.6190867338789477,
|
|
"learning_rate": 2.8822208577700473e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30331486463546753,
|
|
"step": 1835,
|
|
"valid_targets_mean": 1407.8,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 2.944,
|
|
"grad_norm": 0.6105569407509004,
|
|
"learning_rate": 2.8750524852045642e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25947797298431396,
|
|
"step": 1840,
|
|
"valid_targets_mean": 1234.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.952,
|
|
"grad_norm": 0.6761709908526802,
|
|
"learning_rate": 2.867870182917024e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822380065917969,
|
|
"step": 1845,
|
|
"valid_targets_mean": 1241.3,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"grad_norm": 0.7094404713196931,
|
|
"learning_rate": 2.8606740652405394e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28638163208961487,
|
|
"step": 1850,
|
|
"valid_targets_mean": 1219.3,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.968,
|
|
"grad_norm": 0.6243844773518137,
|
|
"learning_rate": 2.853464246728147e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30146270990371704,
|
|
"step": 1855,
|
|
"valid_targets_mean": 1662.6,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.976,
|
|
"grad_norm": 0.6420384589601552,
|
|
"learning_rate": 2.846240842150984e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27096953988075256,
|
|
"step": 1860,
|
|
"valid_targets_mean": 1164.1,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 2.984,
|
|
"grad_norm": 0.6894706295393126,
|
|
"learning_rate": 2.839003966496458e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689642608165741,
|
|
"step": 1865,
|
|
"valid_targets_mean": 1070.7,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 2.992,
|
|
"grad_norm": 0.694330841594969,
|
|
"learning_rate": 2.8317537349664215e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28451550006866455,
|
|
"step": 1870,
|
|
"valid_targets_mean": 1364.6,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.5965226888616316,
|
|
"learning_rate": 2.824490262975334e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26313117146492004,
|
|
"step": 1875,
|
|
"valid_targets_mean": 1376.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.008,
|
|
"grad_norm": 0.6441827065961485,
|
|
"learning_rate": 2.817213666148427e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27181917428970337,
|
|
"step": 1880,
|
|
"valid_targets_mean": 1337.6,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 3.016,
|
|
"grad_norm": 0.6610651304160714,
|
|
"learning_rate": 2.809924060319862e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26347678899765015,
|
|
"step": 1885,
|
|
"valid_targets_mean": 1278.7,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 3.024,
|
|
"grad_norm": 0.6740265332429951,
|
|
"learning_rate": 2.802621561530888e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27930864691734314,
|
|
"step": 1890,
|
|
"valid_targets_mean": 1376.5,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 3.032,
|
|
"grad_norm": 0.6522179002747285,
|
|
"learning_rate": 2.7953062860279937e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26702678203582764,
|
|
"step": 1895,
|
|
"valid_targets_mean": 1377.0,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 3.04,
|
|
"grad_norm": 0.6592926469490572,
|
|
"learning_rate": 2.7879783502610557e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26415085792541504,
|
|
"step": 1900,
|
|
"valid_targets_mean": 1233.1,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 3.048,
|
|
"grad_norm": 0.7006814893802589,
|
|
"learning_rate": 2.7806378708814875e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003199100494385,
|
|
"step": 1905,
|
|
"valid_targets_mean": 1436.9,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 3.056,
|
|
"grad_norm": 0.670782593465585,
|
|
"learning_rate": 2.773284964740379e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658756375312805,
|
|
"step": 1910,
|
|
"valid_targets_mean": 1323.0,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 3.064,
|
|
"grad_norm": 0.7151261364629163,
|
|
"learning_rate": 2.7659197488866403e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501222491264343,
|
|
"step": 1915,
|
|
"valid_targets_mean": 1106.9,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 3.072,
|
|
"grad_norm": 0.6842824954952684,
|
|
"learning_rate": 2.7585423405651347e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25760418176651,
|
|
"step": 1920,
|
|
"valid_targets_mean": 1263.2,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"grad_norm": 0.6851435100102551,
|
|
"learning_rate": 2.7511528572148153e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751304805278778,
|
|
"step": 1925,
|
|
"valid_targets_mean": 1516.8,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 3.088,
|
|
"grad_norm": 0.7116052314823448,
|
|
"learning_rate": 2.7437514164668536e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25580573081970215,
|
|
"step": 1930,
|
|
"valid_targets_mean": 1079.2,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 3.096,
|
|
"grad_norm": 0.6694860349286587,
|
|
"learning_rate": 2.7363381361427692e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26989132165908813,
|
|
"step": 1935,
|
|
"valid_targets_mean": 1372.5,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.104,
|
|
"grad_norm": 0.654822741183289,
|
|
"learning_rate": 2.72891313425255e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30456915497779846,
|
|
"step": 1940,
|
|
"valid_targets_mean": 1517.6,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 3.112,
|
|
"grad_norm": 0.6048135985337504,
|
|
"learning_rate": 2.7214765289927777e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26889568567276,
|
|
"step": 1945,
|
|
"valid_targets_mean": 1466.9,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 3.12,
|
|
"grad_norm": 0.6379364401499588,
|
|
"learning_rate": 2.714028438744746e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26536551117897034,
|
|
"step": 1950,
|
|
"valid_targets_mean": 1451.5,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 3.128,
|
|
"grad_norm": 0.6733648431420528,
|
|
"learning_rate": 2.706568982072573e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3206586241722107,
|
|
"step": 1955,
|
|
"valid_targets_mean": 1554.1,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 3.136,
|
|
"grad_norm": 0.6401864843680305,
|
|
"learning_rate": 2.6990982777213174e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515425682067871,
|
|
"step": 1960,
|
|
"valid_targets_mean": 1207.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 3.144,
|
|
"grad_norm": 0.7606346577293706,
|
|
"learning_rate": 2.691616444615085e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623598277568817,
|
|
"step": 1965,
|
|
"valid_targets_mean": 1155.6,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 3.152,
|
|
"grad_norm": 0.6756078434332166,
|
|
"learning_rate": 2.6841236018551402e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26152557134628296,
|
|
"step": 1970,
|
|
"valid_targets_mean": 1377.0,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 3.16,
|
|
"grad_norm": 0.6911756295978496,
|
|
"learning_rate": 2.6766198687180028e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784450650215149,
|
|
"step": 1975,
|
|
"valid_targets_mean": 1408.5,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.168,
|
|
"grad_norm": 0.6386101574631391,
|
|
"learning_rate": 2.6691053646535564e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2641109228134155,
|
|
"step": 1980,
|
|
"valid_targets_mean": 1290.5,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 3.176,
|
|
"grad_norm": 0.6623380644537674,
|
|
"learning_rate": 2.6615802092831446e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27975255250930786,
|
|
"step": 1985,
|
|
"valid_targets_mean": 1282.6,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 3.184,
|
|
"grad_norm": 0.650501911386913,
|
|
"learning_rate": 2.6540445223976637e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23963376879692078,
|
|
"step": 1990,
|
|
"valid_targets_mean": 1069.8,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 3.192,
|
|
"grad_norm": 0.6408391084918802,
|
|
"learning_rate": 2.6464984239556602e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2742537558078766,
|
|
"step": 1995,
|
|
"valid_targets_mean": 1337.4,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"grad_norm": 0.6055022780172115,
|
|
"learning_rate": 2.63894203408142e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587350308895111,
|
|
"step": 2000,
|
|
"valid_targets_mean": 1507.9,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 3.208,
|
|
"grad_norm": 0.7123563895911211,
|
|
"learning_rate": 2.6313754730630528e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623889446258545,
|
|
"step": 2005,
|
|
"valid_targets_mean": 937.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 3.216,
|
|
"grad_norm": 0.6430245521512512,
|
|
"learning_rate": 2.623798861350582e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2792336344718933,
|
|
"step": 2010,
|
|
"valid_targets_mean": 1505.6,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 3.224,
|
|
"grad_norm": 0.6995254374597308,
|
|
"learning_rate": 2.6162123195540247e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27274253964424133,
|
|
"step": 2015,
|
|
"valid_targets_mean": 1320.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 3.232,
|
|
"grad_norm": 0.6509739736116275,
|
|
"learning_rate": 2.6086159684414726e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812984883785248,
|
|
"step": 2020,
|
|
"valid_targets_mean": 1577.1,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 3.24,
|
|
"grad_norm": 0.6235704652005767,
|
|
"learning_rate": 2.6010099289371694e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28560805320739746,
|
|
"step": 2025,
|
|
"valid_targets_mean": 1705.1,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 3.248,
|
|
"grad_norm": 0.6905835389885921,
|
|
"learning_rate": 2.5933943221195844e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24157525599002838,
|
|
"step": 2030,
|
|
"valid_targets_mean": 1015.9,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 3.2560000000000002,
|
|
"grad_norm": 0.6824245806915535,
|
|
"learning_rate": 2.5857692692194884e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27559101581573486,
|
|
"step": 2035,
|
|
"valid_targets_mean": 1272.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 3.2640000000000002,
|
|
"grad_norm": 0.6589804099506639,
|
|
"learning_rate": 2.5781348916180195e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27335792779922485,
|
|
"step": 2040,
|
|
"valid_targets_mean": 1285.4,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 3.2720000000000002,
|
|
"grad_norm": 0.6779592316129089,
|
|
"learning_rate": 2.570491310844755e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634948194026947,
|
|
"step": 2045,
|
|
"valid_targets_mean": 1197.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 3.2800000000000002,
|
|
"grad_norm": 0.6502129655431058,
|
|
"learning_rate": 2.562838648575774e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824130356311798,
|
|
"step": 2050,
|
|
"valid_targets_mean": 1673.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 3.288,
|
|
"grad_norm": 0.6721926931667754,
|
|
"learning_rate": 2.5551770266317224e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26235949993133545,
|
|
"step": 2055,
|
|
"valid_targets_mean": 1299.0,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 3.296,
|
|
"grad_norm": 0.695032871059252,
|
|
"learning_rate": 2.5475065669758713e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24650682508945465,
|
|
"step": 2060,
|
|
"valid_targets_mean": 1097.7,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 3.304,
|
|
"grad_norm": 0.6288733204439535,
|
|
"learning_rate": 2.5398273917121786e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26480379700660706,
|
|
"step": 2065,
|
|
"valid_targets_mean": 1543.4,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 3.312,
|
|
"grad_norm": 0.700927603502813,
|
|
"learning_rate": 2.532139623083342e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26846858859062195,
|
|
"step": 2070,
|
|
"valid_targets_mean": 1185.6,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.32,
|
|
"grad_norm": 0.6111277048453039,
|
|
"learning_rate": 2.5244433834688552e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684972882270813,
|
|
"step": 2075,
|
|
"valid_targets_mean": 1505.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 3.328,
|
|
"grad_norm": 0.6615476475498308,
|
|
"learning_rate": 2.5167387953830602e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560844123363495,
|
|
"step": 2080,
|
|
"valid_targets_mean": 1239.4,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.336,
|
|
"grad_norm": 0.6834214505056654,
|
|
"learning_rate": 2.5090259814731946e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24686677753925323,
|
|
"step": 2085,
|
|
"valid_targets_mean": 1132.9,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 3.344,
|
|
"grad_norm": 0.7082885175270403,
|
|
"learning_rate": 2.5013050645174414e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2395028918981552,
|
|
"step": 2090,
|
|
"valid_targets_mean": 1008.8,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.352,
|
|
"grad_norm": 0.6876509784496978,
|
|
"learning_rate": 2.4935761674229735e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25685176253318787,
|
|
"step": 2095,
|
|
"valid_targets_mean": 1114.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 3.36,
|
|
"grad_norm": 0.6484257430077918,
|
|
"learning_rate": 2.4858394132239982e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25843772292137146,
|
|
"step": 2100,
|
|
"valid_targets_mean": 1438.8,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 3.368,
|
|
"grad_norm": 0.6794207575435304,
|
|
"learning_rate": 2.4780949250797964e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261205792427063,
|
|
"step": 2105,
|
|
"valid_targets_mean": 1279.4,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 3.376,
|
|
"grad_norm": 0.6373802010213625,
|
|
"learning_rate": 2.4703428262727656e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266317218542099,
|
|
"step": 2110,
|
|
"valid_targets_mean": 1429.1,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 3.384,
|
|
"grad_norm": 0.7414396406633716,
|
|
"learning_rate": 2.4625832402064525e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24991574883460999,
|
|
"step": 2115,
|
|
"valid_targets_mean": 1069.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.392,
|
|
"grad_norm": 0.6894362505107211,
|
|
"learning_rate": 2.454816290403595e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270450234413147,
|
|
"step": 2120,
|
|
"valid_targets_mean": 1291.2,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"grad_norm": 0.6809353912768815,
|
|
"learning_rate": 2.4470421005041492e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651490569114685,
|
|
"step": 2125,
|
|
"valid_targets_mean": 1396.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 3.408,
|
|
"grad_norm": 0.6194267821198293,
|
|
"learning_rate": 2.4392607942633263e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2424091398715973,
|
|
"step": 2130,
|
|
"valid_targets_mean": 1309.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.416,
|
|
"grad_norm": 0.6240295297910221,
|
|
"learning_rate": 2.43147249554962e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265700101852417,
|
|
"step": 2135,
|
|
"valid_targets_mean": 1601.2,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.424,
|
|
"grad_norm": 0.6542032656922367,
|
|
"learning_rate": 2.423677328342835e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26173776388168335,
|
|
"step": 2140,
|
|
"valid_targets_mean": 1310.1,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 3.432,
|
|
"grad_norm": 0.6741808837906534,
|
|
"learning_rate": 2.415875416732113e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2550678849220276,
|
|
"step": 2145,
|
|
"valid_targets_mean": 1239.3,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 3.44,
|
|
"grad_norm": 0.6690301362728636,
|
|
"learning_rate": 2.4080668849139603e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2576570212841034,
|
|
"step": 2150,
|
|
"valid_targets_mean": 1348.2,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 3.448,
|
|
"grad_norm": 0.6970882924445339,
|
|
"learning_rate": 2.4002518571902665e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28229692578315735,
|
|
"step": 2155,
|
|
"valid_targets_mean": 1326.5,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 3.456,
|
|
"grad_norm": 0.6955358601953625,
|
|
"learning_rate": 2.392430457966328e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771756052970886,
|
|
"step": 2160,
|
|
"valid_targets_mean": 1343.8,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 3.464,
|
|
"grad_norm": 0.6941330428025131,
|
|
"learning_rate": 2.3846028117488686e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536184787750244,
|
|
"step": 2165,
|
|
"valid_targets_mean": 1178.4,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 3.472,
|
|
"grad_norm": 0.5867911264015586,
|
|
"learning_rate": 2.3767690431440533e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2742685079574585,
|
|
"step": 2170,
|
|
"valid_targets_mean": 1598.1,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"grad_norm": 0.657191740593938,
|
|
"learning_rate": 2.368929276855512e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26552343368530273,
|
|
"step": 2175,
|
|
"valid_targets_mean": 1228.5,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 3.488,
|
|
"grad_norm": 0.6404608705568032,
|
|
"learning_rate": 2.361083637682347e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26672402024269104,
|
|
"step": 2180,
|
|
"valid_targets_mean": 1289.1,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 3.496,
|
|
"grad_norm": 0.6342439182597627,
|
|
"learning_rate": 2.3532322505171502e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743380069732666,
|
|
"step": 2185,
|
|
"valid_targets_mean": 1508.1,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 3.504,
|
|
"grad_norm": 0.6362044103184005,
|
|
"learning_rate": 2.3453752403440147e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2473170906305313,
|
|
"step": 2190,
|
|
"valid_targets_mean": 1300.8,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 3.512,
|
|
"grad_norm": 0.6179159084600548,
|
|
"learning_rate": 2.337512732236545e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26762187480926514,
|
|
"step": 2195,
|
|
"valid_targets_mean": 1373.2,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 3.52,
|
|
"grad_norm": 0.6760841661269625,
|
|
"learning_rate": 2.3296448513558628e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25998616218566895,
|
|
"step": 2200,
|
|
"valid_targets_mean": 1204.0,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 3.528,
|
|
"grad_norm": 0.6680925225311661,
|
|
"learning_rate": 2.321771722948622e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887067496776581,
|
|
"step": 2205,
|
|
"valid_targets_mean": 1504.8,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 3.536,
|
|
"grad_norm": 0.6361680704930657,
|
|
"learning_rate": 2.3138934723450074e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753642797470093,
|
|
"step": 2210,
|
|
"valid_targets_mean": 1553.9,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 3.544,
|
|
"grad_norm": 0.6474709911652999,
|
|
"learning_rate": 2.306010224956744e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24286013841629028,
|
|
"step": 2215,
|
|
"valid_targets_mean": 1294.9,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 3.552,
|
|
"grad_norm": 0.6592834845470664,
|
|
"learning_rate": 2.2981221062750986e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547059953212738,
|
|
"step": 2220,
|
|
"valid_targets_mean": 1237.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 3.56,
|
|
"grad_norm": 0.7100790484638881,
|
|
"learning_rate": 2.290229241868882e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24680167436599731,
|
|
"step": 2225,
|
|
"valid_targets_mean": 1082.2,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.568,
|
|
"grad_norm": 0.6323663415054647,
|
|
"learning_rate": 2.282331757382454e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27193230390548706,
|
|
"step": 2230,
|
|
"valid_targets_mean": 1322.2,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 3.576,
|
|
"grad_norm": 0.718686620502723,
|
|
"learning_rate": 2.2744297785337155e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23245269060134888,
|
|
"step": 2235,
|
|
"valid_targets_mean": 1076.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.584,
|
|
"grad_norm": 0.6983573675585034,
|
|
"learning_rate": 2.2665234311121155e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27206701040267944,
|
|
"step": 2240,
|
|
"valid_targets_mean": 1311.3,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 3.592,
|
|
"grad_norm": 0.693823774907838,
|
|
"learning_rate": 2.258612840976645e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26347246766090393,
|
|
"step": 2245,
|
|
"valid_targets_mean": 1194.4,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 3.6,
|
|
"grad_norm": 0.6220144997957632,
|
|
"learning_rate": 2.2506981340538315e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26488739252090454,
|
|
"step": 2250,
|
|
"valid_targets_mean": 1438.6,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 3.608,
|
|
"grad_norm": 0.6361527071307445,
|
|
"learning_rate": 2.2427794363357384e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26273518800735474,
|
|
"step": 2255,
|
|
"valid_targets_mean": 1425.4,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 3.616,
|
|
"grad_norm": 0.6590748207184768,
|
|
"learning_rate": 2.2348568738779566e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25226038694381714,
|
|
"step": 2260,
|
|
"valid_targets_mean": 1263.2,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 3.624,
|
|
"grad_norm": 0.6150820831653943,
|
|
"learning_rate": 2.2269305727975993e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877650558948517,
|
|
"step": 2265,
|
|
"valid_targets_mean": 1610.2,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 3.632,
|
|
"grad_norm": 0.6639047165927768,
|
|
"learning_rate": 2.2190006592712927e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27034199237823486,
|
|
"step": 2270,
|
|
"valid_targets_mean": 1326.8,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"grad_norm": 0.6846411501667449,
|
|
"learning_rate": 2.2110672595331698e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25584256649017334,
|
|
"step": 2275,
|
|
"valid_targets_mean": 1118.9,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 3.648,
|
|
"grad_norm": 0.671233698182662,
|
|
"learning_rate": 2.2031304998728587e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716110348701477,
|
|
"step": 2280,
|
|
"valid_targets_mean": 1328.6,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 3.656,
|
|
"grad_norm": 0.6805219452638351,
|
|
"learning_rate": 2.1951905066334737e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24805986881256104,
|
|
"step": 2285,
|
|
"valid_targets_mean": 1178.6,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 3.664,
|
|
"grad_norm": 0.6282042217501369,
|
|
"learning_rate": 2.1872474062096046e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779634892940521,
|
|
"step": 2290,
|
|
"valid_targets_mean": 1658.6,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 3.672,
|
|
"grad_norm": 0.5982057767317621,
|
|
"learning_rate": 2.179301325045301e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541889548301697,
|
|
"step": 2295,
|
|
"valid_targets_mean": 1546.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 3.68,
|
|
"grad_norm": 0.6172823940316745,
|
|
"learning_rate": 2.1713523896320647e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27987393736839294,
|
|
"step": 2300,
|
|
"valid_targets_mean": 1562.8,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 3.6879999999999997,
|
|
"grad_norm": 0.689345965670474,
|
|
"learning_rate": 2.163400726506832e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665555477142334,
|
|
"step": 2305,
|
|
"valid_targets_mean": 1153.3,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 3.6959999999999997,
|
|
"grad_norm": 0.6007049832011164,
|
|
"learning_rate": 2.155446462249961e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665506601333618,
|
|
"step": 2310,
|
|
"valid_targets_mean": 1519.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 3.7039999999999997,
|
|
"grad_norm": 0.5869859062746967,
|
|
"learning_rate": 2.147489723483217e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703627645969391,
|
|
"step": 2315,
|
|
"valid_targets_mean": 1768.3,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 3.7119999999999997,
|
|
"grad_norm": 0.6723944249524194,
|
|
"learning_rate": 2.139530636867757e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642304599285126,
|
|
"step": 2320,
|
|
"valid_targets_mean": 1287.4,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 3.7199999999999998,
|
|
"grad_norm": 0.6485349319217154,
|
|
"learning_rate": 2.1315693291021114e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268324077129364,
|
|
"step": 2325,
|
|
"valid_targets_mean": 1410.3,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 3.7279999999999998,
|
|
"grad_norm": 0.6627449102269837,
|
|
"learning_rate": 2.1236059269201686e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24664853513240814,
|
|
"step": 2330,
|
|
"valid_targets_mean": 1091.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 3.7359999999999998,
|
|
"grad_norm": 0.6818152475736787,
|
|
"learning_rate": 2.1156405570891584e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24624744057655334,
|
|
"step": 2335,
|
|
"valid_targets_mean": 1100.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 3.7439999999999998,
|
|
"grad_norm": 0.6862149240864039,
|
|
"learning_rate": 2.1076733464076322e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28191810846328735,
|
|
"step": 2340,
|
|
"valid_targets_mean": 1253.8,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 3.752,
|
|
"grad_norm": 0.69512552125173,
|
|
"learning_rate": 2.0997044217034462e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24592283368110657,
|
|
"step": 2345,
|
|
"valid_targets_mean": 1105.6,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 3.76,
|
|
"grad_norm": 0.6701496692372355,
|
|
"learning_rate": 2.0917339098317405e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657725214958191,
|
|
"step": 2350,
|
|
"valid_targets_mean": 1227.7,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.768,
|
|
"grad_norm": 0.6583453990163107,
|
|
"learning_rate": 2.083761937672922e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28746914863586426,
|
|
"step": 2355,
|
|
"valid_targets_mean": 1413.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 3.776,
|
|
"grad_norm": 0.647193703379553,
|
|
"learning_rate": 2.0757886321306433e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520062327384949,
|
|
"step": 2360,
|
|
"valid_targets_mean": 1292.3,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 3.784,
|
|
"grad_norm": 0.6544500168556511,
|
|
"learning_rate": 2.0678141201297827e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558833360671997,
|
|
"step": 2365,
|
|
"valid_targets_mean": 1230.6,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 3.792,
|
|
"grad_norm": 0.5908868577470436,
|
|
"learning_rate": 2.059838528614423e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25150492787361145,
|
|
"step": 2370,
|
|
"valid_targets_mean": 1492.7,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"grad_norm": 0.6536505359767332,
|
|
"learning_rate": 2.0518619845458322e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25637102127075195,
|
|
"step": 2375,
|
|
"valid_targets_mean": 1262.4,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 3.808,
|
|
"grad_norm": 0.6970400810427089,
|
|
"learning_rate": 2.0438846149004426e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569451928138733,
|
|
"step": 2380,
|
|
"valid_targets_mean": 1290.3,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 3.816,
|
|
"grad_norm": 0.6860436289691644,
|
|
"learning_rate": 2.0359065466678268e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257647842168808,
|
|
"step": 2385,
|
|
"valid_targets_mean": 1242.9,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 3.824,
|
|
"grad_norm": 0.6649062313827049,
|
|
"learning_rate": 2.0279279068486795e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723511755466461,
|
|
"step": 2390,
|
|
"valid_targets_mean": 1329.1,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 3.832,
|
|
"grad_norm": 0.6790621161618521,
|
|
"learning_rate": 2.019948822452794e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25694236159324646,
|
|
"step": 2395,
|
|
"valid_targets_mean": 1201.9,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.84,
|
|
"grad_norm": 0.6706029840981889,
|
|
"learning_rate": 2.0119694204970393e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23751656711101532,
|
|
"step": 2400,
|
|
"valid_targets_mean": 1073.2,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.848,
|
|
"grad_norm": 0.6941374683184205,
|
|
"learning_rate": 2.0039898280033414e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25032341480255127,
|
|
"step": 2405,
|
|
"valid_targets_mean": 1076.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 3.856,
|
|
"grad_norm": 0.6620252014121757,
|
|
"learning_rate": 1.9960101719966592e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632727026939392,
|
|
"step": 2410,
|
|
"valid_targets_mean": 1353.2,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 3.864,
|
|
"grad_norm": 0.6139635274245487,
|
|
"learning_rate": 1.9880305795029617e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24872928857803345,
|
|
"step": 2415,
|
|
"valid_targets_mean": 1351.9,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 3.872,
|
|
"grad_norm": 0.6470034279475497,
|
|
"learning_rate": 1.980051177547207e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24663600325584412,
|
|
"step": 2420,
|
|
"valid_targets_mean": 1244.1,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"grad_norm": 0.6782469926553503,
|
|
"learning_rate": 1.9720720931513212e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26283538341522217,
|
|
"step": 2425,
|
|
"valid_targets_mean": 1228.9,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 3.888,
|
|
"grad_norm": 0.6362788438143813,
|
|
"learning_rate": 1.9640934533321735e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518549859523773,
|
|
"step": 2430,
|
|
"valid_targets_mean": 1298.5,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.896,
|
|
"grad_norm": 0.6283093449451445,
|
|
"learning_rate": 1.9561153850995577e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577846050262451,
|
|
"step": 2435,
|
|
"valid_targets_mean": 1401.3,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 3.904,
|
|
"grad_norm": 0.6683315898630078,
|
|
"learning_rate": 1.948138015454168e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583683133125305,
|
|
"step": 2440,
|
|
"valid_targets_mean": 1249.4,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 3.912,
|
|
"grad_norm": 0.6812394706458885,
|
|
"learning_rate": 1.9401614713855775e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255681574344635,
|
|
"step": 2445,
|
|
"valid_targets_mean": 1148.9,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 3.92,
|
|
"grad_norm": 0.6650802276321471,
|
|
"learning_rate": 1.932185879870218e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701965868473053,
|
|
"step": 2450,
|
|
"valid_targets_mean": 1404.8,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 3.928,
|
|
"grad_norm": 0.6361622244240126,
|
|
"learning_rate": 1.924211367869357e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2748684883117676,
|
|
"step": 2455,
|
|
"valid_targets_mean": 1454.4,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 3.936,
|
|
"grad_norm": 0.66087382538398,
|
|
"learning_rate": 1.9162380623270783e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624155879020691,
|
|
"step": 2460,
|
|
"valid_targets_mean": 1293.4,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.944,
|
|
"grad_norm": 0.8528949959512993,
|
|
"learning_rate": 1.90826609016826e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27243292331695557,
|
|
"step": 2465,
|
|
"valid_targets_mean": 1403.2,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.952,
|
|
"grad_norm": 0.6499965303866664,
|
|
"learning_rate": 1.9002955782965548e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805604934692383,
|
|
"step": 2470,
|
|
"valid_targets_mean": 1488.4,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 3.96,
|
|
"grad_norm": 0.658969181193274,
|
|
"learning_rate": 1.8923266535923688e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27364569902420044,
|
|
"step": 2475,
|
|
"valid_targets_mean": 1414.6,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 3.968,
|
|
"grad_norm": 0.7142102559139896,
|
|
"learning_rate": 1.8843594429108426e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25395435094833374,
|
|
"step": 2480,
|
|
"valid_targets_mean": 1026.7,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 3.976,
|
|
"grad_norm": 0.6641654654032558,
|
|
"learning_rate": 1.8763940730798324e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2506484389305115,
|
|
"step": 2485,
|
|
"valid_targets_mean": 1232.2,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 3.984,
|
|
"grad_norm": 0.637654602488558,
|
|
"learning_rate": 1.8684306708978896e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26610612869262695,
|
|
"step": 2490,
|
|
"valid_targets_mean": 1364.2,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.992,
|
|
"grad_norm": 0.6588766976824374,
|
|
"learning_rate": 1.8604693631322433e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25643378496170044,
|
|
"step": 2495,
|
|
"valid_targets_mean": 1213.6,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.653949123831953,
|
|
"learning_rate": 1.852510276516783e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26928308606147766,
|
|
"step": 2500,
|
|
"valid_targets_mean": 1364.0,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 4.008,
|
|
"grad_norm": 0.6752226967073331,
|
|
"learning_rate": 1.8445535377500393e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743467688560486,
|
|
"step": 2505,
|
|
"valid_targets_mean": 1330.3,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 4.016,
|
|
"grad_norm": 0.7437006714562896,
|
|
"learning_rate": 1.8365992734931686e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25773298740386963,
|
|
"step": 2510,
|
|
"valid_targets_mean": 1130.7,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 4.024,
|
|
"grad_norm": 0.6867599576535997,
|
|
"learning_rate": 1.8286476103679356e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24066534638404846,
|
|
"step": 2515,
|
|
"valid_targets_mean": 1255.4,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.032,
|
|
"grad_norm": 0.7624424135535716,
|
|
"learning_rate": 1.8206986749546992e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24525007605552673,
|
|
"step": 2520,
|
|
"valid_targets_mean": 986.4,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 4.04,
|
|
"grad_norm": 0.6977327731928428,
|
|
"learning_rate": 1.8127525937903957e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701755166053772,
|
|
"step": 2525,
|
|
"valid_targets_mean": 1225.4,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.048,
|
|
"grad_norm": 0.6185207332523498,
|
|
"learning_rate": 1.8048094933665262e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2495201826095581,
|
|
"step": 2530,
|
|
"valid_targets_mean": 1605.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.056,
|
|
"grad_norm": 0.6967419200082658,
|
|
"learning_rate": 1.7968695001271416e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24452589452266693,
|
|
"step": 2535,
|
|
"valid_targets_mean": 1160.8,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 4.064,
|
|
"grad_norm": 0.6826662900709373,
|
|
"learning_rate": 1.7889327404668316e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27587270736694336,
|
|
"step": 2540,
|
|
"valid_targets_mean": 1370.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 4.072,
|
|
"grad_norm": 0.6842844987321012,
|
|
"learning_rate": 1.7809993407287083e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24526247382164001,
|
|
"step": 2545,
|
|
"valid_targets_mean": 1197.8,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"grad_norm": 0.6660539291553894,
|
|
"learning_rate": 1.7730694272024018e-05,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26537883281707764,
|
|
"step": 2550,
|
|
"valid_targets_mean": 1423.4,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 4.088,
|
|
"grad_norm": 0.6761359589772779,
|
|
"learning_rate": 1.765143126122044e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24097639322280884,
|
|
"step": 2555,
|
|
"valid_targets_mean": 1251.2,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 4.096,
|
|
"grad_norm": 0.6974075337689369,
|
|
"learning_rate": 1.7572205636642622e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24446558952331543,
|
|
"step": 2560,
|
|
"valid_targets_mean": 1170.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 4.104,
|
|
"grad_norm": 0.6925586588563667,
|
|
"learning_rate": 1.749301865946169e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24788764119148254,
|
|
"step": 2565,
|
|
"valid_targets_mean": 1186.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.112,
|
|
"grad_norm": 0.7291193460039049,
|
|
"learning_rate": 1.7413871590233557e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22426724433898926,
|
|
"step": 2570,
|
|
"valid_targets_mean": 911.9,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 4.12,
|
|
"grad_norm": 0.6636190405667458,
|
|
"learning_rate": 1.7334765688878848e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26385343074798584,
|
|
"step": 2575,
|
|
"valid_targets_mean": 1663.2,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 4.128,
|
|
"grad_norm": 0.6766216618634091,
|
|
"learning_rate": 1.7255702214662852e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610906958580017,
|
|
"step": 2580,
|
|
"valid_targets_mean": 1292.3,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.136,
|
|
"grad_norm": 0.6954675056451982,
|
|
"learning_rate": 1.7176682426175468e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26476234197616577,
|
|
"step": 2585,
|
|
"valid_targets_mean": 1335.3,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 4.144,
|
|
"grad_norm": 0.6388809133250997,
|
|
"learning_rate": 1.709770758131118e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28252729773521423,
|
|
"step": 2590,
|
|
"valid_targets_mean": 1658.2,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 4.152,
|
|
"grad_norm": 0.6736486410723815,
|
|
"learning_rate": 1.7018778937249017e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26856860518455505,
|
|
"step": 2595,
|
|
"valid_targets_mean": 1623.7,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 4.16,
|
|
"grad_norm": 0.6702648950493726,
|
|
"learning_rate": 1.6939897750432562e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24913468956947327,
|
|
"step": 2600,
|
|
"valid_targets_mean": 1222.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 4.168,
|
|
"grad_norm": 0.6501407018403899,
|
|
"learning_rate": 1.6861065276549933e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578279972076416,
|
|
"step": 2605,
|
|
"valid_targets_mean": 1562.8,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 4.176,
|
|
"grad_norm": 0.6545856496667984,
|
|
"learning_rate": 1.6782282770513788e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23891770839691162,
|
|
"step": 2610,
|
|
"valid_targets_mean": 1307.3,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 4.184,
|
|
"grad_norm": 0.6731230040748872,
|
|
"learning_rate": 1.6703551486441382e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24786439538002014,
|
|
"step": 2615,
|
|
"valid_targets_mean": 1318.4,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 4.192,
|
|
"grad_norm": 0.6885600038346852,
|
|
"learning_rate": 1.6624872677634565e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24863067269325256,
|
|
"step": 2620,
|
|
"valid_targets_mean": 1198.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 4.2,
|
|
"grad_norm": 0.7490327717460988,
|
|
"learning_rate": 1.654624759655986e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24127447605133057,
|
|
"step": 2625,
|
|
"valid_targets_mean": 1041.3,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 4.208,
|
|
"grad_norm": 0.6873953843632404,
|
|
"learning_rate": 1.64676774948285e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25932279229164124,
|
|
"step": 2630,
|
|
"valid_targets_mean": 1334.0,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 4.216,
|
|
"grad_norm": 0.6646149999288611,
|
|
"learning_rate": 1.6389163623176536e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251889705657959,
|
|
"step": 2635,
|
|
"valid_targets_mean": 1407.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 4.224,
|
|
"grad_norm": 0.6518543973908394,
|
|
"learning_rate": 1.6310707231444884e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26305222511291504,
|
|
"step": 2640,
|
|
"valid_targets_mean": 1470.2,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 4.232,
|
|
"grad_norm": 0.7304978495882661,
|
|
"learning_rate": 1.623230956855947e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27979856729507446,
|
|
"step": 2645,
|
|
"valid_targets_mean": 1238.6,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"grad_norm": 0.685096372215487,
|
|
"learning_rate": 1.6153971882511324e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2462332546710968,
|
|
"step": 2650,
|
|
"valid_targets_mean": 1192.6,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.248,
|
|
"grad_norm": 0.6842439023725819,
|
|
"learning_rate": 1.6075695420336724e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21787481009960175,
|
|
"step": 2655,
|
|
"valid_targets_mean": 1033.6,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 4.256,
|
|
"grad_norm": 0.7234231074156968,
|
|
"learning_rate": 1.5997481428097338e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24423924088478088,
|
|
"step": 2660,
|
|
"valid_targets_mean": 1063.7,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 4.264,
|
|
"grad_norm": 0.7305747274510119,
|
|
"learning_rate": 1.5919331150860396e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25347453355789185,
|
|
"step": 2665,
|
|
"valid_targets_mean": 1112.8,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.272,
|
|
"grad_norm": 0.6561719310008635,
|
|
"learning_rate": 1.5841245832678873e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2277790904045105,
|
|
"step": 2670,
|
|
"valid_targets_mean": 1221.9,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.28,
|
|
"grad_norm": 0.7187091286353593,
|
|
"learning_rate": 1.576322671657166e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26360541582107544,
|
|
"step": 2675,
|
|
"valid_targets_mean": 1182.9,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 4.288,
|
|
"grad_norm": 0.6760153512044099,
|
|
"learning_rate": 1.5685275044503804e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25002750754356384,
|
|
"step": 2680,
|
|
"valid_targets_mean": 1276.5,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 4.296,
|
|
"grad_norm": 0.7487812561472922,
|
|
"learning_rate": 1.560739205736674e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23954318463802338,
|
|
"step": 2685,
|
|
"valid_targets_mean": 1147.8,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 4.304,
|
|
"grad_norm": 0.6193157845592424,
|
|
"learning_rate": 1.552957899495851e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28379690647125244,
|
|
"step": 2690,
|
|
"valid_targets_mean": 2096.6,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 4.312,
|
|
"grad_norm": 0.6831214389857666,
|
|
"learning_rate": 1.5451837095964054e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677820920944214,
|
|
"step": 2695,
|
|
"valid_targets_mean": 1279.5,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 4.32,
|
|
"grad_norm": 0.6121898182301581,
|
|
"learning_rate": 1.5374167597935478e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250791072845459,
|
|
"step": 2700,
|
|
"valid_targets_mean": 1475.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 4.328,
|
|
"grad_norm": 0.7359634397211369,
|
|
"learning_rate": 1.5296571737272354e-05,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26270294189453125,
|
|
"step": 2705,
|
|
"valid_targets_mean": 1212.4,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 4.336,
|
|
"grad_norm": 0.6838369303652589,
|
|
"learning_rate": 1.5219050749202037e-05,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23900796473026276,
|
|
"step": 2710,
|
|
"valid_targets_mean": 1247.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.344,
|
|
"grad_norm": 0.6932980897482319,
|
|
"learning_rate": 1.5141605867760021e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25772684812545776,
|
|
"step": 2715,
|
|
"valid_targets_mean": 1462.8,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.352,
|
|
"grad_norm": 0.6376147640857852,
|
|
"learning_rate": 1.5064238325770267e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24409160017967224,
|
|
"step": 2720,
|
|
"valid_targets_mean": 1370.3,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 4.36,
|
|
"grad_norm": 0.6449453804106439,
|
|
"learning_rate": 1.498694935482559e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2567211985588074,
|
|
"step": 2725,
|
|
"valid_targets_mean": 1537.6,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 4.368,
|
|
"grad_norm": 0.7266076951079028,
|
|
"learning_rate": 1.4909740185268056e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25755059719085693,
|
|
"step": 2730,
|
|
"valid_targets_mean": 1204.7,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.376,
|
|
"grad_norm": 0.6580993804899302,
|
|
"learning_rate": 1.4832612046169408e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553867697715759,
|
|
"step": 2735,
|
|
"valid_targets_mean": 1392.2,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 4.384,
|
|
"grad_norm": 0.6591451650111931,
|
|
"learning_rate": 1.4755566165311455e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27087101340293884,
|
|
"step": 2740,
|
|
"valid_targets_mean": 1558.6,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 4.392,
|
|
"grad_norm": 0.6708446619497337,
|
|
"learning_rate": 1.4678603769166591e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588583827018738,
|
|
"step": 2745,
|
|
"valid_targets_mean": 1302.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"grad_norm": 0.6825146469564338,
|
|
"learning_rate": 1.4601726082878226e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24621394276618958,
|
|
"step": 2750,
|
|
"valid_targets_mean": 1244.8,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 4.408,
|
|
"grad_norm": 0.7444808670480348,
|
|
"learning_rate": 1.4524934330241292e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529444694519043,
|
|
"step": 2755,
|
|
"valid_targets_mean": 1129.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 4.416,
|
|
"grad_norm": 0.6731503358818565,
|
|
"learning_rate": 1.4448229733682784e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24998781085014343,
|
|
"step": 2760,
|
|
"valid_targets_mean": 1352.8,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 4.424,
|
|
"grad_norm": 0.6870589246681222,
|
|
"learning_rate": 1.4371613514242264e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2391778826713562,
|
|
"step": 2765,
|
|
"valid_targets_mean": 1184.2,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 4.432,
|
|
"grad_norm": 0.6615619883987395,
|
|
"learning_rate": 1.4295086891552457e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24108171463012695,
|
|
"step": 2770,
|
|
"valid_targets_mean": 1289.5,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 4.44,
|
|
"grad_norm": 0.6308998030853582,
|
|
"learning_rate": 1.4218651083819811e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2479122281074524,
|
|
"step": 2775,
|
|
"valid_targets_mean": 1449.1,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 4.448,
|
|
"grad_norm": 0.6190806691831151,
|
|
"learning_rate": 1.4142307307805125e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24546939134597778,
|
|
"step": 2780,
|
|
"valid_targets_mean": 1490.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 4.456,
|
|
"grad_norm": 0.7217454642192399,
|
|
"learning_rate": 1.406605677880416e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24627843499183655,
|
|
"step": 2785,
|
|
"valid_targets_mean": 1110.8,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 4.464,
|
|
"grad_norm": 0.6352621836738089,
|
|
"learning_rate": 1.3989900710628313e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23787061870098114,
|
|
"step": 2790,
|
|
"valid_targets_mean": 1328.3,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 4.4719999999999995,
|
|
"grad_norm": 0.706645601747854,
|
|
"learning_rate": 1.3913840315585279e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26373565196990967,
|
|
"step": 2795,
|
|
"valid_targets_mean": 1350.8,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"grad_norm": 0.7347066266174838,
|
|
"learning_rate": 1.3837876804459765e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655651271343231,
|
|
"step": 2800,
|
|
"valid_targets_mean": 1359.9,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 4.4879999999999995,
|
|
"grad_norm": 0.7000435068601133,
|
|
"learning_rate": 1.3762011386494191e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638210952281952,
|
|
"step": 2805,
|
|
"valid_targets_mean": 1337.7,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.496,
|
|
"grad_norm": 0.6462688076270033,
|
|
"learning_rate": 1.3686245269369485e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566429376602173,
|
|
"step": 2810,
|
|
"valid_targets_mean": 1379.6,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.504,
|
|
"grad_norm": 0.7206319861354764,
|
|
"learning_rate": 1.3610579659185809e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24437640607357025,
|
|
"step": 2815,
|
|
"valid_targets_mean": 1113.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 4.5120000000000005,
|
|
"grad_norm": 0.7325813796727262,
|
|
"learning_rate": 1.35350157604434e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25034505128860474,
|
|
"step": 2820,
|
|
"valid_targets_mean": 1212.3,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 4.52,
|
|
"grad_norm": 0.7096914124841465,
|
|
"learning_rate": 1.345955477602337e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25081247091293335,
|
|
"step": 2825,
|
|
"valid_targets_mean": 1241.6,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.5280000000000005,
|
|
"grad_norm": 0.6433987080440745,
|
|
"learning_rate": 1.3384197907168561e-05,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25386953353881836,
|
|
"step": 2830,
|
|
"valid_targets_mean": 1520.9,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 4.536,
|
|
"grad_norm": 0.6681182905464845,
|
|
"learning_rate": 1.3308946353464438e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509141266345978,
|
|
"step": 2835,
|
|
"valid_targets_mean": 1309.1,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 4.5440000000000005,
|
|
"grad_norm": 0.7017365787228597,
|
|
"learning_rate": 1.3233801312819979e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543751001358032,
|
|
"step": 2840,
|
|
"valid_targets_mean": 1223.5,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 4.552,
|
|
"grad_norm": 0.6958094893225927,
|
|
"learning_rate": 1.3158763981448606e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25944116711616516,
|
|
"step": 2845,
|
|
"valid_targets_mean": 1195.2,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.5600000000000005,
|
|
"grad_norm": 0.68360671105646,
|
|
"learning_rate": 1.3083835553849148e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24183794856071472,
|
|
"step": 2850,
|
|
"valid_targets_mean": 1313.8,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 4.568,
|
|
"grad_norm": 0.6952397168160537,
|
|
"learning_rate": 1.3009017222786828e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23632170259952545,
|
|
"step": 2855,
|
|
"valid_targets_mean": 1198.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 4.576,
|
|
"grad_norm": 0.6500938519699755,
|
|
"learning_rate": 1.2934310179274269e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24437084794044495,
|
|
"step": 2860,
|
|
"valid_targets_mean": 1397.0,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 4.584,
|
|
"grad_norm": 0.6950857764516037,
|
|
"learning_rate": 1.2859715612552541e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2452639490365982,
|
|
"step": 2865,
|
|
"valid_targets_mean": 1172.4,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 4.592,
|
|
"grad_norm": 0.6470254114066142,
|
|
"learning_rate": 1.278523471007223e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24477365612983704,
|
|
"step": 2870,
|
|
"valid_targets_mean": 1292.6,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"grad_norm": 0.6941442662750443,
|
|
"learning_rate": 1.271086865747451e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26042091846466064,
|
|
"step": 2875,
|
|
"valid_targets_mean": 1390.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 4.608,
|
|
"grad_norm": 0.6986074659951925,
|
|
"learning_rate": 1.2636618638572316e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2476332187652588,
|
|
"step": 2880,
|
|
"valid_targets_mean": 1218.1,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.616,
|
|
"grad_norm": 0.6838057203342303,
|
|
"learning_rate": 1.2562485835331466e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24454310536384583,
|
|
"step": 2885,
|
|
"valid_targets_mean": 1276.9,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 4.624,
|
|
"grad_norm": 0.6758163329175509,
|
|
"learning_rate": 1.2488471427851852e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24147255718708038,
|
|
"step": 2890,
|
|
"valid_targets_mean": 1260.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 4.632,
|
|
"grad_norm": 0.6923482106850123,
|
|
"learning_rate": 1.241457659434866e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24787235260009766,
|
|
"step": 2895,
|
|
"valid_targets_mean": 1320.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"grad_norm": 0.7040386361556432,
|
|
"learning_rate": 1.2340802511133605e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517484426498413,
|
|
"step": 2900,
|
|
"valid_targets_mean": 1230.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.648,
|
|
"grad_norm": 0.7244017597163401,
|
|
"learning_rate": 1.2267150352596216e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23491953313350677,
|
|
"step": 2905,
|
|
"valid_targets_mean": 1202.7,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 4.656,
|
|
"grad_norm": 0.7081271756271076,
|
|
"learning_rate": 1.2193621291185132e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27502161264419556,
|
|
"step": 2910,
|
|
"valid_targets_mean": 1231.0,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.664,
|
|
"grad_norm": 0.6989341805387547,
|
|
"learning_rate": 1.2120216497389446e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622503638267517,
|
|
"step": 2915,
|
|
"valid_targets_mean": 1377.1,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 4.672,
|
|
"grad_norm": 0.6922404251462568,
|
|
"learning_rate": 1.2046937139720068e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2525664269924164,
|
|
"step": 2920,
|
|
"valid_targets_mean": 1295.2,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"grad_norm": 0.6743364067380119,
|
|
"learning_rate": 1.1973784384691121e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518729865550995,
|
|
"step": 2925,
|
|
"valid_targets_mean": 1356.4,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 4.688,
|
|
"grad_norm": 0.6487976873950894,
|
|
"learning_rate": 1.1900759396801382e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26148733496665955,
|
|
"step": 2930,
|
|
"valid_targets_mean": 1495.9,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 4.696,
|
|
"grad_norm": 0.6981332197040793,
|
|
"learning_rate": 1.1827863338515741e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24565152823925018,
|
|
"step": 2935,
|
|
"valid_targets_mean": 1197.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 4.704,
|
|
"grad_norm": 0.6817749798446157,
|
|
"learning_rate": 1.1755097370246669e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24504896998405457,
|
|
"step": 2940,
|
|
"valid_targets_mean": 1293.8,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 4.712,
|
|
"grad_norm": 0.6692660460074669,
|
|
"learning_rate": 1.1682462650335791e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2478242963552475,
|
|
"step": 2945,
|
|
"valid_targets_mean": 1342.5,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 4.72,
|
|
"grad_norm": 0.6498478843390071,
|
|
"learning_rate": 1.1609960335035423e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28133296966552734,
|
|
"step": 2950,
|
|
"valid_targets_mean": 1772.6,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 4.728,
|
|
"grad_norm": 0.6743545068116684,
|
|
"learning_rate": 1.1537591578490165e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26180499792099,
|
|
"step": 2955,
|
|
"valid_targets_mean": 1460.1,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.736,
|
|
"grad_norm": 0.6853943674076828,
|
|
"learning_rate": 1.146535753271853e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24321532249450684,
|
|
"step": 2960,
|
|
"valid_targets_mean": 1265.6,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 4.744,
|
|
"grad_norm": 0.6724557126503471,
|
|
"learning_rate": 1.139325934759461e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539277672767639,
|
|
"step": 2965,
|
|
"valid_targets_mean": 1357.1,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 4.752,
|
|
"grad_norm": 0.8034347184554278,
|
|
"learning_rate": 1.1321298170829768e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586537003517151,
|
|
"step": 2970,
|
|
"valid_targets_mean": 1217.7,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 4.76,
|
|
"grad_norm": 0.6654941383051768,
|
|
"learning_rate": 1.1249475147954363e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25424063205718994,
|
|
"step": 2975,
|
|
"valid_targets_mean": 1436.7,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 4.768,
|
|
"grad_norm": 0.6753522818234925,
|
|
"learning_rate": 1.1177791422299528e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27052754163742065,
|
|
"step": 2980,
|
|
"valid_targets_mean": 1509.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 4.776,
|
|
"grad_norm": 0.7056057662388697,
|
|
"learning_rate": 1.1106248134978959e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597852945327759,
|
|
"step": 2985,
|
|
"valid_targets_mean": 1225.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 4.784,
|
|
"grad_norm": 0.7288629262925027,
|
|
"learning_rate": 1.1034846424870744e-05,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24987894296646118,
|
|
"step": 2990,
|
|
"valid_targets_mean": 1444.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 4.792,
|
|
"grad_norm": 0.6648487683181882,
|
|
"learning_rate": 1.0963587428599256e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919997274875641,
|
|
"step": 2995,
|
|
"valid_targets_mean": 1735.0,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"grad_norm": 0.7136418205935049,
|
|
"learning_rate": 1.089247228051704e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22973445057868958,
|
|
"step": 3000,
|
|
"valid_targets_mean": 1149.8,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.808,
|
|
"grad_norm": 0.6850215403969977,
|
|
"learning_rate": 1.0821502112686753e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.252066045999527,
|
|
"step": 3005,
|
|
"valid_targets_mean": 1317.4,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.816,
|
|
"grad_norm": 0.7149581789686188,
|
|
"learning_rate": 1.0750678054863158e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2506260573863983,
|
|
"step": 3010,
|
|
"valid_targets_mean": 1437.4,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 4.824,
|
|
"grad_norm": 0.6835899702081906,
|
|
"learning_rate": 1.0680001234475127e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870747148990631,
|
|
"step": 3015,
|
|
"valid_targets_mean": 1495.9,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 4.832,
|
|
"grad_norm": 0.703784725906714,
|
|
"learning_rate": 1.0609472776607715e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575741410255432,
|
|
"step": 3020,
|
|
"valid_targets_mean": 1141.8,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 4.84,
|
|
"grad_norm": 0.6533142879898276,
|
|
"learning_rate": 1.0539093803984217e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2441544532775879,
|
|
"step": 3025,
|
|
"valid_targets_mean": 1420.1,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 4.848,
|
|
"grad_norm": 0.6723835559023364,
|
|
"learning_rate": 1.046886543694832e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25124266743659973,
|
|
"step": 3030,
|
|
"valid_targets_mean": 1324.6,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 4.856,
|
|
"grad_norm": 0.6911023845236709,
|
|
"learning_rate": 1.0398788793446263e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542283535003662,
|
|
"step": 3035,
|
|
"valid_targets_mean": 1342.3,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 4.864,
|
|
"grad_norm": 0.6715727825739723,
|
|
"learning_rate": 1.0328864989009037e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27292191982269287,
|
|
"step": 3040,
|
|
"valid_targets_mean": 1397.9,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 4.872,
|
|
"grad_norm": 0.704961950236789,
|
|
"learning_rate": 1.0259095136734634e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24199113249778748,
|
|
"step": 3045,
|
|
"valid_targets_mean": 1263.9,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.88,
|
|
"grad_norm": 0.7143041879942624,
|
|
"learning_rate": 1.0189480347270311e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649514675140381,
|
|
"step": 3050,
|
|
"valid_targets_mean": 1257.9,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 4.888,
|
|
"grad_norm": 0.66698431782738,
|
|
"learning_rate": 1.0120021728794938e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257032185792923,
|
|
"step": 3055,
|
|
"valid_targets_mean": 1442.4,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 4.896,
|
|
"grad_norm": 0.7360294628244994,
|
|
"learning_rate": 1.0050720387001334e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24194364249706268,
|
|
"step": 3060,
|
|
"valid_targets_mean": 1130.2,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 4.904,
|
|
"grad_norm": 0.6910518898635257,
|
|
"learning_rate": 9.981577425078672e-06,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2463265359401703,
|
|
"step": 3065,
|
|
"valid_targets_mean": 1288.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 4.912,
|
|
"grad_norm": 0.686943427952227,
|
|
"learning_rate": 9.912593943694924e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24577048420906067,
|
|
"step": 3070,
|
|
"valid_targets_mean": 1249.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.92,
|
|
"grad_norm": 0.7262934853794486,
|
|
"learning_rate": 9.843771040979328e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2426367700099945,
|
|
"step": 3075,
|
|
"valid_targets_mean": 1139.6,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 4.928,
|
|
"grad_norm": 0.7115513572200957,
|
|
"learning_rate": 9.775109812504922e-06,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26636549830436707,
|
|
"step": 3080,
|
|
"valid_targets_mean": 1407.6,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 4.936,
|
|
"grad_norm": 0.7106921066772746,
|
|
"learning_rate": 9.706611351271088e-06,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24484871327877045,
|
|
"step": 3085,
|
|
"valid_targets_mean": 1168.4,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 4.944,
|
|
"grad_norm": 0.6626622150330714,
|
|
"learning_rate": 9.638276747686169e-06,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24687638878822327,
|
|
"step": 3090,
|
|
"valid_targets_mean": 1351.1,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 4.952,
|
|
"grad_norm": 0.6601586891304977,
|
|
"learning_rate": 9.570107089550091e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642899453639984,
|
|
"step": 3095,
|
|
"valid_targets_mean": 1515.6,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 4.96,
|
|
"grad_norm": 0.6828442058963528,
|
|
"learning_rate": 9.502103462037074e-06,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24730509519577026,
|
|
"step": 3100,
|
|
"valid_targets_mean": 1309.7,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 4.968,
|
|
"grad_norm": 0.6716720263669597,
|
|
"learning_rate": 9.434266947678326e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25155168771743774,
|
|
"step": 3105,
|
|
"valid_targets_mean": 1367.3,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 4.976,
|
|
"grad_norm": 0.6512838674069077,
|
|
"learning_rate": 9.366598626344836e-06,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25202611088752747,
|
|
"step": 3110,
|
|
"valid_targets_mean": 1484.6,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 4.984,
|
|
"grad_norm": 0.6660761765433434,
|
|
"learning_rate": 9.299099575230172e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511383891105652,
|
|
"step": 3115,
|
|
"valid_targets_mean": 1355.9,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 4.992,
|
|
"grad_norm": 0.6893950416619892,
|
|
"learning_rate": 9.231770868833334e-06,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24409474432468414,
|
|
"step": 3120,
|
|
"valid_targets_mean": 1220.9,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6504252304118086,
|
|
"learning_rate": 9.164613578941652e-06,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24151936173439026,
|
|
"step": 3125,
|
|
"valid_targets_mean": 1373.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.008,
|
|
"grad_norm": 0.6715755796568568,
|
|
"learning_rate": 9.097628774613732e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24477669596672058,
|
|
"step": 3130,
|
|
"valid_targets_mean": 1449.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 5.016,
|
|
"grad_norm": 0.6991678117559349,
|
|
"learning_rate": 9.030817522162403e-06,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24060535430908203,
|
|
"step": 3135,
|
|
"valid_targets_mean": 1286.8,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 5.024,
|
|
"grad_norm": 0.7146189475370768,
|
|
"learning_rate": 8.964180885137797e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540103793144226,
|
|
"step": 3140,
|
|
"valid_targets_mean": 1322.5,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 5.032,
|
|
"grad_norm": 0.6801067206155416,
|
|
"learning_rate": 8.897719924310375e-06,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22609618306159973,
|
|
"step": 3145,
|
|
"valid_targets_mean": 1309.8,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 5.04,
|
|
"grad_norm": 0.6832994135491867,
|
|
"learning_rate": 8.831435697654068e-06,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2523775100708008,
|
|
"step": 3150,
|
|
"valid_targets_mean": 1401.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.048,
|
|
"grad_norm": 0.6840188478683501,
|
|
"learning_rate": 8.765329260329413e-06,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2315932810306549,
|
|
"step": 3155,
|
|
"valid_targets_mean": 1377.4,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 5.056,
|
|
"grad_norm": 0.6881234483277803,
|
|
"learning_rate": 8.699401664666774e-06,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24423149228096008,
|
|
"step": 3160,
|
|
"valid_targets_mean": 1347.3,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 5.064,
|
|
"grad_norm": 0.7088161732694084,
|
|
"learning_rate": 8.633653960149579e-06,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25290802121162415,
|
|
"step": 3165,
|
|
"valid_targets_mean": 1326.4,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 5.072,
|
|
"grad_norm": 0.6564421369860639,
|
|
"learning_rate": 8.56808719339762e-06,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517143487930298,
|
|
"step": 3170,
|
|
"valid_targets_mean": 1402.5,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 5.08,
|
|
"grad_norm": 0.7127921919378486,
|
|
"learning_rate": 8.502702408150391e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22026586532592773,
|
|
"step": 3175,
|
|
"valid_targets_mean": 1138.4,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 5.088,
|
|
"grad_norm": 0.752387806115798,
|
|
"learning_rate": 8.43750064525047e-06,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2398773729801178,
|
|
"step": 3180,
|
|
"valid_targets_mean": 1132.1,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 5.096,
|
|
"grad_norm": 0.726822538951681,
|
|
"learning_rate": 8.372482942626952e-06,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23133322596549988,
|
|
"step": 3185,
|
|
"valid_targets_mean": 1225.6,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 5.104,
|
|
"grad_norm": 0.7021443933077153,
|
|
"learning_rate": 8.307650335278927e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23946408927440643,
|
|
"step": 3190,
|
|
"valid_targets_mean": 1296.1,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.112,
|
|
"grad_norm": 0.765913187873542,
|
|
"learning_rate": 8.243003855259015e-06,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2345171868801117,
|
|
"step": 3195,
|
|
"valid_targets_mean": 1147.8,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 5.12,
|
|
"grad_norm": 0.6978526924402231,
|
|
"learning_rate": 8.178544531656897e-06,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22995984554290771,
|
|
"step": 3200,
|
|
"valid_targets_mean": 1188.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.128,
|
|
"grad_norm": 0.6330276132528445,
|
|
"learning_rate": 8.11427339058299e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2427782416343689,
|
|
"step": 3205,
|
|
"valid_targets_mean": 1519.7,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 5.136,
|
|
"grad_norm": 0.7285364352686475,
|
|
"learning_rate": 8.050191455152072e-06,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24198296666145325,
|
|
"step": 3210,
|
|
"valid_targets_mean": 1185.3,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.144,
|
|
"grad_norm": 0.6787890511613267,
|
|
"learning_rate": 7.986299745467013e-06,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25340622663497925,
|
|
"step": 3215,
|
|
"valid_targets_mean": 1583.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 5.152,
|
|
"grad_norm": 0.7817556906844635,
|
|
"learning_rate": 7.922599278602524e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22889339923858643,
|
|
"step": 3220,
|
|
"valid_targets_mean": 1058.9,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 5.16,
|
|
"grad_norm": 0.7355522978789336,
|
|
"learning_rate": 7.859091068588987e-06,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21840153634548187,
|
|
"step": 3225,
|
|
"valid_targets_mean": 1122.7,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 5.168,
|
|
"grad_norm": 0.723050078089713,
|
|
"learning_rate": 7.795776126396284e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22929269075393677,
|
|
"step": 3230,
|
|
"valid_targets_mean": 1167.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.176,
|
|
"grad_norm": 0.7048836847803331,
|
|
"learning_rate": 7.732655459917726e-06,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24611414968967438,
|
|
"step": 3235,
|
|
"valid_targets_mean": 1259.9,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 5.184,
|
|
"grad_norm": 0.7077217179635145,
|
|
"learning_rate": 7.669730073954005e-06,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23491084575653076,
|
|
"step": 3240,
|
|
"valid_targets_mean": 1320.3,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 5.192,
|
|
"grad_norm": 0.7159076923566614,
|
|
"learning_rate": 7.607000970197194e-06,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23441427946090698,
|
|
"step": 3245,
|
|
"valid_targets_mean": 1295.0,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.2,
|
|
"grad_norm": 0.7498248893640597,
|
|
"learning_rate": 7.544469147214797e-06,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23341432213783264,
|
|
"step": 3250,
|
|
"valid_targets_mean": 1174.8,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 5.208,
|
|
"grad_norm": 0.7631634163487188,
|
|
"learning_rate": 7.482135600433868e-06,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25253504514694214,
|
|
"step": 3255,
|
|
"valid_targets_mean": 1232.4,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 5.216,
|
|
"grad_norm": 0.7808590855554689,
|
|
"learning_rate": 7.420001322125156e-06,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23996907472610474,
|
|
"step": 3260,
|
|
"valid_targets_mean": 1328.5,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 5.224,
|
|
"grad_norm": 0.7383609844447404,
|
|
"learning_rate": 7.3580673013872946e-06,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2377435266971588,
|
|
"step": 3265,
|
|
"valid_targets_mean": 1278.5,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 5.232,
|
|
"grad_norm": 0.7695468167943351,
|
|
"learning_rate": 7.2963345241310904e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2231089323759079,
|
|
"step": 3270,
|
|
"valid_targets_mean": 1164.7,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 5.24,
|
|
"grad_norm": 0.7350613982243853,
|
|
"learning_rate": 7.234803973063797e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2598554491996765,
|
|
"step": 3275,
|
|
"valid_targets_mean": 1666.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 5.248,
|
|
"grad_norm": 0.7340126767106325,
|
|
"learning_rate": 7.173476627673492e-06,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23413725197315216,
|
|
"step": 3280,
|
|
"valid_targets_mean": 1324.0,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 5.256,
|
|
"grad_norm": 0.7167404681050423,
|
|
"learning_rate": 7.112353464213477e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24914006888866425,
|
|
"step": 3285,
|
|
"valid_targets_mean": 1364.6,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 5.264,
|
|
"grad_norm": 0.7236406600949122,
|
|
"learning_rate": 7.051435455686735e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2116950899362564,
|
|
"step": 3290,
|
|
"valid_targets_mean": 1016.9,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 5.272,
|
|
"grad_norm": 0.675508978614003,
|
|
"learning_rate": 6.990723571830438e-06,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23331603407859802,
|
|
"step": 3295,
|
|
"valid_targets_mean": 1641.6,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 5.28,
|
|
"grad_norm": 0.6818405594508099,
|
|
"learning_rate": 6.93021877910052e-06,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246658205986023,
|
|
"step": 3300,
|
|
"valid_targets_mean": 1358.3,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.288,
|
|
"grad_norm": 0.7739785226430342,
|
|
"learning_rate": 6.8699220406562985e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2512899339199066,
|
|
"step": 3305,
|
|
"valid_targets_mean": 1205.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 5.296,
|
|
"grad_norm": 0.7061427490486496,
|
|
"learning_rate": 6.809834316345117e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522796094417572,
|
|
"step": 3310,
|
|
"valid_targets_mean": 1335.8,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 5.304,
|
|
"grad_norm": 0.7443417703544494,
|
|
"learning_rate": 6.749956562687083e-06,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25568628311157227,
|
|
"step": 3315,
|
|
"valid_targets_mean": 1547.6,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 5.312,
|
|
"grad_norm": 0.7122833623518084,
|
|
"learning_rate": 6.690289732859841e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22702521085739136,
|
|
"step": 3320,
|
|
"valid_targets_mean": 1337.6,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.32,
|
|
"grad_norm": 0.7501934678329574,
|
|
"learning_rate": 6.630834776683403e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24431604146957397,
|
|
"step": 3325,
|
|
"valid_targets_mean": 1359.2,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 5.328,
|
|
"grad_norm": 0.7192714913480165,
|
|
"learning_rate": 6.571592640605e-06,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620084881782532,
|
|
"step": 3330,
|
|
"valid_targets_mean": 1464.8,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 5.336,
|
|
"grad_norm": 0.6813583816647133,
|
|
"learning_rate": 6.512564267684061e-06,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528916597366333,
|
|
"step": 3335,
|
|
"valid_targets_mean": 1720.8,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 5.344,
|
|
"grad_norm": 0.6861703280210316,
|
|
"learning_rate": 6.453750597577167e-06,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521404027938843,
|
|
"step": 3340,
|
|
"valid_targets_mean": 1567.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 5.352,
|
|
"grad_norm": 0.7284595333289756,
|
|
"learning_rate": 6.395152566523106e-06,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22243276238441467,
|
|
"step": 3345,
|
|
"valid_targets_mean": 1145.8,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 5.36,
|
|
"grad_norm": 0.7473848511321307,
|
|
"learning_rate": 6.336771107327966e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2358008623123169,
|
|
"step": 3350,
|
|
"valid_targets_mean": 1199.9,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 5.368,
|
|
"grad_norm": 0.6785013247565734,
|
|
"learning_rate": 6.278607149350289e-06,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23492605984210968,
|
|
"step": 3355,
|
|
"valid_targets_mean": 1438.1,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 5.376,
|
|
"grad_norm": 0.7799010961842815,
|
|
"learning_rate": 6.220661618486268e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24882496893405914,
|
|
"step": 3360,
|
|
"valid_targets_mean": 1231.2,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.384,
|
|
"grad_norm": 0.7556202652749414,
|
|
"learning_rate": 6.162935437155024e-06,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23796001076698303,
|
|
"step": 3365,
|
|
"valid_targets_mean": 1241.6,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 5.392,
|
|
"grad_norm": 0.7221926718720368,
|
|
"learning_rate": 6.105429524283901e-06,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2359953671693802,
|
|
"step": 3370,
|
|
"valid_targets_mean": 1207.3,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.4,
|
|
"grad_norm": 0.7209558815646294,
|
|
"learning_rate": 6.04814479529386e-06,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24364429712295532,
|
|
"step": 3375,
|
|
"valid_targets_mean": 1257.2,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 5.408,
|
|
"grad_norm": 0.688886450794107,
|
|
"learning_rate": 5.991082162084889e-06,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24487216770648956,
|
|
"step": 3380,
|
|
"valid_targets_mean": 1446.4,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 5.416,
|
|
"grad_norm": 0.7452337559532781,
|
|
"learning_rate": 5.934242533021499e-06,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581064701080322,
|
|
"step": 3385,
|
|
"valid_targets_mean": 1261.1,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 5.424,
|
|
"grad_norm": 0.6869684089831242,
|
|
"learning_rate": 5.877626812918258e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23654977977275848,
|
|
"step": 3390,
|
|
"valid_targets_mean": 1390.6,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 5.432,
|
|
"grad_norm": 0.6682580418231815,
|
|
"learning_rate": 5.821235903025378e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21641075611114502,
|
|
"step": 3395,
|
|
"valid_targets_mean": 1276.0,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 5.44,
|
|
"grad_norm": 0.6879902966132936,
|
|
"learning_rate": 5.765070701014391e-06,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23468250036239624,
|
|
"step": 3400,
|
|
"valid_targets_mean": 1366.7,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 5.448,
|
|
"grad_norm": 0.7369538633830218,
|
|
"learning_rate": 5.709132100963841e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24121759831905365,
|
|
"step": 3405,
|
|
"valid_targets_mean": 1283.8,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 5.456,
|
|
"grad_norm": 0.6591303669109096,
|
|
"learning_rate": 5.653420993345062e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24720197916030884,
|
|
"step": 3410,
|
|
"valid_targets_mean": 1498.6,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.464,
|
|
"grad_norm": 0.7086116832097555,
|
|
"learning_rate": 5.597938265007994e-06,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23889540135860443,
|
|
"step": 3415,
|
|
"valid_targets_mean": 1316.0,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 5.4719999999999995,
|
|
"grad_norm": 0.7032555585214957,
|
|
"learning_rate": 5.542684799167069e-06,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2220039665699005,
|
|
"step": 3420,
|
|
"valid_targets_mean": 1227.2,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 5.48,
|
|
"grad_norm": 0.7001325507144122,
|
|
"learning_rate": 5.487661475387152e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22191809117794037,
|
|
"step": 3425,
|
|
"valid_targets_mean": 1167.6,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 5.4879999999999995,
|
|
"grad_norm": 0.7475710995501054,
|
|
"learning_rate": 5.432869169569541e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24566727876663208,
|
|
"step": 3430,
|
|
"valid_targets_mean": 1308.1,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.496,
|
|
"grad_norm": 0.7255207739941208,
|
|
"learning_rate": 5.378308753938024e-06,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23658011853694916,
|
|
"step": 3435,
|
|
"valid_targets_mean": 1190.3,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 5.504,
|
|
"grad_norm": 0.6843468963149033,
|
|
"learning_rate": 5.323981097024986e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24397364258766174,
|
|
"step": 3440,
|
|
"valid_targets_mean": 1378.1,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 5.5120000000000005,
|
|
"grad_norm": 0.7052517347139078,
|
|
"learning_rate": 5.269887063657595e-06,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23196235299110413,
|
|
"step": 3445,
|
|
"valid_targets_mean": 1253.1,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 5.52,
|
|
"grad_norm": 0.7084283425986678,
|
|
"learning_rate": 5.216027514944027e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24132892489433289,
|
|
"step": 3450,
|
|
"valid_targets_mean": 1298.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 5.5280000000000005,
|
|
"grad_norm": 0.7264202198489862,
|
|
"learning_rate": 5.162403308259767e-06,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2310267984867096,
|
|
"step": 3455,
|
|
"valid_targets_mean": 1158.1,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 5.536,
|
|
"grad_norm": 0.7411039954825046,
|
|
"learning_rate": 5.109015297233935e-06,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2423068732023239,
|
|
"step": 3460,
|
|
"valid_targets_mean": 1244.8,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 5.5440000000000005,
|
|
"grad_norm": 0.7598257803309283,
|
|
"learning_rate": 5.055864331735736e-06,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21444880962371826,
|
|
"step": 3465,
|
|
"valid_targets_mean": 964.5,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 5.552,
|
|
"grad_norm": 0.7607125529990287,
|
|
"learning_rate": 5.002951257860909e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23588283360004425,
|
|
"step": 3470,
|
|
"valid_targets_mean": 1128.8,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 5.5600000000000005,
|
|
"grad_norm": 0.7753734839332812,
|
|
"learning_rate": 4.950276917918256e-06,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608233094215393,
|
|
"step": 3475,
|
|
"valid_targets_mean": 1181.1,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 5.568,
|
|
"grad_norm": 0.7451354233713086,
|
|
"learning_rate": 4.8978421504162385e-06,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24013280868530273,
|
|
"step": 3480,
|
|
"valid_targets_mean": 1181.5,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.576,
|
|
"grad_norm": 0.6807142828844108,
|
|
"learning_rate": 4.845647790049634e-06,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23125770688056946,
|
|
"step": 3485,
|
|
"valid_targets_mean": 1366.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.584,
|
|
"grad_norm": 0.6726312721247383,
|
|
"learning_rate": 4.793694667686244e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2524241507053375,
|
|
"step": 3490,
|
|
"valid_targets_mean": 1603.4,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 5.592,
|
|
"grad_norm": 0.7800708692797956,
|
|
"learning_rate": 4.741983610353664e-06,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24468514323234558,
|
|
"step": 3495,
|
|
"valid_targets_mean": 1075.0,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 5.6,
|
|
"grad_norm": 0.6873060278841281,
|
|
"learning_rate": 4.690515441226122e-06,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22999659180641174,
|
|
"step": 3500,
|
|
"valid_targets_mean": 1419.1,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 5.608,
|
|
"grad_norm": 0.7471440314105869,
|
|
"learning_rate": 4.639290979611379e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.234755739569664,
|
|
"step": 3505,
|
|
"valid_targets_mean": 1151.3,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 5.616,
|
|
"grad_norm": 0.7123928887469533,
|
|
"learning_rate": 4.588311040937683e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2435423582792282,
|
|
"step": 3510,
|
|
"valid_targets_mean": 1452.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 5.624,
|
|
"grad_norm": 0.6880915033442926,
|
|
"learning_rate": 4.537576436740783e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22903689742088318,
|
|
"step": 3515,
|
|
"valid_targets_mean": 1387.2,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 5.632,
|
|
"grad_norm": 0.6826366940823673,
|
|
"learning_rate": 4.487087974651016e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24762219190597534,
|
|
"step": 3520,
|
|
"valid_targets_mean": 1494.5,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 5.64,
|
|
"grad_norm": 0.6664376406983509,
|
|
"learning_rate": 4.436846458380455e-06,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22574418783187866,
|
|
"step": 3525,
|
|
"valid_targets_mean": 1323.1,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 5.648,
|
|
"grad_norm": 0.7287861193273228,
|
|
"learning_rate": 4.386852687710104e-06,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24595105648040771,
|
|
"step": 3530,
|
|
"valid_targets_mean": 1222.8,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 5.656,
|
|
"grad_norm": 0.722074637967132,
|
|
"learning_rate": 4.337107458477177e-06,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25595757365226746,
|
|
"step": 3535,
|
|
"valid_targets_mean": 1291.8,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 5.664,
|
|
"grad_norm": 0.7318662222148391,
|
|
"learning_rate": 4.287611562562422e-06,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2375495880842209,
|
|
"step": 3540,
|
|
"valid_targets_mean": 1347.1,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.672,
|
|
"grad_norm": 0.6801904799364237,
|
|
"learning_rate": 4.238365787877516e-06,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273668110370636,
|
|
"step": 3545,
|
|
"valid_targets_mean": 1686.3,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 5.68,
|
|
"grad_norm": 0.7344888576773735,
|
|
"learning_rate": 4.189370918352531e-06,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22617895901203156,
|
|
"step": 3550,
|
|
"valid_targets_mean": 1147.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 5.688,
|
|
"grad_norm": 0.7065560935185432,
|
|
"learning_rate": 4.140627733923439e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2397240698337555,
|
|
"step": 3555,
|
|
"valid_targets_mean": 1302.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 5.696,
|
|
"grad_norm": 0.715385171191787,
|
|
"learning_rate": 4.092137010519712e-06,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2350618988275528,
|
|
"step": 3560,
|
|
"valid_targets_mean": 1265.7,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 5.704,
|
|
"grad_norm": 0.7641166308135144,
|
|
"learning_rate": 4.043899520051964e-06,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23024366796016693,
|
|
"step": 3565,
|
|
"valid_targets_mean": 1100.1,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 5.712,
|
|
"grad_norm": 0.7381755807492725,
|
|
"learning_rate": 3.995916030399658e-06,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23974119126796722,
|
|
"step": 3570,
|
|
"valid_targets_mean": 1202.8,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 5.72,
|
|
"grad_norm": 0.7616971991277222,
|
|
"learning_rate": 3.948187305398892e-06,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24974742531776428,
|
|
"step": 3575,
|
|
"valid_targets_mean": 1306.2,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 5.728,
|
|
"grad_norm": 0.8140447073294639,
|
|
"learning_rate": 3.90071410483023e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767132818698883,
|
|
"step": 3580,
|
|
"valid_targets_mean": 1289.1,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 5.736,
|
|
"grad_norm": 0.7420994877927274,
|
|
"learning_rate": 3.853497184406623e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21718215942382812,
|
|
"step": 3585,
|
|
"valid_targets_mean": 1012.9,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 5.744,
|
|
"grad_norm": 0.7077131820116656,
|
|
"learning_rate": 3.80653729576135e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22454826533794403,
|
|
"step": 3590,
|
|
"valid_targets_mean": 1246.8,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.752,
|
|
"grad_norm": 0.6869228644646804,
|
|
"learning_rate": 3.7598351864360872e-06,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22253365814685822,
|
|
"step": 3595,
|
|
"valid_targets_mean": 1226.1,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 5.76,
|
|
"grad_norm": 0.7614713610632257,
|
|
"learning_rate": 3.713391599868985e-06,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22991634905338287,
|
|
"step": 3600,
|
|
"valid_targets_mean": 1070.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 5.768,
|
|
"grad_norm": 0.7310747612390063,
|
|
"learning_rate": 3.6672072753828424e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23760882019996643,
|
|
"step": 3605,
|
|
"valid_targets_mean": 1240.8,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 5.776,
|
|
"grad_norm": 0.716031286595589,
|
|
"learning_rate": 3.6212829481733368e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25068017840385437,
|
|
"step": 3610,
|
|
"valid_targets_mean": 1364.7,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 5.784,
|
|
"grad_norm": 0.7208722825547398,
|
|
"learning_rate": 3.575619349297317e-06,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2525652050971985,
|
|
"step": 3615,
|
|
"valid_targets_mean": 1410.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 5.792,
|
|
"grad_norm": 0.7359185170616163,
|
|
"learning_rate": 3.5302172056611682e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22956907749176025,
|
|
"step": 3620,
|
|
"valid_targets_mean": 1167.9,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 5.8,
|
|
"grad_norm": 0.6914912487949498,
|
|
"learning_rate": 3.485077240009247e-06,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24244782328605652,
|
|
"step": 3625,
|
|
"valid_targets_mean": 1371.4,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 5.808,
|
|
"grad_norm": 0.6990277762072865,
|
|
"learning_rate": 3.4402001709123643e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24179582297801971,
|
|
"step": 3630,
|
|
"valid_targets_mean": 1392.8,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 5.816,
|
|
"grad_norm": 0.7379110055934747,
|
|
"learning_rate": 3.3955867127563515e-06,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23867405951023102,
|
|
"step": 3635,
|
|
"valid_targets_mean": 1246.1,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.824,
|
|
"grad_norm": 0.7010804114146375,
|
|
"learning_rate": 3.351237575730695e-06,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23783151805400848,
|
|
"step": 3640,
|
|
"valid_targets_mean": 1324.3,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 5.832,
|
|
"grad_norm": 0.7226756964050675,
|
|
"learning_rate": 3.307153465817219e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596341371536255,
|
|
"step": 3645,
|
|
"valid_targets_mean": 1432.4,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.84,
|
|
"grad_norm": 0.7033688034864882,
|
|
"learning_rate": 3.263335084778856e-06,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23930712044239044,
|
|
"step": 3650,
|
|
"valid_targets_mean": 1272.7,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 5.848,
|
|
"grad_norm": 0.7805468068008702,
|
|
"learning_rate": 3.2197831301484816e-06,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24556457996368408,
|
|
"step": 3655,
|
|
"valid_targets_mean": 1156.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 5.856,
|
|
"grad_norm": 0.7490449802420018,
|
|
"learning_rate": 3.1764982952177805e-06,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23505446314811707,
|
|
"step": 3660,
|
|
"valid_targets_mean": 1181.4,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 5.864,
|
|
"grad_norm": 0.7182084556577327,
|
|
"learning_rate": 3.1334812690262507e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2507456839084625,
|
|
"step": 3665,
|
|
"valid_targets_mean": 1446.4,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 5.872,
|
|
"grad_norm": 0.6965411623106647,
|
|
"learning_rate": 3.0907327363502084e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23397178947925568,
|
|
"step": 3670,
|
|
"valid_targets_mean": 1288.6,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 5.88,
|
|
"grad_norm": 0.7226691004945118,
|
|
"learning_rate": 3.0482533776918987e-06,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2319324016571045,
|
|
"step": 3675,
|
|
"valid_targets_mean": 1255.2,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 5.888,
|
|
"grad_norm": 0.6875665650487043,
|
|
"learning_rate": 3.0060438692686533e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2293645143508911,
|
|
"step": 3680,
|
|
"valid_targets_mean": 1300.9,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 5.896,
|
|
"grad_norm": 0.7247478982980132,
|
|
"learning_rate": 2.964104883002139e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23502866923809052,
|
|
"step": 3685,
|
|
"valid_targets_mean": 1234.5,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 5.904,
|
|
"grad_norm": 0.7913836705206649,
|
|
"learning_rate": 2.9224370865076457e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24870547652244568,
|
|
"step": 3690,
|
|
"valid_targets_mean": 1201.2,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 5.912,
|
|
"grad_norm": 0.7208266256753435,
|
|
"learning_rate": 2.8810411430834716e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23251444101333618,
|
|
"step": 3695,
|
|
"valid_targets_mean": 1213.2,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 5.92,
|
|
"grad_norm": 0.6705564137253074,
|
|
"learning_rate": 2.8399177117003595e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23906902968883514,
|
|
"step": 3700,
|
|
"valid_targets_mean": 1488.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 5.928,
|
|
"grad_norm": 0.707889409133832,
|
|
"learning_rate": 2.7990674469910085e-06,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529758810997009,
|
|
"step": 3705,
|
|
"valid_targets_mean": 1464.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 5.936,
|
|
"grad_norm": 1.0807301063491397,
|
|
"learning_rate": 2.7584909992396515e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23173265159130096,
|
|
"step": 3710,
|
|
"valid_targets_mean": 1296.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.944,
|
|
"grad_norm": 0.7133458492189336,
|
|
"learning_rate": 2.7181890143716995e-06,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23026418685913086,
|
|
"step": 3715,
|
|
"valid_targets_mean": 1283.8,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 5.952,
|
|
"grad_norm": 0.7052703531031919,
|
|
"learning_rate": 2.6781621339434717e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23370087146759033,
|
|
"step": 3720,
|
|
"valid_targets_mean": 1250.6,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 5.96,
|
|
"grad_norm": 0.718463099248823,
|
|
"learning_rate": 2.638410995131966e-06,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2589815855026245,
|
|
"step": 3725,
|
|
"valid_targets_mean": 1484.1,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.968,
|
|
"grad_norm": 0.7475419288142809,
|
|
"learning_rate": 2.5989362307247313e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511245012283325,
|
|
"step": 3730,
|
|
"valid_targets_mean": 1201.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 5.976,
|
|
"grad_norm": 0.6481537691310473,
|
|
"learning_rate": 2.5597384691097847e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23358039557933807,
|
|
"step": 3735,
|
|
"valid_targets_mean": 1656.9,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 5.984,
|
|
"grad_norm": 0.6823004666996731,
|
|
"learning_rate": 2.520818334265611e-06,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24928034842014313,
|
|
"step": 3740,
|
|
"valid_targets_mean": 1546.1,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 5.992,
|
|
"grad_norm": 0.7152261560941364,
|
|
"learning_rate": 2.482176445751232e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22823381423950195,
|
|
"step": 3745,
|
|
"valid_targets_mean": 1219.4,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.703183589643631,
|
|
"learning_rate": 2.4438134186963415e-06,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.252239465713501,
|
|
"step": 3750,
|
|
"valid_targets_mean": 1522.9,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 6.008,
|
|
"grad_norm": 0.7514371815069106,
|
|
"learning_rate": 2.4057298637915105e-06,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24327628314495087,
|
|
"step": 3755,
|
|
"valid_targets_mean": 1256.9,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.016,
|
|
"grad_norm": 0.7466442149349161,
|
|
"learning_rate": 2.3679263872784717e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2222370207309723,
|
|
"step": 3760,
|
|
"valid_targets_mean": 1156.6,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 6.024,
|
|
"grad_norm": 0.7487706945383559,
|
|
"learning_rate": 2.330403590940471e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26308536529541016,
|
|
"step": 3765,
|
|
"valid_targets_mean": 1470.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 6.032,
|
|
"grad_norm": 0.6001362653197189,
|
|
"learning_rate": 2.2931620720926717e-06,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27862074971199036,
|
|
"step": 3770,
|
|
"valid_targets_mean": 2421.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.04,
|
|
"grad_norm": 0.7267678524627877,
|
|
"learning_rate": 2.256202423572669e-06,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23411108553409576,
|
|
"step": 3775,
|
|
"valid_targets_mean": 1255.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 6.048,
|
|
"grad_norm": 0.7060061040999087,
|
|
"learning_rate": 2.219525233731035e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23224863409996033,
|
|
"step": 3780,
|
|
"valid_targets_mean": 1360.3,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 6.056,
|
|
"grad_norm": 0.7234442260751625,
|
|
"learning_rate": 2.183131086421961e-06,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23001417517662048,
|
|
"step": 3785,
|
|
"valid_targets_mean": 1231.4,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 6.064,
|
|
"grad_norm": 0.7122449136990033,
|
|
"learning_rate": 2.1470205609939533e-06,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23207834362983704,
|
|
"step": 3790,
|
|
"valid_targets_mean": 1376.8,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 6.072,
|
|
"grad_norm": 0.7416719819598611,
|
|
"learning_rate": 2.1111942322806335e-06,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2228378802537918,
|
|
"step": 3795,
|
|
"valid_targets_mean": 1091.0,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.08,
|
|
"grad_norm": 0.7481057858755126,
|
|
"learning_rate": 2.0756526705915635e-06,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23551088571548462,
|
|
"step": 3800,
|
|
"valid_targets_mean": 1250.9,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 6.088,
|
|
"grad_norm": 0.7727618330477835,
|
|
"learning_rate": 2.0403964417031764e-06,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21937456727027893,
|
|
"step": 3805,
|
|
"valid_targets_mean": 1157.9,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 6.096,
|
|
"grad_norm": 0.7886664888122171,
|
|
"learning_rate": 2.0054261068497773e-06,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22627376019954681,
|
|
"step": 3810,
|
|
"valid_targets_mean": 1097.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 6.104,
|
|
"grad_norm": 0.7163256292055006,
|
|
"learning_rate": 1.9707422227145922e-06,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23331984877586365,
|
|
"step": 3815,
|
|
"valid_targets_mean": 1394.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 6.112,
|
|
"grad_norm": 0.722190664849765,
|
|
"learning_rate": 1.936345341420924e-06,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24693769216537476,
|
|
"step": 3820,
|
|
"valid_targets_mean": 1379.4,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 6.12,
|
|
"grad_norm": 0.746933807556648,
|
|
"learning_rate": 1.9022360105233507e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23548638820648193,
|
|
"step": 3825,
|
|
"valid_targets_mean": 1174.9,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 6.128,
|
|
"grad_norm": 0.7428843381866582,
|
|
"learning_rate": 1.8684147729990188e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20896920561790466,
|
|
"step": 3830,
|
|
"valid_targets_mean": 1094.4,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.136,
|
|
"grad_norm": 0.7077550675323978,
|
|
"learning_rate": 1.8348821672389893e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2356010228395462,
|
|
"step": 3835,
|
|
"valid_targets_mean": 1296.6,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 6.144,
|
|
"grad_norm": 0.7758329904004831,
|
|
"learning_rate": 1.8016387270396784e-06,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2147502899169922,
|
|
"step": 3840,
|
|
"valid_targets_mean": 1034.6,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 6.152,
|
|
"grad_norm": 0.7769488915635737,
|
|
"learning_rate": 1.7686849815943486e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22903814911842346,
|
|
"step": 3845,
|
|
"valid_targets_mean": 1051.7,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 6.16,
|
|
"grad_norm": 0.7558317585719244,
|
|
"learning_rate": 1.7360214554847e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23141948878765106,
|
|
"step": 3850,
|
|
"valid_targets_mean": 1123.8,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 6.168,
|
|
"grad_norm": 0.6761645840880993,
|
|
"learning_rate": 1.703648668672495e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23755371570587158,
|
|
"step": 3855,
|
|
"valid_targets_mean": 1541.9,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 6.176,
|
|
"grad_norm": 0.7473885457602161,
|
|
"learning_rate": 1.6715671364913077e-06,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22996127605438232,
|
|
"step": 3860,
|
|
"valid_targets_mean": 1246.6,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 6.184,
|
|
"grad_norm": 0.7547202576084149,
|
|
"learning_rate": 1.6397773696383091e-06,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2060251086950302,
|
|
"step": 3865,
|
|
"valid_targets_mean": 1018.9,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 6.192,
|
|
"grad_norm": 0.7497300212720819,
|
|
"learning_rate": 1.6082798741661321e-06,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2315627634525299,
|
|
"step": 3870,
|
|
"valid_targets_mean": 1206.9,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 6.2,
|
|
"grad_norm": 0.7487908600440004,
|
|
"learning_rate": 1.5770751514748273e-06,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24393165111541748,
|
|
"step": 3875,
|
|
"valid_targets_mean": 1287.3,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 6.208,
|
|
"grad_norm": 0.7355621815995907,
|
|
"learning_rate": 1.5461636983038686e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2292770892381668,
|
|
"step": 3880,
|
|
"valid_targets_mean": 1240.3,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.216,
|
|
"grad_norm": 0.70785066033013,
|
|
"learning_rate": 1.5155460067242578e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22344128787517548,
|
|
"step": 3885,
|
|
"valid_targets_mean": 1394.6,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 6.224,
|
|
"grad_norm": 0.7179235269200792,
|
|
"learning_rate": 1.4852225641306816e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23789604008197784,
|
|
"step": 3890,
|
|
"valid_targets_mean": 1348.8,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 6.232,
|
|
"grad_norm": 0.7021312215222486,
|
|
"learning_rate": 1.4551938532337607e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23826859891414642,
|
|
"step": 3895,
|
|
"valid_targets_mean": 1524.0,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.24,
|
|
"grad_norm": 0.7625426094452529,
|
|
"learning_rate": 1.4254603520523614e-06,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22259096801280975,
|
|
"step": 3900,
|
|
"valid_targets_mean": 1117.6,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 6.248,
|
|
"grad_norm": 0.7563142443191094,
|
|
"learning_rate": 1.3960225339059875e-06,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22776547074317932,
|
|
"step": 3905,
|
|
"valid_targets_mean": 1274.7,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.256,
|
|
"grad_norm": 0.6808365928257146,
|
|
"learning_rate": 1.3668808674072409e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2489374876022339,
|
|
"step": 3910,
|
|
"valid_targets_mean": 1654.2,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 6.264,
|
|
"grad_norm": 0.7136954310424075,
|
|
"learning_rate": 1.338035816454375e-06,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551230788230896,
|
|
"step": 3915,
|
|
"valid_targets_mean": 1494.9,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 6.272,
|
|
"grad_norm": 0.7508134678321918,
|
|
"learning_rate": 1.3094878402238887e-06,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2175656259059906,
|
|
"step": 3920,
|
|
"valid_targets_mean": 1212.2,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 6.28,
|
|
"grad_norm": 0.7524639229004331,
|
|
"learning_rate": 1.2812373931632371e-06,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22484488785266876,
|
|
"step": 3925,
|
|
"valid_targets_mean": 1240.5,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 6.288,
|
|
"grad_norm": 0.7415968370180998,
|
|
"learning_rate": 1.2532849249835932e-06,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24198241531848907,
|
|
"step": 3930,
|
|
"valid_targets_mean": 1241.2,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 6.296,
|
|
"grad_norm": 0.7610486481587991,
|
|
"learning_rate": 1.2256308806526774e-06,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23806941509246826,
|
|
"step": 3935,
|
|
"valid_targets_mean": 1247.7,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 6.304,
|
|
"grad_norm": 0.7638342446927218,
|
|
"learning_rate": 1.1982757003876855e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25441616773605347,
|
|
"step": 3940,
|
|
"valid_targets_mean": 1521.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 6.312,
|
|
"grad_norm": 0.7094731287547585,
|
|
"learning_rate": 1.1712198196482793e-06,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22807534039020538,
|
|
"step": 3945,
|
|
"valid_targets_mean": 1451.4,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 6.32,
|
|
"grad_norm": 0.7380461188534542,
|
|
"learning_rate": 1.1444636691296518e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24391891062259674,
|
|
"step": 3950,
|
|
"valid_targets_mean": 1378.5,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 6.328,
|
|
"grad_norm": 0.7696469631441553,
|
|
"learning_rate": 1.11800767475567e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22943317890167236,
|
|
"step": 3955,
|
|
"valid_targets_mean": 1178.1,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 6.336,
|
|
"grad_norm": 0.7300215207382803,
|
|
"learning_rate": 1.0918522576721014e-06,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24211618304252625,
|
|
"step": 3960,
|
|
"valid_targets_mean": 1368.9,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 6.344,
|
|
"grad_norm": 0.7509665094160838,
|
|
"learning_rate": 1.0659978342399003e-06,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22505459189414978,
|
|
"step": 3965,
|
|
"valid_targets_mean": 1148.9,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 6.352,
|
|
"grad_norm": 0.7370056336955896,
|
|
"learning_rate": 1.0404448160285897e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20663653314113617,
|
|
"step": 3970,
|
|
"valid_targets_mean": 1003.4,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 6.36,
|
|
"grad_norm": 0.7260217503467596,
|
|
"learning_rate": 1.0151936098097015e-06,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23803086578845978,
|
|
"step": 3975,
|
|
"valid_targets_mean": 1328.8,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 6.368,
|
|
"grad_norm": 0.7264770836889591,
|
|
"learning_rate": 9.902446175503089e-07,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.244807630777359,
|
|
"step": 3980,
|
|
"valid_targets_mean": 1408.2,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 6.376,
|
|
"grad_norm": 0.6928851505764125,
|
|
"learning_rate": 9.655982364066197e-07,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23448073863983154,
|
|
"step": 3985,
|
|
"valid_targets_mean": 1468.0,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 6.384,
|
|
"grad_norm": 0.7629133642696474,
|
|
"learning_rate": 9.412548587176595e-07,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22949525713920593,
|
|
"step": 3990,
|
|
"valid_targets_mean": 1161.1,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 6.392,
|
|
"grad_norm": 0.7195137349588978,
|
|
"learning_rate": 9.172148719990237e-07,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2162041962146759,
|
|
"step": 3995,
|
|
"valid_targets_mean": 1223.1,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 6.4,
|
|
"grad_norm": 0.7240757734483103,
|
|
"learning_rate": 8.934786589367106e-07,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23259784281253815,
|
|
"step": 4000,
|
|
"valid_targets_mean": 1322.4,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 6.408,
|
|
"grad_norm": 0.7209269433252022,
|
|
"learning_rate": 8.700465973810246e-07,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22464004158973694,
|
|
"step": 4005,
|
|
"valid_targets_mean": 1183.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 6.416,
|
|
"grad_norm": 0.704612430630419,
|
|
"learning_rate": 8.469190603405719e-07,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24196138978004456,
|
|
"step": 4010,
|
|
"valid_targets_mean": 1577.1,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 6.424,
|
|
"grad_norm": 0.7058994515262664,
|
|
"learning_rate": 8.240964159763121e-07,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23296499252319336,
|
|
"step": 4015,
|
|
"valid_targets_mean": 1365.7,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 6.432,
|
|
"grad_norm": 0.7399857814603646,
|
|
"learning_rate": 8.015790275957003e-07,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370910942554474,
|
|
"step": 4020,
|
|
"valid_targets_mean": 1295.4,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 6.44,
|
|
"grad_norm": 0.7613685089306402,
|
|
"learning_rate": 7.793672536469077e-07,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23857440054416656,
|
|
"step": 4025,
|
|
"valid_targets_mean": 1273.8,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 6.448,
|
|
"grad_norm": 0.6858720529244813,
|
|
"learning_rate": 7.574614477131081e-07,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23780368268489838,
|
|
"step": 4030,
|
|
"valid_targets_mean": 1513.5,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 6.456,
|
|
"grad_norm": 0.7094590379892729,
|
|
"learning_rate": 7.358619585068583e-07,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23009896278381348,
|
|
"step": 4035,
|
|
"valid_targets_mean": 1336.9,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 6.464,
|
|
"grad_norm": 0.711728620584929,
|
|
"learning_rate": 7.145691298645419e-07,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24415968358516693,
|
|
"step": 4040,
|
|
"valid_targets_mean": 1397.2,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 6.4719999999999995,
|
|
"grad_norm": 0.7350427720007542,
|
|
"learning_rate": 6.935833007408965e-07,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205914407968521,
|
|
"step": 4045,
|
|
"valid_targets_mean": 1257.2,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.48,
|
|
"grad_norm": 0.7657419803659051,
|
|
"learning_rate": 6.729048052036136e-07,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22780416905879974,
|
|
"step": 4050,
|
|
"valid_targets_mean": 1080.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 6.4879999999999995,
|
|
"grad_norm": 0.7709037443236045,
|
|
"learning_rate": 6.52533972428031e-07,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22721043229103088,
|
|
"step": 4055,
|
|
"valid_targets_mean": 1146.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.496,
|
|
"grad_norm": 0.7748465021865996,
|
|
"learning_rate": 6.324711266918826e-07,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2236689031124115,
|
|
"step": 4060,
|
|
"valid_targets_mean": 1121.7,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.504,
|
|
"grad_norm": 0.7484444940840957,
|
|
"learning_rate": 6.127165873701457e-07,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22915686666965485,
|
|
"step": 4065,
|
|
"valid_targets_mean": 1390.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 6.5120000000000005,
|
|
"grad_norm": 0.7926496846146547,
|
|
"learning_rate": 5.932706689299461e-07,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22777403891086578,
|
|
"step": 4070,
|
|
"valid_targets_mean": 1112.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 6.52,
|
|
"grad_norm": 0.7291878852431773,
|
|
"learning_rate": 5.741336809255615e-07,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22574101388454437,
|
|
"step": 4075,
|
|
"valid_targets_mean": 1212.2,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 6.5280000000000005,
|
|
"grad_norm": 0.7157393474894417,
|
|
"learning_rate": 5.553059279934902e-07,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2373976856470108,
|
|
"step": 4080,
|
|
"valid_targets_mean": 1487.4,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 6.536,
|
|
"grad_norm": 0.7385278230340897,
|
|
"learning_rate": 5.36787709847597e-07,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23163509368896484,
|
|
"step": 4085,
|
|
"valid_targets_mean": 1182.2,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.5440000000000005,
|
|
"grad_norm": 0.7186872910862663,
|
|
"learning_rate": 5.185793212743529e-07,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24166414141654968,
|
|
"step": 4090,
|
|
"valid_targets_mean": 1492.9,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 6.552,
|
|
"grad_norm": 0.7487199190431058,
|
|
"learning_rate": 5.006810521281335e-07,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24209828674793243,
|
|
"step": 4095,
|
|
"valid_targets_mean": 1260.0,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 6.5600000000000005,
|
|
"grad_norm": 0.7395464114665022,
|
|
"learning_rate": 4.830931873266065e-07,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23089516162872314,
|
|
"step": 4100,
|
|
"valid_targets_mean": 1285.1,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 6.568,
|
|
"grad_norm": 0.7111838183825617,
|
|
"learning_rate": 4.658160068462025e-07,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24008457362651825,
|
|
"step": 4105,
|
|
"valid_targets_mean": 1339.1,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 6.576,
|
|
"grad_norm": 0.7209223968861672,
|
|
"learning_rate": 4.488497857176466e-07,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2307056188583374,
|
|
"step": 4110,
|
|
"valid_targets_mean": 1378.7,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 6.584,
|
|
"grad_norm": 0.7299579899226537,
|
|
"learning_rate": 4.321947940215898e-07,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26598238945007324,
|
|
"step": 4115,
|
|
"valid_targets_mean": 1472.9,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 6.592,
|
|
"grad_norm": 0.7603548727744546,
|
|
"learning_rate": 4.1585129688430425e-07,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22465896606445312,
|
|
"step": 4120,
|
|
"valid_targets_mean": 1174.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 6.6,
|
|
"grad_norm": 0.7042611942892704,
|
|
"learning_rate": 3.998195544734706e-07,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22120928764343262,
|
|
"step": 4125,
|
|
"valid_targets_mean": 1271.0,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 6.608,
|
|
"grad_norm": 0.7427324011985378,
|
|
"learning_rate": 3.840998219940284e-07,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23668722808361053,
|
|
"step": 4130,
|
|
"valid_targets_mean": 1227.0,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 6.616,
|
|
"grad_norm": 0.7784600989987982,
|
|
"learning_rate": 3.6869234968411214e-07,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20594242215156555,
|
|
"step": 4135,
|
|
"valid_targets_mean": 937.3,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 6.624,
|
|
"grad_norm": 0.7617014877476396,
|
|
"learning_rate": 3.5359738281107504e-07,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24284669756889343,
|
|
"step": 4140,
|
|
"valid_targets_mean": 1318.1,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 6.632,
|
|
"grad_norm": 0.7523825128158046,
|
|
"learning_rate": 3.38815161667585e-07,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22902150452136993,
|
|
"step": 4145,
|
|
"valid_targets_mean": 1100.2,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 6.64,
|
|
"grad_norm": 0.7756620632626724,
|
|
"learning_rate": 3.24345921567788e-07,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22955486178398132,
|
|
"step": 4150,
|
|
"valid_targets_mean": 1258.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 6.648,
|
|
"grad_norm": 0.6879124182488607,
|
|
"learning_rate": 3.101898928435754e-07,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22384321689605713,
|
|
"step": 4155,
|
|
"valid_targets_mean": 1272.2,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 6.656,
|
|
"grad_norm": 0.6345185543497504,
|
|
"learning_rate": 2.9634730084091343e-07,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24245388805866241,
|
|
"step": 4160,
|
|
"valid_targets_mean": 1743.6,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 6.664,
|
|
"grad_norm": 0.6948280395962673,
|
|
"learning_rate": 2.8281836591624865e-07,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23183703422546387,
|
|
"step": 4165,
|
|
"valid_targets_mean": 1453.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 6.672,
|
|
"grad_norm": 0.748548286420873,
|
|
"learning_rate": 2.6960330343301033e-07,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23871101438999176,
|
|
"step": 4170,
|
|
"valid_targets_mean": 1281.3,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.68,
|
|
"grad_norm": 0.7040312048357552,
|
|
"learning_rate": 2.5670232375817784e-07,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2245919555425644,
|
|
"step": 4175,
|
|
"valid_targets_mean": 1283.9,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 6.688,
|
|
"grad_norm": 2.4362372326758828,
|
|
"learning_rate": 2.441156322589322e-07,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2165592610836029,
|
|
"step": 4180,
|
|
"valid_targets_mean": 1033.4,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.696,
|
|
"grad_norm": 0.7053750376312463,
|
|
"learning_rate": 2.318434292993832e-07,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2096986174583435,
|
|
"step": 4185,
|
|
"valid_targets_mean": 1348.8,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 6.704,
|
|
"grad_norm": 0.7503849269494608,
|
|
"learning_rate": 2.1988591023738514e-07,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2123297154903412,
|
|
"step": 4190,
|
|
"valid_targets_mean": 1144.6,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 6.712,
|
|
"grad_norm": 0.7299520548336608,
|
|
"learning_rate": 2.0824326542142835e-07,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23339882493019104,
|
|
"step": 4195,
|
|
"valid_targets_mean": 1288.5,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 6.72,
|
|
"grad_norm": 0.7227971365797824,
|
|
"learning_rate": 1.9691568018759931e-07,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22914820909500122,
|
|
"step": 4200,
|
|
"valid_targets_mean": 1172.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.728,
|
|
"grad_norm": 0.6850314108121872,
|
|
"learning_rate": 1.8590333485664525e-07,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21886573731899261,
|
|
"step": 4205,
|
|
"valid_targets_mean": 1458.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 6.736,
|
|
"grad_norm": 0.7731759550955083,
|
|
"learning_rate": 1.752064047310853e-07,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2319599986076355,
|
|
"step": 4210,
|
|
"valid_targets_mean": 1171.1,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.744,
|
|
"grad_norm": 0.7026019889631542,
|
|
"learning_rate": 1.6482506009243949e-07,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23918884992599487,
|
|
"step": 4215,
|
|
"valid_targets_mean": 1491.8,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 6.752,
|
|
"grad_norm": 0.7588492808798216,
|
|
"learning_rate": 1.5475946619850192e-07,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23531416058540344,
|
|
"step": 4220,
|
|
"valid_targets_mean": 1491.7,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 6.76,
|
|
"grad_norm": 0.7471335013420828,
|
|
"learning_rate": 1.4500978328071845e-07,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23427581787109375,
|
|
"step": 4225,
|
|
"valid_targets_mean": 1273.4,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 6.768,
|
|
"grad_norm": 0.7444390165821237,
|
|
"learning_rate": 1.3557616654163775e-07,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22755512595176697,
|
|
"step": 4230,
|
|
"valid_targets_mean": 1204.9,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 6.776,
|
|
"grad_norm": 0.6886200530114438,
|
|
"learning_rate": 1.264587661524308e-07,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22386683523654938,
|
|
"step": 4235,
|
|
"valid_targets_mean": 1314.0,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 6.784,
|
|
"grad_norm": 0.7237283470769146,
|
|
"learning_rate": 1.1765772725051084e-07,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22144156694412231,
|
|
"step": 4240,
|
|
"valid_targets_mean": 1200.1,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 6.792,
|
|
"grad_norm": 0.7195749089254302,
|
|
"learning_rate": 1.0917318993721726e-07,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23054638504981995,
|
|
"step": 4245,
|
|
"valid_targets_mean": 1323.6,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.8,
|
|
"grad_norm": 0.7868760010850135,
|
|
"learning_rate": 1.0100528927558861e-07,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22706332802772522,
|
|
"step": 4250,
|
|
"valid_targets_mean": 1180.1,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 6.808,
|
|
"grad_norm": 0.7104071438911685,
|
|
"learning_rate": 9.31541552882087e-08,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24277979135513306,
|
|
"step": 4255,
|
|
"valid_targets_mean": 1501.5,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 6.816,
|
|
"grad_norm": 0.7363026984574033,
|
|
"learning_rate": 8.561991295514161e-08,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26068824529647827,
|
|
"step": 4260,
|
|
"valid_targets_mean": 1453.3,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.824,
|
|
"grad_norm": 0.7197301935820319,
|
|
"learning_rate": 7.840268221193548e-08,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2392248809337616,
|
|
"step": 4265,
|
|
"valid_targets_mean": 1306.4,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.832,
|
|
"grad_norm": 0.6922600239632283,
|
|
"learning_rate": 7.150257794772186e-08,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23342570662498474,
|
|
"step": 4270,
|
|
"valid_targets_mean": 1480.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 6.84,
|
|
"grad_norm": 0.6693863894047978,
|
|
"learning_rate": 6.491971000337938e-08,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21505916118621826,
|
|
"step": 4275,
|
|
"valid_targets_mean": 1583.9,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.848,
|
|
"grad_norm": 0.7000123310127081,
|
|
"learning_rate": 5.8654183169788435e-08,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22945305705070496,
|
|
"step": 4280,
|
|
"valid_targets_mean": 1541.9,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 6.856,
|
|
"grad_norm": 0.7601704471661711,
|
|
"learning_rate": 5.270609718616593e-08,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2093864381313324,
|
|
"step": 4285,
|
|
"valid_targets_mean": 1032.3,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.864,
|
|
"grad_norm": 0.7495052401496632,
|
|
"learning_rate": 4.70755467384687e-08,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181587815284729,
|
|
"step": 4290,
|
|
"valid_targets_mean": 1099.4,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 6.872,
|
|
"grad_norm": 0.8018611721266622,
|
|
"learning_rate": 4.176262145789478e-08,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2126857042312622,
|
|
"step": 4295,
|
|
"valid_targets_mean": 1149.3,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 6.88,
|
|
"grad_norm": 0.6971205772387004,
|
|
"learning_rate": 3.676740591945782e-08,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22674190998077393,
|
|
"step": 4300,
|
|
"valid_targets_mean": 1428.5,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 6.888,
|
|
"grad_norm": 0.7131266890746867,
|
|
"learning_rate": 3.208997964062821e-08,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23928573727607727,
|
|
"step": 4305,
|
|
"valid_targets_mean": 1313.2,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.896,
|
|
"grad_norm": 0.6984789654011763,
|
|
"learning_rate": 2.773041708008295e-08,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22383782267570496,
|
|
"step": 4310,
|
|
"valid_targets_mean": 1510.1,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 6.904,
|
|
"grad_norm": 0.7393983459099267,
|
|
"learning_rate": 2.3688787636511057e-08,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2349054217338562,
|
|
"step": 4315,
|
|
"valid_targets_mean": 1243.1,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 6.912,
|
|
"grad_norm": 0.740044707780889,
|
|
"learning_rate": 1.9965155647507782e-08,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22836017608642578,
|
|
"step": 4320,
|
|
"valid_targets_mean": 1249.2,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 6.92,
|
|
"grad_norm": 0.758081298103966,
|
|
"learning_rate": 1.655958038855765e-08,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21802884340286255,
|
|
"step": 4325,
|
|
"valid_targets_mean": 1119.3,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 6.928,
|
|
"grad_norm": 0.7166536429250479,
|
|
"learning_rate": 1.3472116072084096e-08,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23139332234859467,
|
|
"step": 4330,
|
|
"valid_targets_mean": 1298.2,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 6.936,
|
|
"grad_norm": 0.7452037325458652,
|
|
"learning_rate": 1.0702811846590167e-08,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21017000079154968,
|
|
"step": 4335,
|
|
"valid_targets_mean": 1029.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 6.944,
|
|
"grad_norm": 0.7269754160875623,
|
|
"learning_rate": 8.251711795876916e-09,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2296445667743683,
|
|
"step": 4340,
|
|
"valid_targets_mean": 1231.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 6.952,
|
|
"grad_norm": 0.7678154067303642,
|
|
"learning_rate": 6.1188549383373044e-09,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25704026222229004,
|
|
"step": 4345,
|
|
"valid_targets_mean": 1338.1,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 6.96,
|
|
"grad_norm": 0.712760582988766,
|
|
"learning_rate": 4.304275226338916e-09,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2253701239824295,
|
|
"step": 4350,
|
|
"valid_targets_mean": 1310.9,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 6.968,
|
|
"grad_norm": 0.7261158997628984,
|
|
"learning_rate": 2.8080015456799503e-09,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20785915851593018,
|
|
"step": 4355,
|
|
"valid_targets_mean": 1180.8,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 6.976,
|
|
"grad_norm": 0.7439968622593376,
|
|
"learning_rate": 1.6300577151340257e-09,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23048461973667145,
|
|
"step": 4360,
|
|
"valid_targets_mean": 1260.3,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.984,
|
|
"grad_norm": 0.7324720186615127,
|
|
"learning_rate": 7.70462486070489e-10,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22307077050209045,
|
|
"step": 4365,
|
|
"valid_targets_mean": 1276.4,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 6.992,
|
|
"grad_norm": 0.687230290171882,
|
|
"learning_rate": 2.2922954214799065e-10,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23314671218395233,
|
|
"step": 4370,
|
|
"valid_targets_mean": 1408.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.7193010118927723,
|
|
"learning_rate": 6.367499107984288e-12,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23595906794071198,
|
|
"step": 4375,
|
|
"valid_targets_mean": 1299.3,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23595906794071198,
|
|
"step": 4375,
|
|
"total_flos": 342994073812992.0,
|
|
"train_loss": 0.2826101470402309,
|
|
"train_runtime": 6308.6868,
|
|
"train_samples_per_second": 11.096,
|
|
"train_steps_per_second": 0.693,
|
|
"valid_targets_mean": 1299.3,
|
|
"valid_targets_min": 527
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4375,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 342994073812992.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|