Model: laion/exp-syh-r2egym-swesmith-mixed_glm_4_7_traces_locetash Source: Original Platform
8826 lines
244 KiB
JSON
8826 lines
244 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3990,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00877963125548727,
|
|
"grad_norm": 12.649351438860265,
|
|
"learning_rate": 4.010025062656642e-07,
|
|
"loss": 0.9466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40519675612449646,
|
|
"step": 5,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.01755926251097454,
|
|
"grad_norm": 15.155120097363977,
|
|
"learning_rate": 9.022556390977444e-07,
|
|
"loss": 0.919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5128268003463745,
|
|
"step": 10,
|
|
"valid_targets_mean": 3349.1,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.02633889376646181,
|
|
"grad_norm": 10.628783212657249,
|
|
"learning_rate": 1.4035087719298246e-06,
|
|
"loss": 0.8847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4738978147506714,
|
|
"step": 15,
|
|
"valid_targets_mean": 2883.6,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.03511852502194908,
|
|
"grad_norm": 8.040377238983012,
|
|
"learning_rate": 1.904761904761905e-06,
|
|
"loss": 0.8843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21155613660812378,
|
|
"step": 20,
|
|
"valid_targets_mean": 1087.8,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 0.043898156277436345,
|
|
"grad_norm": 6.038318275447897,
|
|
"learning_rate": 2.406015037593985e-06,
|
|
"loss": 0.8472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3912035822868347,
|
|
"step": 25,
|
|
"valid_targets_mean": 2062.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 0.05267778753292362,
|
|
"grad_norm": 4.507631102410868,
|
|
"learning_rate": 2.9072681704260652e-06,
|
|
"loss": 0.8311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.427338182926178,
|
|
"step": 30,
|
|
"valid_targets_mean": 2360.5,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.061457418788410885,
|
|
"grad_norm": 3.490883206622139,
|
|
"learning_rate": 3.4085213032581455e-06,
|
|
"loss": 0.7441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35009434819221497,
|
|
"step": 35,
|
|
"valid_targets_mean": 2530.2,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.07023705004389816,
|
|
"grad_norm": 2.3073423172364316,
|
|
"learning_rate": 3.909774436090225e-06,
|
|
"loss": 0.7168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34194087982177734,
|
|
"step": 40,
|
|
"valid_targets_mean": 3161.1,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.07901668129938542,
|
|
"grad_norm": 2.182688201680529,
|
|
"learning_rate": 4.411027568922306e-06,
|
|
"loss": 0.6809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36747634410858154,
|
|
"step": 45,
|
|
"valid_targets_mean": 2045.2,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.08779631255487269,
|
|
"grad_norm": 1.6345175314959248,
|
|
"learning_rate": 4.912280701754386e-06,
|
|
"loss": 0.6744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25288575887680054,
|
|
"step": 50,
|
|
"valid_targets_mean": 1775.0,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 0.09657594381035997,
|
|
"grad_norm": 1.468031654910614,
|
|
"learning_rate": 5.413533834586467e-06,
|
|
"loss": 0.6192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3417006731033325,
|
|
"step": 55,
|
|
"valid_targets_mean": 2332.9,
|
|
"valid_targets_min": 185
|
|
},
|
|
{
|
|
"epoch": 0.10535557506584724,
|
|
"grad_norm": 1.3692707555020516,
|
|
"learning_rate": 5.9147869674185465e-06,
|
|
"loss": 0.6169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4717869460582733,
|
|
"step": 60,
|
|
"valid_targets_mean": 3316.8,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 0.1141352063213345,
|
|
"grad_norm": 1.2402370411291184,
|
|
"learning_rate": 6.416040100250627e-06,
|
|
"loss": 0.5816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24836552143096924,
|
|
"step": 65,
|
|
"valid_targets_mean": 1733.9,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.12291483757682177,
|
|
"grad_norm": 0.8895598006944427,
|
|
"learning_rate": 6.917293233082707e-06,
|
|
"loss": 0.5498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3478800654411316,
|
|
"step": 70,
|
|
"valid_targets_mean": 4681.1,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.13169446883230904,
|
|
"grad_norm": 1.2789911136242778,
|
|
"learning_rate": 7.418546365914787e-06,
|
|
"loss": 0.5395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736978530883789,
|
|
"step": 75,
|
|
"valid_targets_mean": 1912.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 0.14047410008779632,
|
|
"grad_norm": 1.015875912689838,
|
|
"learning_rate": 7.919799498746868e-06,
|
|
"loss": 0.5269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711946964263916,
|
|
"step": 80,
|
|
"valid_targets_mean": 2320.9,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 0.8872070590609626,
|
|
"learning_rate": 8.421052631578948e-06,
|
|
"loss": 0.5255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715321183204651,
|
|
"step": 85,
|
|
"valid_targets_mean": 3202.5,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.15803336259877085,
|
|
"grad_norm": 0.9636723049000766,
|
|
"learning_rate": 8.922305764411027e-06,
|
|
"loss": 0.4942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3397523760795593,
|
|
"step": 90,
|
|
"valid_targets_mean": 3018.6,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 0.16681299385425813,
|
|
"grad_norm": 0.8833461771819953,
|
|
"learning_rate": 9.423558897243108e-06,
|
|
"loss": 0.4857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21870018541812897,
|
|
"step": 95,
|
|
"valid_targets_mean": 2276.2,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 0.17559262510974538,
|
|
"grad_norm": 1.0424730614888218,
|
|
"learning_rate": 9.924812030075189e-06,
|
|
"loss": 0.4596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15294340252876282,
|
|
"step": 100,
|
|
"valid_targets_mean": 1343.5,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 0.18437225636523266,
|
|
"grad_norm": 0.8618866292383338,
|
|
"learning_rate": 1.0426065162907268e-05,
|
|
"loss": 0.472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20313535630702972,
|
|
"step": 105,
|
|
"valid_targets_mean": 2056.0,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 0.19315188762071994,
|
|
"grad_norm": 0.8281762658094207,
|
|
"learning_rate": 1.0927318295739348e-05,
|
|
"loss": 0.4661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26091787219047546,
|
|
"step": 110,
|
|
"valid_targets_mean": 3441.9,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 0.2019315188762072,
|
|
"grad_norm": 0.7988352560841464,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 0.4668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1812494993209839,
|
|
"step": 115,
|
|
"valid_targets_mean": 2036.4,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 0.21071115013169447,
|
|
"grad_norm": 0.8571707580491156,
|
|
"learning_rate": 1.192982456140351e-05,
|
|
"loss": 0.4787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24355491995811462,
|
|
"step": 120,
|
|
"valid_targets_mean": 2478.2,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 0.21949078138718173,
|
|
"grad_norm": 0.9874236239252236,
|
|
"learning_rate": 1.2431077694235589e-05,
|
|
"loss": 0.4723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686903774738312,
|
|
"step": 125,
|
|
"valid_targets_mean": 2052.8,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 0.228270412642669,
|
|
"grad_norm": 0.7697948643881428,
|
|
"learning_rate": 1.293233082706767e-05,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953730821609497,
|
|
"step": 130,
|
|
"valid_targets_mean": 3590.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 0.2370500438981563,
|
|
"grad_norm": 0.7052327150847424,
|
|
"learning_rate": 1.343358395989975e-05,
|
|
"loss": 0.4462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720586359500885,
|
|
"step": 135,
|
|
"valid_targets_mean": 3983.1,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.24582967515364354,
|
|
"grad_norm": 0.70447385376843,
|
|
"learning_rate": 1.3934837092731829e-05,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22973623871803284,
|
|
"step": 140,
|
|
"valid_targets_mean": 4067.6,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.2546093064091308,
|
|
"grad_norm": 0.8372477604530956,
|
|
"learning_rate": 1.443609022556391e-05,
|
|
"loss": 0.4389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36316585540771484,
|
|
"step": 145,
|
|
"valid_targets_mean": 3748.2,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.2633889376646181,
|
|
"grad_norm": 0.9839798944105693,
|
|
"learning_rate": 1.493734335839599e-05,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26002877950668335,
|
|
"step": 150,
|
|
"valid_targets_mean": 2538.1,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 0.2721685689201054,
|
|
"grad_norm": 0.9041587925842182,
|
|
"learning_rate": 1.543859649122807e-05,
|
|
"loss": 0.4257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19440549612045288,
|
|
"step": 155,
|
|
"valid_targets_mean": 2684.5,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 0.28094820017559263,
|
|
"grad_norm": 0.9451351015940459,
|
|
"learning_rate": 1.593984962406015e-05,
|
|
"loss": 0.4109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2465362399816513,
|
|
"step": 160,
|
|
"valid_targets_mean": 2743.5,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.2897278314310799,
|
|
"grad_norm": 0.752251780704793,
|
|
"learning_rate": 1.6441102756892233e-05,
|
|
"loss": 0.4125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22052188217639923,
|
|
"step": 165,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.7971645470695788,
|
|
"learning_rate": 1.694235588972431e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152998149394989,
|
|
"step": 170,
|
|
"valid_targets_mean": 1962.0,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.30728709394205445,
|
|
"grad_norm": 0.8625636197570632,
|
|
"learning_rate": 1.744360902255639e-05,
|
|
"loss": 0.4312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2361379861831665,
|
|
"step": 175,
|
|
"valid_targets_mean": 2572.1,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 0.3160667251975417,
|
|
"grad_norm": 0.7258257590416349,
|
|
"learning_rate": 1.7944862155388473e-05,
|
|
"loss": 0.4189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18382346630096436,
|
|
"step": 180,
|
|
"valid_targets_mean": 3134.6,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 0.32484635645302895,
|
|
"grad_norm": 0.8741003946497015,
|
|
"learning_rate": 1.8446115288220552e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1638769805431366,
|
|
"step": 185,
|
|
"valid_targets_mean": 1772.1,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.33362598770851626,
|
|
"grad_norm": 0.8227833862137313,
|
|
"learning_rate": 1.894736842105263e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19856193661689758,
|
|
"step": 190,
|
|
"valid_targets_mean": 3065.5,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 0.3424056189640035,
|
|
"grad_norm": 0.7960315991129969,
|
|
"learning_rate": 1.9448621553884713e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19015879929065704,
|
|
"step": 195,
|
|
"valid_targets_mean": 3041.9,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.35118525021949076,
|
|
"grad_norm": 0.7919234593074661,
|
|
"learning_rate": 1.9949874686716792e-05,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20683753490447998,
|
|
"step": 200,
|
|
"valid_targets_mean": 3452.9,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 0.35996488147497807,
|
|
"grad_norm": 0.7401372550415715,
|
|
"learning_rate": 2.045112781954887e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18923993408679962,
|
|
"step": 205,
|
|
"valid_targets_mean": 4190.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 0.3687445127304653,
|
|
"grad_norm": 1.022066983868932,
|
|
"learning_rate": 2.0952380952380954e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17522388696670532,
|
|
"step": 210,
|
|
"valid_targets_mean": 1784.1,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.3775241439859526,
|
|
"grad_norm": 0.8293231893278189,
|
|
"learning_rate": 2.1453634085213033e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27000442147254944,
|
|
"step": 215,
|
|
"valid_targets_mean": 4182.2,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 0.3863037752414399,
|
|
"grad_norm": 1.1053663927400494,
|
|
"learning_rate": 2.1954887218045115e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29235392808914185,
|
|
"step": 220,
|
|
"valid_targets_mean": 2866.4,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 0.39508340649692714,
|
|
"grad_norm": 0.8550607785954433,
|
|
"learning_rate": 2.2456140350877194e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17124208807945251,
|
|
"step": 225,
|
|
"valid_targets_mean": 3380.9,
|
|
"valid_targets_min": 180
|
|
},
|
|
{
|
|
"epoch": 0.4038630377524144,
|
|
"grad_norm": 0.9260425486619788,
|
|
"learning_rate": 2.2957393483709277e-05,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13650885224342346,
|
|
"step": 230,
|
|
"valid_targets_mean": 1750.9,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 0.41264266900790164,
|
|
"grad_norm": 0.8755766019585831,
|
|
"learning_rate": 2.3458646616541356e-05,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1735352724790573,
|
|
"step": 235,
|
|
"valid_targets_mean": 2305.8,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.42142230026338895,
|
|
"grad_norm": 1.2072668548387762,
|
|
"learning_rate": 2.3959899749373438e-05,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129884660243988,
|
|
"step": 240,
|
|
"valid_targets_mean": 2068.0,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 0.4302019315188762,
|
|
"grad_norm": 0.9992616003160035,
|
|
"learning_rate": 2.4461152882205514e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20127980411052704,
|
|
"step": 245,
|
|
"valid_targets_mean": 2676.0,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 0.43898156277436345,
|
|
"grad_norm": 0.7584970096013042,
|
|
"learning_rate": 2.4962406015037596e-05,
|
|
"loss": 0.3858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15883001685142517,
|
|
"step": 250,
|
|
"valid_targets_mean": 2906.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 0.7802055391782767,
|
|
"learning_rate": 2.5463659147869675e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1683444380760193,
|
|
"step": 255,
|
|
"valid_targets_mean": 2634.9,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 0.456540825285338,
|
|
"grad_norm": 0.7771286414101365,
|
|
"learning_rate": 2.5964912280701757e-05,
|
|
"loss": 0.3734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21748316287994385,
|
|
"step": 260,
|
|
"valid_targets_mean": 3476.2,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.46532045654082527,
|
|
"grad_norm": 0.8106487479182475,
|
|
"learning_rate": 2.6466165413533836e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23684431612491608,
|
|
"step": 265,
|
|
"valid_targets_mean": 3377.9,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 0.4741000877963126,
|
|
"grad_norm": 1.151251066211724,
|
|
"learning_rate": 2.696741854636592e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.216173455119133,
|
|
"step": 270,
|
|
"valid_targets_mean": 2962.5,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 0.4828797190517998,
|
|
"grad_norm": 0.7326826128796893,
|
|
"learning_rate": 2.7468671679197994e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21516844630241394,
|
|
"step": 275,
|
|
"valid_targets_mean": 3673.4,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 0.4916593503072871,
|
|
"grad_norm": 0.855884951116909,
|
|
"learning_rate": 2.7969924812030077e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11377794295549393,
|
|
"step": 280,
|
|
"valid_targets_mean": 1672.5,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 0.5004389815627743,
|
|
"grad_norm": 0.7149279814961669,
|
|
"learning_rate": 2.8471177944862156e-05,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19637948274612427,
|
|
"step": 285,
|
|
"valid_targets_mean": 3771.5,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.5092186128182616,
|
|
"grad_norm": 0.9447856507087417,
|
|
"learning_rate": 2.8972431077694238e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23925140500068665,
|
|
"step": 290,
|
|
"valid_targets_mean": 2923.9,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.517998244073749,
|
|
"grad_norm": 1.0681984414802446,
|
|
"learning_rate": 2.9473684210526317e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07643012702465057,
|
|
"step": 295,
|
|
"valid_targets_mean": 879.2,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 0.5267778753292361,
|
|
"grad_norm": 0.8094942991036429,
|
|
"learning_rate": 2.99749373433584e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17002525925636292,
|
|
"step": 300,
|
|
"valid_targets_mean": 3089.5,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.5355575065847235,
|
|
"grad_norm": 0.8294121488114734,
|
|
"learning_rate": 3.047619047619048e-05,
|
|
"loss": 0.3552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09964583814144135,
|
|
"step": 305,
|
|
"valid_targets_mean": 1860.0,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 0.5443371378402108,
|
|
"grad_norm": 0.8763896701189194,
|
|
"learning_rate": 3.097744360902256e-05,
|
|
"loss": 0.3843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2323790192604065,
|
|
"step": 310,
|
|
"valid_targets_mean": 2783.1,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 0.553116769095698,
|
|
"grad_norm": 0.8891671465264732,
|
|
"learning_rate": 3.147869674185464e-05,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2079133242368698,
|
|
"step": 315,
|
|
"valid_targets_mean": 2561.6,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 0.5618964003511853,
|
|
"grad_norm": 0.9774894061744978,
|
|
"learning_rate": 3.197994987468672e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16514402627944946,
|
|
"step": 320,
|
|
"valid_targets_mean": 2309.0,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 0.5706760316066726,
|
|
"grad_norm": 0.7843562632016934,
|
|
"learning_rate": 3.24812030075188e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1521378606557846,
|
|
"step": 325,
|
|
"valid_targets_mean": 2077.0,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.5794556628621598,
|
|
"grad_norm": 0.8771495951846054,
|
|
"learning_rate": 3.298245614035088e-05,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17517231404781342,
|
|
"step": 330,
|
|
"valid_targets_mean": 2178.5,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.8819990917378693,
|
|
"learning_rate": 3.3483709273182956e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20055246353149414,
|
|
"step": 335,
|
|
"valid_targets_mean": 2855.2,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 1.7123497635270155,
|
|
"learning_rate": 3.398496240601504e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2494686245918274,
|
|
"step": 340,
|
|
"valid_targets_mean": 5348.4,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 0.6057945566286216,
|
|
"grad_norm": 0.7713371097223082,
|
|
"learning_rate": 3.448621553884712e-05,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3131565451622009,
|
|
"step": 345,
|
|
"valid_targets_mean": 4613.4,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 0.6145741878841089,
|
|
"grad_norm": 0.8060664695410512,
|
|
"learning_rate": 3.49874686716792e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329038441181183,
|
|
"step": 350,
|
|
"valid_targets_mean": 2312.0,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 0.6233538191395961,
|
|
"grad_norm": 0.7594490711736277,
|
|
"learning_rate": 3.548872180451128e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21609294414520264,
|
|
"step": 355,
|
|
"valid_targets_mean": 3820.5,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 0.6321334503950834,
|
|
"grad_norm": 1.0476314350200038,
|
|
"learning_rate": 3.5989974937343364e-05,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13880789279937744,
|
|
"step": 360,
|
|
"valid_targets_mean": 1484.2,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 0.6409130816505707,
|
|
"grad_norm": 0.8160260108539003,
|
|
"learning_rate": 3.649122807017544e-05,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2220887690782547,
|
|
"step": 365,
|
|
"valid_targets_mean": 2540.2,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 0.6496927129060579,
|
|
"grad_norm": 0.9581703743758772,
|
|
"learning_rate": 3.699248120300752e-05,
|
|
"loss": 0.3749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11678123474121094,
|
|
"step": 370,
|
|
"valid_targets_mean": 1443.9,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 0.6584723441615452,
|
|
"grad_norm": 0.8222998181782235,
|
|
"learning_rate": 3.74937343358396e-05,
|
|
"loss": 0.367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433645784854889,
|
|
"step": 375,
|
|
"valid_targets_mean": 2375.0,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 0.6672519754170325,
|
|
"grad_norm": 0.7820020591940476,
|
|
"learning_rate": 3.799498746867168e-05,
|
|
"loss": 0.369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18960776925086975,
|
|
"step": 380,
|
|
"valid_targets_mean": 3547.6,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 0.6760316066725197,
|
|
"grad_norm": 0.7791330473938298,
|
|
"learning_rate": 3.849624060150376e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14161279797554016,
|
|
"step": 385,
|
|
"valid_targets_mean": 2352.1,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 0.684811237928007,
|
|
"grad_norm": 0.9651638636225072,
|
|
"learning_rate": 3.8997493734335845e-05,
|
|
"loss": 0.3656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1971740424633026,
|
|
"step": 390,
|
|
"valid_targets_mean": 1881.6,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 0.6935908691834943,
|
|
"grad_norm": 0.8746658844156182,
|
|
"learning_rate": 3.949874686716792e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17492684721946716,
|
|
"step": 395,
|
|
"valid_targets_mean": 2318.8,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 0.7023705004389815,
|
|
"grad_norm": 0.9835692929095511,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14873580634593964,
|
|
"step": 400,
|
|
"valid_targets_mean": 1774.9,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.7111501316944688,
|
|
"grad_norm": 0.8438273414010364,
|
|
"learning_rate": 3.999980865890662e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19427412748336792,
|
|
"step": 405,
|
|
"valid_targets_mean": 3059.1,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 0.7199297629499561,
|
|
"grad_norm": 0.8658768095704614,
|
|
"learning_rate": 3.999923463928763e-05,
|
|
"loss": 0.3589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19229033589363098,
|
|
"step": 410,
|
|
"valid_targets_mean": 2805.6,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 0.7287093942054433,
|
|
"grad_norm": 1.1439144660858873,
|
|
"learning_rate": 3.999827795212637e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13483887910842896,
|
|
"step": 415,
|
|
"valid_targets_mean": 1697.9,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 0.7374890254609306,
|
|
"grad_norm": 0.7807614104772654,
|
|
"learning_rate": 3.99969386157282e-05,
|
|
"loss": 0.3669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152652770280838,
|
|
"step": 420,
|
|
"valid_targets_mean": 3770.8,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.6938748968194495,
|
|
"learning_rate": 3.999521665572014e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20225508511066437,
|
|
"step": 425,
|
|
"valid_targets_mean": 3654.4,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 0.7550482879719052,
|
|
"grad_norm": 1.017948977087783,
|
|
"learning_rate": 3.999311210505035e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1592870056629181,
|
|
"step": 430,
|
|
"valid_targets_mean": 3316.0,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 0.7638279192273925,
|
|
"grad_norm": 0.8847873111056213,
|
|
"learning_rate": 3.999062500398753e-05,
|
|
"loss": 0.3699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20758408308029175,
|
|
"step": 435,
|
|
"valid_targets_mean": 3131.8,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.7726075504828798,
|
|
"grad_norm": 0.8482969979191853,
|
|
"learning_rate": 3.998775540012016e-05,
|
|
"loss": 0.3712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20086196064949036,
|
|
"step": 440,
|
|
"valid_targets_mean": 2515.8,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 0.781387181738367,
|
|
"grad_norm": 0.8410183514599373,
|
|
"learning_rate": 3.998450334835553e-05,
|
|
"loss": 0.3457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21278539299964905,
|
|
"step": 445,
|
|
"valid_targets_mean": 3096.4,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 0.7901668129938543,
|
|
"grad_norm": 0.949471053246281,
|
|
"learning_rate": 3.998086891091878e-05,
|
|
"loss": 0.3556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19485443830490112,
|
|
"step": 450,
|
|
"valid_targets_mean": 2158.2,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 0.7989464442493416,
|
|
"grad_norm": 0.8863755767146599,
|
|
"learning_rate": 3.9976852157351625e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19970840215682983,
|
|
"step": 455,
|
|
"valid_targets_mean": 2431.2,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 0.8077260755048288,
|
|
"grad_norm": 0.7341440782243831,
|
|
"learning_rate": 3.997245316451105e-05,
|
|
"loss": 0.3551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09942640364170074,
|
|
"step": 460,
|
|
"valid_targets_mean": 1549.9,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 0.8165057067603161,
|
|
"grad_norm": 0.937611553680699,
|
|
"learning_rate": 3.996767201656789e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24870307743549347,
|
|
"step": 465,
|
|
"valid_targets_mean": 2782.8,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.8252853380158033,
|
|
"grad_norm": 0.9029665254568031,
|
|
"learning_rate": 3.9962508805005135e-05,
|
|
"loss": 0.3589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256511926651001,
|
|
"step": 470,
|
|
"valid_targets_mean": 3239.0,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.8340649692712906,
|
|
"grad_norm": 0.8842374578079575,
|
|
"learning_rate": 3.995696362861625e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21527531743049622,
|
|
"step": 475,
|
|
"valid_targets_mean": 2603.1,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 0.8428446005267779,
|
|
"grad_norm": 0.7912463919307183,
|
|
"learning_rate": 3.9951036593503236e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540820240974426,
|
|
"step": 480,
|
|
"valid_targets_mean": 3065.9,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.8516242317822651,
|
|
"grad_norm": 0.7736446570622055,
|
|
"learning_rate": 3.9944727813074635e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19993150234222412,
|
|
"step": 485,
|
|
"valid_targets_mean": 3115.9,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 0.8604038630377524,
|
|
"grad_norm": 0.8810510874065453,
|
|
"learning_rate": 3.9938037408043354e-05,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14277991652488708,
|
|
"step": 490,
|
|
"valid_targets_mean": 1796.1,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 0.8691834942932397,
|
|
"grad_norm": 0.8591406227555088,
|
|
"learning_rate": 3.993096550642431e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20769265294075012,
|
|
"step": 495,
|
|
"valid_targets_mean": 2972.9,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.8779631255487269,
|
|
"grad_norm": 0.7629887874974166,
|
|
"learning_rate": 3.992351224353207e-05,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25301894545555115,
|
|
"step": 500,
|
|
"valid_targets_mean": 3497.5,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.8867427568042142,
|
|
"grad_norm": 0.7690271720903777,
|
|
"learning_rate": 3.991567776197815e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1995421200990677,
|
|
"step": 505,
|
|
"valid_targets_mean": 2839.8,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.8645172082755588,
|
|
"learning_rate": 3.9907462211668404e-05,
|
|
"loss": 0.369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20652811229228973,
|
|
"step": 510,
|
|
"valid_targets_mean": 2295.2,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.9043020193151887,
|
|
"grad_norm": 1.3663721417389973,
|
|
"learning_rate": 3.989886574980005e-05,
|
|
"loss": 0.3388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2137913703918457,
|
|
"step": 515,
|
|
"valid_targets_mean": 2360.9,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 0.913081650570676,
|
|
"grad_norm": 0.7783730401846496,
|
|
"learning_rate": 3.988988854085875e-05,
|
|
"loss": 0.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15000317990779877,
|
|
"step": 520,
|
|
"valid_targets_mean": 2384.8,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 0.9218612818261633,
|
|
"grad_norm": 0.6874560578632031,
|
|
"learning_rate": 3.9880530756615386e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17067737877368927,
|
|
"step": 525,
|
|
"valid_targets_mean": 3469.6,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.9306409130816505,
|
|
"grad_norm": 0.6175461448857309,
|
|
"learning_rate": 3.9870792576122825e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11742635071277618,
|
|
"step": 530,
|
|
"valid_targets_mean": 3189.1,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 0.9394205443371378,
|
|
"grad_norm": 0.7893030708794482,
|
|
"learning_rate": 3.986067418571249e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1232496052980423,
|
|
"step": 535,
|
|
"valid_targets_mean": 2514.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 0.9482001755926251,
|
|
"grad_norm": 0.8022408016088783,
|
|
"learning_rate": 3.985017577899075e-05,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17317229509353638,
|
|
"step": 540,
|
|
"valid_targets_mean": 2394.0,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 0.9569798068481123,
|
|
"grad_norm": 0.7271059335394584,
|
|
"learning_rate": 3.9839297556835273e-05,
|
|
"loss": 0.353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1846744418144226,
|
|
"step": 545,
|
|
"valid_targets_mean": 3618.8,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 0.9657594381035997,
|
|
"grad_norm": 0.9684984226411585,
|
|
"learning_rate": 3.982803972739117e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20731565356254578,
|
|
"step": 550,
|
|
"valid_targets_mean": 3154.6,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.974539069359087,
|
|
"grad_norm": 0.7295746570548438,
|
|
"learning_rate": 3.9816402506066955e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22044870257377625,
|
|
"step": 555,
|
|
"valid_targets_mean": 2985.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.9833187006145742,
|
|
"grad_norm": 0.8549901744742142,
|
|
"learning_rate": 3.9804386115530506e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14425580203533173,
|
|
"step": 560,
|
|
"valid_targets_mean": 3023.5,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 0.9920983318700615,
|
|
"grad_norm": 0.7319299496923765,
|
|
"learning_rate": 3.979199078570476e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12932679057121277,
|
|
"step": 565,
|
|
"valid_targets_mean": 2053.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 1.3054497119690325,
|
|
"learning_rate": 3.9779216753763294e-05,
|
|
"loss": 0.3737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3890548646450043,
|
|
"step": 570,
|
|
"valid_targets_mean": 2471.8,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.0087796312554873,
|
|
"grad_norm": 0.5658802825241621,
|
|
"learning_rate": 3.976606426412585e-05,
|
|
"loss": 0.3321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1617394983768463,
|
|
"step": 575,
|
|
"valid_targets_mean": 3882.4,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 1.0175592625109746,
|
|
"grad_norm": 0.8424151472525372,
|
|
"learning_rate": 3.9752533568453595e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16494178771972656,
|
|
"step": 580,
|
|
"valid_targets_mean": 2461.9,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 1.0263388937664617,
|
|
"grad_norm": 0.8309145453670582,
|
|
"learning_rate": 3.973862492564434e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15589746832847595,
|
|
"step": 585,
|
|
"valid_targets_mean": 2177.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.035118525021949,
|
|
"grad_norm": 0.9920725838084226,
|
|
"learning_rate": 3.972433860182757e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16900984942913055,
|
|
"step": 590,
|
|
"valid_targets_mean": 2022.5,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 1.0438981562774363,
|
|
"grad_norm": 0.8745251330922201,
|
|
"learning_rate": 3.970967487035937e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16499337553977966,
|
|
"step": 595,
|
|
"valid_targets_mean": 1975.0,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.0526777875329236,
|
|
"grad_norm": 0.742581880393213,
|
|
"learning_rate": 3.96946340118172e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15382374823093414,
|
|
"step": 600,
|
|
"valid_targets_mean": 2285.2,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.061457418788411,
|
|
"grad_norm": 2.4079641059871832,
|
|
"learning_rate": 3.967921631399447e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1846892237663269,
|
|
"step": 605,
|
|
"valid_targets_mean": 2321.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.0702370500438982,
|
|
"grad_norm": 0.8299953691608384,
|
|
"learning_rate": 3.9663422071895103e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11769893765449524,
|
|
"step": 610,
|
|
"valid_targets_mean": 1778.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.0790166812993853,
|
|
"grad_norm": 0.8307438380547183,
|
|
"learning_rate": 3.964725158772785e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22870968282222748,
|
|
"step": 615,
|
|
"valid_targets_mean": 3147.8,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.0877963125548726,
|
|
"grad_norm": 0.7011260508087803,
|
|
"learning_rate": 3.963070517090054e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18899005651474,
|
|
"step": 620,
|
|
"valid_targets_mean": 4037.2,
|
|
"valid_targets_min": 214
|
|
},
|
|
{
|
|
"epoch": 1.09657594381036,
|
|
"grad_norm": 0.9640125434002211,
|
|
"learning_rate": 3.96137831380141e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1828576922416687,
|
|
"step": 625,
|
|
"valid_targets_mean": 1967.5,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.1053555750658473,
|
|
"grad_norm": 0.8005836490748413,
|
|
"learning_rate": 3.959648581285657e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17066419124603271,
|
|
"step": 630,
|
|
"valid_targets_mean": 2840.9,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 1.1141352063213346,
|
|
"grad_norm": 0.6869055964816603,
|
|
"learning_rate": 3.9578813526396855e-05,
|
|
"loss": 0.339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0994076132774353,
|
|
"step": 635,
|
|
"valid_targets_mean": 1960.6,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 1.1229148375768219,
|
|
"grad_norm": 0.7286253219130021,
|
|
"learning_rate": 3.9560766616778414e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20293965935707092,
|
|
"step": 640,
|
|
"valid_targets_mean": 3433.0,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.131694468832309,
|
|
"grad_norm": 0.8224041563469209,
|
|
"learning_rate": 3.95423454293128e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15849584341049194,
|
|
"step": 645,
|
|
"valid_targets_mean": 2912.8,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 1.1404741000877963,
|
|
"grad_norm": 0.8378024155184985,
|
|
"learning_rate": 3.9523550316473016e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15086118876934052,
|
|
"step": 650,
|
|
"valid_targets_mean": 2632.1,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 1.1492537313432836,
|
|
"grad_norm": 0.712006236672746,
|
|
"learning_rate": 3.950438163788682e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17964491248130798,
|
|
"step": 655,
|
|
"valid_targets_mean": 3271.9,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.1580333625987709,
|
|
"grad_norm": 0.7433054557932156,
|
|
"learning_rate": 3.94848397603298e-05,
|
|
"loss": 0.3432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15217933058738708,
|
|
"step": 660,
|
|
"valid_targets_mean": 2735.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.1668129938542582,
|
|
"grad_norm": 0.6394934710797812,
|
|
"learning_rate": 3.9464925057718354e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16042321920394897,
|
|
"step": 665,
|
|
"valid_targets_mean": 3745.9,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 1.1755926251097453,
|
|
"grad_norm": 0.6436175724175055,
|
|
"learning_rate": 3.9444637911102614e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17277657985687256,
|
|
"step": 670,
|
|
"valid_targets_mean": 3997.4,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.1843722563652326,
|
|
"grad_norm": 0.8118010311912865,
|
|
"learning_rate": 3.942397870865904e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09649040549993515,
|
|
"step": 675,
|
|
"valid_targets_mean": 1486.1,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 1.1931518876207199,
|
|
"grad_norm": 0.7682552267143881,
|
|
"learning_rate": 3.940294784568308e-05,
|
|
"loss": 0.3432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1452936977148056,
|
|
"step": 680,
|
|
"valid_targets_mean": 2322.8,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.2019315188762072,
|
|
"grad_norm": 0.7318226890222886,
|
|
"learning_rate": 3.938154572458156e-05,
|
|
"loss": 0.3375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1942543238401413,
|
|
"step": 685,
|
|
"valid_targets_mean": 3165.0,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 1.2107111501316945,
|
|
"grad_norm": 0.7160347918557816,
|
|
"learning_rate": 3.935977275486501e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11846923828125,
|
|
"step": 690,
|
|
"valid_targets_mean": 1687.0,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 1.2194907813871818,
|
|
"grad_norm": 0.6735905922884531,
|
|
"learning_rate": 3.933762935313981e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1669241189956665,
|
|
"step": 695,
|
|
"valid_targets_mean": 3487.4,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 1.2282704126426691,
|
|
"grad_norm": 0.9132447844550329,
|
|
"learning_rate": 3.931511594310023e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24320876598358154,
|
|
"step": 700,
|
|
"valid_targets_mean": 3073.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.2370500438981562,
|
|
"grad_norm": 0.9525979690696377,
|
|
"learning_rate": 3.929223295552032e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07115025073289871,
|
|
"step": 705,
|
|
"valid_targets_mean": 831.6,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 1.2458296751536435,
|
|
"grad_norm": 0.7517644219341493,
|
|
"learning_rate": 3.9268980828245667e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15075862407684326,
|
|
"step": 710,
|
|
"valid_targets_mean": 3593.6,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 1.2546093064091308,
|
|
"grad_norm": 1.3458428142156125,
|
|
"learning_rate": 3.924536000618501e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13517750799655914,
|
|
"step": 715,
|
|
"valid_targets_mean": 1862.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.2633889376646181,
|
|
"grad_norm": 0.874400179327945,
|
|
"learning_rate": 3.922137094130175e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17271219193935394,
|
|
"step": 720,
|
|
"valid_targets_mean": 2219.0,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 1.2721685689201054,
|
|
"grad_norm": 0.7323861423954755,
|
|
"learning_rate": 3.919701409260528e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2270701825618744,
|
|
"step": 725,
|
|
"valid_targets_mean": 3927.0,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 1.2809482001755925,
|
|
"grad_norm": 0.5895559340319005,
|
|
"learning_rate": 3.917228992614219e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14756271243095398,
|
|
"step": 730,
|
|
"valid_targets_mean": 3690.6,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.2897278314310798,
|
|
"grad_norm": 0.8040724667929208,
|
|
"learning_rate": 3.91471989149874e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1643550843000412,
|
|
"step": 735,
|
|
"valid_targets_mean": 1964.0,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 1.2985074626865671,
|
|
"grad_norm": 0.7975135736112822,
|
|
"learning_rate": 3.912174153923506e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16638962924480438,
|
|
"step": 740,
|
|
"valid_targets_mean": 2437.5,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.3072870939420544,
|
|
"grad_norm": 0.7382638202656249,
|
|
"learning_rate": 3.909591828598936e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22248606383800507,
|
|
"step": 745,
|
|
"valid_targets_mean": 3534.9,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.3160667251975418,
|
|
"grad_norm": 0.8075903889889201,
|
|
"learning_rate": 3.906972964935528e-05,
|
|
"loss": 0.3402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15773217380046844,
|
|
"step": 750,
|
|
"valid_targets_mean": 2483.9,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.3248463564530288,
|
|
"grad_norm": 0.6205187477387278,
|
|
"learning_rate": 3.904317613042905e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16767677664756775,
|
|
"step": 755,
|
|
"valid_targets_mean": 4421.6,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 1.3336259877085164,
|
|
"grad_norm": 0.80967688914638,
|
|
"learning_rate": 3.901625823728859e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18271921575069427,
|
|
"step": 760,
|
|
"valid_targets_mean": 2807.5,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 1.3424056189640035,
|
|
"grad_norm": 2.212794684437827,
|
|
"learning_rate": 3.8988976484983815e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1464945673942566,
|
|
"step": 765,
|
|
"valid_targets_mean": 2755.6,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 1.3511852502194908,
|
|
"grad_norm": 0.7156662584535772,
|
|
"learning_rate": 3.8961331395526765e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10767711699008942,
|
|
"step": 770,
|
|
"valid_targets_mean": 2403.6,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.359964881474978,
|
|
"grad_norm": 0.645029889822078,
|
|
"learning_rate": 3.893332349788161e-05,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16532206535339355,
|
|
"step": 775,
|
|
"valid_targets_mean": 3648.1,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 1.3687445127304654,
|
|
"grad_norm": 0.6934432870232037,
|
|
"learning_rate": 3.89049533279545e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13451361656188965,
|
|
"step": 780,
|
|
"valid_targets_mean": 2477.0,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 1.3775241439859527,
|
|
"grad_norm": 0.5837298651394063,
|
|
"learning_rate": 3.8876221428583394e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1862315833568573,
|
|
"step": 785,
|
|
"valid_targets_mean": 4296.8,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.3863037752414398,
|
|
"grad_norm": 0.8534101532979382,
|
|
"learning_rate": 3.8847128349527576e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1887424886226654,
|
|
"step": 790,
|
|
"valid_targets_mean": 2665.0,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 1.395083406496927,
|
|
"grad_norm": 0.6737437786447367,
|
|
"learning_rate": 3.881767464745722e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19340237975120544,
|
|
"step": 795,
|
|
"valid_targets_mean": 4150.0,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 1.4038630377524144,
|
|
"grad_norm": 0.6271055486559424,
|
|
"learning_rate": 3.878786088594268e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17191752791404724,
|
|
"step": 800,
|
|
"valid_targets_mean": 3981.2,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 1.4126426690079017,
|
|
"grad_norm": 0.7190437895018112,
|
|
"learning_rate": 3.875768763544371e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19700069725513458,
|
|
"step": 805,
|
|
"valid_targets_mean": 4559.0,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.421422300263389,
|
|
"grad_norm": 0.9020850302025065,
|
|
"learning_rate": 3.87271554732986e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18454861640930176,
|
|
"step": 810,
|
|
"valid_targets_mean": 2386.5,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.430201931518876,
|
|
"grad_norm": 0.7256907655165707,
|
|
"learning_rate": 3.8696264983713076e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251751333475113,
|
|
"step": 815,
|
|
"valid_targets_mean": 2455.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.4389815627743634,
|
|
"grad_norm": 0.7119626252490737,
|
|
"learning_rate": 3.866501675774914e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14000914990901947,
|
|
"step": 820,
|
|
"valid_targets_mean": 2235.5,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.4477611940298507,
|
|
"grad_norm": 0.7031774428666011,
|
|
"learning_rate": 3.863341139331378e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10570028424263,
|
|
"step": 825,
|
|
"valid_targets_mean": 1939.5,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 1.456540825285338,
|
|
"grad_norm": 0.7933070022544044,
|
|
"learning_rate": 3.860144949514747e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14249233901500702,
|
|
"step": 830,
|
|
"valid_targets_mean": 1868.2,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.4653204565408253,
|
|
"grad_norm": 0.7151121142421881,
|
|
"learning_rate": 3.856913167481267e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296643167734146,
|
|
"step": 835,
|
|
"valid_targets_mean": 2380.4,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 1.4741000877963126,
|
|
"grad_norm": 0.8840713581523054,
|
|
"learning_rate": 3.8536458550682104e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10282494127750397,
|
|
"step": 840,
|
|
"valid_targets_mean": 1366.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.4828797190518,
|
|
"grad_norm": 0.6860738276072148,
|
|
"learning_rate": 3.850343074792689e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20660808682441711,
|
|
"step": 845,
|
|
"valid_targets_mean": 3317.8,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.491659350307287,
|
|
"grad_norm": 0.6873714048441973,
|
|
"learning_rate": 3.847004889850461e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23735255002975464,
|
|
"step": 850,
|
|
"valid_targets_mean": 4418.1,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.5004389815627743,
|
|
"grad_norm": 0.6893004178332652,
|
|
"learning_rate": 3.8436313641147236e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2183263897895813,
|
|
"step": 855,
|
|
"valid_targets_mean": 3539.6,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.5092186128182616,
|
|
"grad_norm": 0.730389299659962,
|
|
"learning_rate": 3.8402225621348863e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15245646238327026,
|
|
"step": 860,
|
|
"valid_targets_mean": 2724.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.517998244073749,
|
|
"grad_norm": 0.6736190766064171,
|
|
"learning_rate": 3.836778549135339e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1829409897327423,
|
|
"step": 865,
|
|
"valid_targets_mean": 3613.8,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 1.5267778753292363,
|
|
"grad_norm": 0.7180019523430965,
|
|
"learning_rate": 3.8332993910142026e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2356838881969452,
|
|
"step": 870,
|
|
"valid_targets_mean": 3081.1,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.5355575065847233,
|
|
"grad_norm": 0.7020060732056074,
|
|
"learning_rate": 3.829785154342069e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16169404983520508,
|
|
"step": 875,
|
|
"valid_targets_mean": 2567.4,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 1.5443371378402109,
|
|
"grad_norm": 0.7446053421935439,
|
|
"learning_rate": 3.8262359063607285e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20872217416763306,
|
|
"step": 880,
|
|
"valid_targets_mean": 3206.1,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.553116769095698,
|
|
"grad_norm": 0.6862793379990826,
|
|
"learning_rate": 3.822651714981878e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17758795619010925,
|
|
"step": 885,
|
|
"valid_targets_mean": 2799.4,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.5618964003511853,
|
|
"grad_norm": 0.7877484897140691,
|
|
"learning_rate": 3.8190326487858286e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14113770425319672,
|
|
"step": 890,
|
|
"valid_targets_mean": 2319.4,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 1.5706760316066726,
|
|
"grad_norm": 0.7927242837931733,
|
|
"learning_rate": 3.815378777020187e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19230298697948456,
|
|
"step": 895,
|
|
"valid_targets_mean": 2558.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.5794556628621597,
|
|
"grad_norm": 0.6903279024377412,
|
|
"learning_rate": 3.8116901695985374e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09861838817596436,
|
|
"step": 900,
|
|
"valid_targets_mean": 2197.0,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.5882352941176472,
|
|
"grad_norm": 0.7292112373546713,
|
|
"learning_rate": 3.8079668970990954e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2330915927886963,
|
|
"step": 905,
|
|
"valid_targets_mean": 2896.5,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.5970149253731343,
|
|
"grad_norm": 0.7375532311007227,
|
|
"learning_rate": 3.804209030763365e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19229960441589355,
|
|
"step": 910,
|
|
"valid_targets_mean": 2635.8,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 1.6057945566286216,
|
|
"grad_norm": 0.8498330810171704,
|
|
"learning_rate": 3.800416642494772e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12928307056427002,
|
|
"step": 915,
|
|
"valid_targets_mean": 1691.9,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.614574187884109,
|
|
"grad_norm": 0.6724158792055428,
|
|
"learning_rate": 3.796589804857287e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21746611595153809,
|
|
"step": 920,
|
|
"valid_targets_mean": 3560.2,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 1.623353819139596,
|
|
"grad_norm": 0.5673725869207022,
|
|
"learning_rate": 3.792728591074041e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12107379734516144,
|
|
"step": 925,
|
|
"valid_targets_mean": 3402.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.6321334503950835,
|
|
"grad_norm": 0.7069925592002697,
|
|
"learning_rate": 3.788833075025919e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16514700651168823,
|
|
"step": 930,
|
|
"valid_targets_mean": 2386.5,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 1.6409130816505706,
|
|
"grad_norm": 0.5945510782336139,
|
|
"learning_rate": 3.784903331250153e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13952824473381042,
|
|
"step": 935,
|
|
"valid_targets_mean": 3267.1,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.649692712906058,
|
|
"grad_norm": 0.9470117901050872,
|
|
"learning_rate": 3.7809394349388895e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16546159982681274,
|
|
"step": 940,
|
|
"valid_targets_mean": 4001.9,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.6584723441615452,
|
|
"grad_norm": 1.3256385721462167,
|
|
"learning_rate": 3.776941461937753e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13232488930225372,
|
|
"step": 945,
|
|
"valid_targets_mean": 2329.9,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 1.6672519754170325,
|
|
"grad_norm": 0.66389442654088,
|
|
"learning_rate": 3.772909488744397e-05,
|
|
"loss": 0.3206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10045899450778961,
|
|
"step": 950,
|
|
"valid_targets_mean": 2304.8,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 1.6760316066725198,
|
|
"grad_norm": 0.8003202751907905,
|
|
"learning_rate": 3.768843592507037e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11091996729373932,
|
|
"step": 955,
|
|
"valid_targets_mean": 1653.0,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.684811237928007,
|
|
"grad_norm": 0.6701511352476314,
|
|
"learning_rate": 3.764743851022976e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16563698649406433,
|
|
"step": 960,
|
|
"valid_targets_mean": 2978.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.6935908691834944,
|
|
"grad_norm": 0.6625366454655057,
|
|
"learning_rate": 3.7606103427371165e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22625257074832916,
|
|
"step": 965,
|
|
"valid_targets_mean": 4278.1,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 1.7023705004389815,
|
|
"grad_norm": 0.8125684926044928,
|
|
"learning_rate": 3.756443146740457e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21559211611747742,
|
|
"step": 970,
|
|
"valid_targets_mean": 2246.2,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 1.7111501316944688,
|
|
"grad_norm": 0.7710369881648462,
|
|
"learning_rate": 3.752242342768583e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2436092644929886,
|
|
"step": 975,
|
|
"valid_targets_mean": 2984.6,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.7199297629499561,
|
|
"grad_norm": 0.6477678621617686,
|
|
"learning_rate": 3.748008011200135e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20803940296173096,
|
|
"step": 980,
|
|
"valid_targets_mean": 3642.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.7287093942054432,
|
|
"grad_norm": 0.6657496280517152,
|
|
"learning_rate": 3.743740233055277e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12804578244686127,
|
|
"step": 985,
|
|
"valid_targets_mean": 2221.4,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 1.7374890254609308,
|
|
"grad_norm": 0.8611246908495322,
|
|
"learning_rate": 3.739439089994143e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13701793551445007,
|
|
"step": 990,
|
|
"valid_targets_mean": 2900.9,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.7462686567164178,
|
|
"grad_norm": 1.1657543891196207,
|
|
"learning_rate": 3.735104664315274e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15635094046592712,
|
|
"step": 995,
|
|
"valid_targets_mean": 1891.6,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 1.7550482879719052,
|
|
"grad_norm": 0.726898657751365,
|
|
"learning_rate": 3.7307370389540463e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442728042602539,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2370.6,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 1.7638279192273925,
|
|
"grad_norm": 0.6003506901429891,
|
|
"learning_rate": 3.726336297481079e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16724348068237305,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3855.0,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 1.7726075504828798,
|
|
"grad_norm": 0.7870312679692962,
|
|
"learning_rate": 3.721902524100642e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12998312711715698,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2186.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.781387181738367,
|
|
"grad_norm": 0.6678541696646165,
|
|
"learning_rate": 3.717435803649039e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06689616292715073,
|
|
"step": 1015,
|
|
"valid_targets_mean": 1442.9,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 1.7901668129938542,
|
|
"grad_norm": 0.6898521836933139,
|
|
"learning_rate": 3.712936221592989e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16238541901111603,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3150.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 1.7989464442493417,
|
|
"grad_norm": 0.7771437790788652,
|
|
"learning_rate": 3.708403864027985e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13155479729175568,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2206.8,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 1.8077260755048288,
|
|
"grad_norm": 0.6534952422139094,
|
|
"learning_rate": 3.703838817676654e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15612106025218964,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3930.0,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 1.816505706760316,
|
|
"grad_norm": 0.8791616133256746,
|
|
"learning_rate": 3.699241169887092e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0937880352139473,
|
|
"step": 1035,
|
|
"valid_targets_mean": 1279.8,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 1.8252853380158034,
|
|
"grad_norm": 0.6399171372655785,
|
|
"learning_rate": 3.694611008631193e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1465381532907486,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3074.2,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 1.8340649692712905,
|
|
"grad_norm": 0.6903210043747664,
|
|
"learning_rate": 3.68994842250297e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216018944978714,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2202.1,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 1.842844600526778,
|
|
"grad_norm": 1.6690932839104002,
|
|
"learning_rate": 3.685253500716855e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09076965600252151,
|
|
"step": 1050,
|
|
"valid_targets_mean": 1485.9,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 1.851624231782265,
|
|
"grad_norm": 0.8499858278574126,
|
|
"learning_rate": 3.680526333105995e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24876947700977325,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3152.8,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.8604038630377524,
|
|
"grad_norm": 0.6600113213874604,
|
|
"learning_rate": 3.6757670101205326e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19562354683876038,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3505.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 1.8691834942932397,
|
|
"grad_norm": 0.6644007899337387,
|
|
"learning_rate": 3.6709756228258735e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12714380025863647,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2561.6,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.8779631255487268,
|
|
"grad_norm": 0.9366046724359708,
|
|
"learning_rate": 3.666152262900946e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646752655506134,
|
|
"step": 1070,
|
|
"valid_targets_mean": 2748.4,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.8867427568042143,
|
|
"grad_norm": 0.7132903562291403,
|
|
"learning_rate": 3.661297022636446e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18244531750679016,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3372.2,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 1.8955223880597014,
|
|
"grad_norm": 0.8517616604775224,
|
|
"learning_rate": 3.656409994933073e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22797738015651703,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4430.1,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 1.9043020193151887,
|
|
"grad_norm": 0.5928389674603858,
|
|
"learning_rate": 3.6514912732997475e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2125198245048523,
|
|
"step": 1085,
|
|
"valid_targets_mean": 5836.5,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 1.913081650570676,
|
|
"grad_norm": 0.6480835767671451,
|
|
"learning_rate": 3.646540951851829e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18813969194889069,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3124.1,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 1.9218612818261633,
|
|
"grad_norm": 0.7365627063699877,
|
|
"learning_rate": 3.641559125309308e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21435289084911346,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3572.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.9306409130816506,
|
|
"grad_norm": 0.5588476524235052,
|
|
"learning_rate": 3.636545888994999e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19628357887268066,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5029.0,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 1.9394205443371377,
|
|
"grad_norm": 0.7200678433199417,
|
|
"learning_rate": 3.631501338832712e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18402661383152008,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2395.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 1.9482001755926253,
|
|
"grad_norm": 0.7219897175963325,
|
|
"learning_rate": 3.626425571345425e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1533927321434021,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2343.4,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 1.9569798068481123,
|
|
"grad_norm": 0.6134680529549916,
|
|
"learning_rate": 3.621318683653425e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267402023077011,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2345.8,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.9657594381035997,
|
|
"grad_norm": 0.9874416684003995,
|
|
"learning_rate": 3.6161807734724594e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15197385847568512,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2811.9,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.974539069359087,
|
|
"grad_norm": 0.6644292119424218,
|
|
"learning_rate": 3.6110119391118646e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571672201156616,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3664.5,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.983318700614574,
|
|
"grad_norm": 0.7719754907399858,
|
|
"learning_rate": 3.6058122794726826e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19154061377048492,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2354.5,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 1.9920983318700616,
|
|
"grad_norm": 0.9114309226464474,
|
|
"learning_rate": 3.600581894045768e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13695700466632843,
|
|
"step": 1135,
|
|
"valid_targets_mean": 2639.0,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 1.2914762909704565,
|
|
"learning_rate": 3.59532088290989e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26202312111854553,
|
|
"step": 1140,
|
|
"valid_targets_mean": 1359.6,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 2.008779631255487,
|
|
"grad_norm": 0.6081223497773917,
|
|
"learning_rate": 3.590029346729808e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12928320467472076,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2820.0,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 2.0175592625109746,
|
|
"grad_norm": 0.7320731594992131,
|
|
"learning_rate": 3.5847073867543556e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14755654335021973,
|
|
"step": 1150,
|
|
"valid_targets_mean": 2646.4,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.0263388937664617,
|
|
"grad_norm": 0.6949642152596395,
|
|
"learning_rate": 3.579355104814497e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1986621916294098,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3490.5,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 2.0351185250219492,
|
|
"grad_norm": 0.8505678300053415,
|
|
"learning_rate": 3.5739726033213785e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1399223506450653,
|
|
"step": 1160,
|
|
"valid_targets_mean": 1704.2,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 2.0438981562774363,
|
|
"grad_norm": 0.6212802346767822,
|
|
"learning_rate": 3.5685599852643734e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17662954330444336,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4680.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 2.0526777875329234,
|
|
"grad_norm": 0.8059953616004798,
|
|
"learning_rate": 3.563117354209107e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15564998984336853,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3216.6,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 2.061457418788411,
|
|
"grad_norm": 0.9568065723354683,
|
|
"learning_rate": 3.557644814295477e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16575339436531067,
|
|
"step": 1175,
|
|
"valid_targets_mean": 1826.0,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 2.070237050043898,
|
|
"grad_norm": 0.6719671096224339,
|
|
"learning_rate": 3.552142470235661e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19325456023216248,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3607.0,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 2.0790166812993855,
|
|
"grad_norm": 0.7474817412554579,
|
|
"learning_rate": 3.546610427312112e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13441506028175354,
|
|
"step": 1185,
|
|
"valid_targets_mean": 2038.8,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 2.0877963125548726,
|
|
"grad_norm": 0.9242135205193751,
|
|
"learning_rate": 3.541048791375542e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417178213596344,
|
|
"step": 1190,
|
|
"valid_targets_mean": 1751.0,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 2.09657594381036,
|
|
"grad_norm": 0.8918443687477924,
|
|
"learning_rate": 3.535457668842904e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14685913920402527,
|
|
"step": 1195,
|
|
"valid_targets_mean": 1678.1,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 2.1053555750658473,
|
|
"grad_norm": 0.6964162845630637,
|
|
"learning_rate": 3.5298371666953466e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15706263482570648,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2868.9,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 2.1141352063213343,
|
|
"grad_norm": 0.7167070827307819,
|
|
"learning_rate": 3.524187392476172e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16277706623077393,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2353.8,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 2.122914837576822,
|
|
"grad_norm": 0.8105196123823601,
|
|
"learning_rate": 3.5185084542887776e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11192994564771652,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2002.2,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 2.131694468832309,
|
|
"grad_norm": 0.7679097379423208,
|
|
"learning_rate": 3.512800460794589e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1804812103509903,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2551.1,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 2.1404741000877965,
|
|
"grad_norm": 0.701188193332664,
|
|
"learning_rate": 3.507063521210977e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16829782724380493,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3159.9,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 2.1492537313432836,
|
|
"grad_norm": 0.6701988518699779,
|
|
"learning_rate": 3.5012977453091715e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09936836361885071,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2033.2,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 2.1580333625987707,
|
|
"grad_norm": 0.7555331651384278,
|
|
"learning_rate": 3.495503243412157e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08086525648832321,
|
|
"step": 1230,
|
|
"valid_targets_mean": 1276.1,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.166812993854258,
|
|
"grad_norm": 1.0503553922452193,
|
|
"learning_rate": 3.4896801263925685e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134367898106575,
|
|
"step": 1235,
|
|
"valid_targets_mean": 1603.0,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 2.1755926251097453,
|
|
"grad_norm": 1.1185558334057126,
|
|
"learning_rate": 3.483828505670563e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09935767948627472,
|
|
"step": 1240,
|
|
"valid_targets_mean": 1370.1,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 2.184372256365233,
|
|
"grad_norm": 0.9632501603992967,
|
|
"learning_rate": 3.4779484932116916e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09757079184055328,
|
|
"step": 1245,
|
|
"valid_targets_mean": 1834.5,
|
|
"valid_targets_min": 174
|
|
},
|
|
{
|
|
"epoch": 2.19315188762072,
|
|
"grad_norm": 1.2195737596795637,
|
|
"learning_rate": 3.4720402015247555e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1475660800933838,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2027.6,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 2.201931518876207,
|
|
"grad_norm": 0.7005254213722892,
|
|
"learning_rate": 3.4661037436596526e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08502741903066635,
|
|
"step": 1255,
|
|
"valid_targets_mean": 1701.0,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 2.2107111501316945,
|
|
"grad_norm": 0.7536798541171645,
|
|
"learning_rate": 3.460139233205219e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09824533760547638,
|
|
"step": 1260,
|
|
"valid_targets_mean": 1317.5,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 2.2194907813871816,
|
|
"grad_norm": 0.6366007961303759,
|
|
"learning_rate": 3.4541467842870476e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13212904334068298,
|
|
"step": 1265,
|
|
"valid_targets_mean": 2226.0,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 2.228270412642669,
|
|
"grad_norm": 0.7736240617088077,
|
|
"learning_rate": 3.448126511565312e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1782141923904419,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2714.0,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 2.237050043898156,
|
|
"grad_norm": 0.6231636131152412,
|
|
"learning_rate": 3.44207853023257e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17305819690227509,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3670.9,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 2.2458296751536437,
|
|
"grad_norm": 0.7135091148529686,
|
|
"learning_rate": 3.436002956011556e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14135125279426575,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2338.2,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 2.254609306409131,
|
|
"grad_norm": 0.7182490222093894,
|
|
"learning_rate": 3.429899905152972e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11369633674621582,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2355.4,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 2.263388937664618,
|
|
"grad_norm": 0.6390381675204151,
|
|
"learning_rate": 3.423769494433261e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1359964907169342,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3101.9,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 2.2721685689201054,
|
|
"grad_norm": 0.7511174571996146,
|
|
"learning_rate": 3.417611841152371e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259186863899231,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2865.4,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 2.2809482001755925,
|
|
"grad_norm": 0.8231320982404742,
|
|
"learning_rate": 3.411427063131514e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12391317635774612,
|
|
"step": 1300,
|
|
"valid_targets_mean": 2249.5,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 2.28972783143108,
|
|
"grad_norm": 0.6510421632888698,
|
|
"learning_rate": 3.405215278710908e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15197333693504333,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2728.5,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 2.298507462686567,
|
|
"grad_norm": 0.7001937104291418,
|
|
"learning_rate": 3.3989766067475164e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1537274271249771,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2937.6,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 2.3072870939420547,
|
|
"grad_norm": 0.592104266514974,
|
|
"learning_rate": 3.3927111666127695e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13895496726036072,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3689.1,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 2.3160667251975418,
|
|
"grad_norm": 0.7973048014131275,
|
|
"learning_rate": 3.386419078190285e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1715036928653717,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2656.9,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.324846356453029,
|
|
"grad_norm": 0.7250857235390548,
|
|
"learning_rate": 3.3801004618735696e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16707965731620789,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3347.6,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 2.3336259877085164,
|
|
"grad_norm": 0.6736399660327292,
|
|
"learning_rate": 3.37375543856372e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2123875617980957,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3108.9,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 2.3424056189640035,
|
|
"grad_norm": 0.6367145688503011,
|
|
"learning_rate": 3.367384129667105e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458130180835724,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3328.2,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 2.3511852502194905,
|
|
"grad_norm": 0.6530574758994966,
|
|
"learning_rate": 3.3609866570930454e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13274237513542175,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2562.5,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 2.359964881474978,
|
|
"grad_norm": 0.7471079637872791,
|
|
"learning_rate": 3.354563143251483e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1927621215581894,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3474.5,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 2.368744512730465,
|
|
"grad_norm": 0.8177865697582912,
|
|
"learning_rate": 3.3481137110506305e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289474070072174,
|
|
"step": 1350,
|
|
"valid_targets_mean": 1818.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 2.3775241439859527,
|
|
"grad_norm": 0.6741042511353594,
|
|
"learning_rate": 3.341638483894633e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14325489103794098,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2550.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 2.3863037752414398,
|
|
"grad_norm": 0.8301400900085244,
|
|
"learning_rate": 3.335137585681191e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12955839931964874,
|
|
"step": 1360,
|
|
"valid_targets_mean": 2651.1,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 2.3950834064969273,
|
|
"grad_norm": 0.8166303783162531,
|
|
"learning_rate": 3.328611140799204e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386774480342865,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2298.6,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 2.4038630377524144,
|
|
"grad_norm": 0.6998317511319757,
|
|
"learning_rate": 3.322059274126381e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24538111686706543,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3212.6,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 2.4126426690079015,
|
|
"grad_norm": 0.7407628155650371,
|
|
"learning_rate": 3.315482111026856e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348312348127365,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2065.2,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 2.421422300263389,
|
|
"grad_norm": 0.7434906822530881,
|
|
"learning_rate": 3.308879777348787e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282220333814621,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2049.0,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 2.430201931518876,
|
|
"grad_norm": 0.7331753804064851,
|
|
"learning_rate": 3.3022523994219464e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13682827353477478,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2182.0,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.4389815627743636,
|
|
"grad_norm": 0.6202599311182682,
|
|
"learning_rate": 3.295600104055311e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12925368547439575,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3058.5,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.4477611940298507,
|
|
"grad_norm": 0.7569744680441515,
|
|
"learning_rate": 3.288923018534626e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1741652935743332,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4308.2,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 2.4565408252853382,
|
|
"grad_norm": 0.7026758382210729,
|
|
"learning_rate": 3.282221270619976e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15387719869613647,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2861.9,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 2.4653204565408253,
|
|
"grad_norm": 0.8509969136596509,
|
|
"learning_rate": 3.2754949885433384e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12103115767240524,
|
|
"step": 1405,
|
|
"valid_targets_mean": 1685.8,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.4741000877963124,
|
|
"grad_norm": 0.6962647438171071,
|
|
"learning_rate": 3.268744301006131e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12691012024879456,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2315.4,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 2.4828797190518,
|
|
"grad_norm": 0.7893800414741867,
|
|
"learning_rate": 3.261969337176745e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12834975123405457,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2328.9,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 2.491659350307287,
|
|
"grad_norm": 1.0699336382871667,
|
|
"learning_rate": 3.2551702266880804e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16013789176940918,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3309.5,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 2.500438981562774,
|
|
"grad_norm": 0.6997094175553933,
|
|
"learning_rate": 3.248347099635061e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1283465325832367,
|
|
"step": 1425,
|
|
"valid_targets_mean": 2400.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.5092186128182616,
|
|
"grad_norm": 0.7284876309120639,
|
|
"learning_rate": 3.2415000865721456e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515335887670517,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2209.1,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 2.517998244073749,
|
|
"grad_norm": 0.7691887349551787,
|
|
"learning_rate": 3.234629318510831e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1613338440656662,
|
|
"step": 1435,
|
|
"valid_targets_mean": 2750.5,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 2.5267778753292363,
|
|
"grad_norm": 0.7342202054748506,
|
|
"learning_rate": 3.2277349269171435e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12877315282821655,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2197.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 2.5355575065847233,
|
|
"grad_norm": 0.644890318945179,
|
|
"learning_rate": 3.2208170437091267e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1004171296954155,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2255.4,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 2.544337137840211,
|
|
"grad_norm": 0.7531758227366692,
|
|
"learning_rate": 3.213875801254314e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15978612005710602,
|
|
"step": 1450,
|
|
"valid_targets_mean": 2803.5,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.553116769095698,
|
|
"grad_norm": 0.6587689330819685,
|
|
"learning_rate": 3.206911332367197e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12457109987735748,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2444.6,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.561896400351185,
|
|
"grad_norm": 0.6234088753409978,
|
|
"learning_rate": 3.199923770306686e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11557507514953613,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2531.4,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.5706760316066726,
|
|
"grad_norm": 0.6620013423297401,
|
|
"learning_rate": 3.192913248773556e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14569777250289917,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2821.1,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 2.5794556628621597,
|
|
"grad_norm": 0.6701368749095417,
|
|
"learning_rate": 3.1858799019078934e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0978354811668396,
|
|
"step": 1470,
|
|
"valid_targets_mean": 1593.2,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 2.588235294117647,
|
|
"grad_norm": 0.9089295720684336,
|
|
"learning_rate": 3.178823864286526e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141740620136261,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2340.8,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 2.5970149253731343,
|
|
"grad_norm": 0.6004311207892523,
|
|
"learning_rate": 3.1717452709204496e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16977515816688538,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4458.5,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 2.605794556628622,
|
|
"grad_norm": 0.6405649969254633,
|
|
"learning_rate": 3.1646442572522434e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15832647681236267,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2947.5,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 2.614574187884109,
|
|
"grad_norm": 0.7121699840006188,
|
|
"learning_rate": 3.157520959153478e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18201756477355957,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2536.9,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 2.623353819139596,
|
|
"grad_norm": 0.6680605157309508,
|
|
"learning_rate": 3.1503755129221203e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18417486548423767,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3527.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.6321334503950835,
|
|
"grad_norm": 0.7317595548856303,
|
|
"learning_rate": 3.1432080552799186e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09091081470251083,
|
|
"step": 1500,
|
|
"valid_targets_mean": 1580.8,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 2.6409130816505706,
|
|
"grad_norm": 0.6652666425255042,
|
|
"learning_rate": 3.136018723369791e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10995607823133469,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2561.8,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 2.6496927129060577,
|
|
"grad_norm": 0.7738227342668844,
|
|
"learning_rate": 3.128807654753201e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19539481401443481,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3084.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 2.658472344161545,
|
|
"grad_norm": 0.5959629114876459,
|
|
"learning_rate": 3.121574987407522e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16805897653102875,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3821.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 2.6672519754170327,
|
|
"grad_norm": 0.6743680422962108,
|
|
"learning_rate": 3.114320859723404e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14956681430339813,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2740.2,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 2.67603160667252,
|
|
"grad_norm": 0.6404574785446321,
|
|
"learning_rate": 3.107045410502118e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10677892714738846,
|
|
"step": 1525,
|
|
"valid_targets_mean": 2518.1,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 2.684811237928007,
|
|
"grad_norm": 0.5926158486875281,
|
|
"learning_rate": 3.099748778952907e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655695140361786,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4260.5,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 2.6935908691834944,
|
|
"grad_norm": 0.6716727811431811,
|
|
"learning_rate": 3.092431104690313e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14358776807785034,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3112.6,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 2.7023705004389815,
|
|
"grad_norm": 0.7528149428484703,
|
|
"learning_rate": 3.0850925277315193e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425172984600067,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2271.1,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 2.7111501316944686,
|
|
"grad_norm": 0.7550479725316581,
|
|
"learning_rate": 3.077733188493658e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12961634993553162,
|
|
"step": 1545,
|
|
"valid_targets_mean": 1772.5,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 2.719929762949956,
|
|
"grad_norm": 0.574473978686262,
|
|
"learning_rate": 3.07035322779113e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1449478566646576,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3720.5,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 2.7287093942054432,
|
|
"grad_norm": 0.6748930787947507,
|
|
"learning_rate": 3.062952786832912e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16915255784988403,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2829.0,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 2.7374890254609308,
|
|
"grad_norm": 0.6732144505525535,
|
|
"learning_rate": 3.0555320072198497e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1620289385318756,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3061.5,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 2.746268656716418,
|
|
"grad_norm": 0.6722852626612,
|
|
"learning_rate": 3.0480910309419515e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717466950416565,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3850.9,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 2.7550482879719054,
|
|
"grad_norm": 0.6465043716047759,
|
|
"learning_rate": 3.040630000375671e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16820460557937622,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3183.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 2.7638279192273925,
|
|
"grad_norm": 0.7495995972046035,
|
|
"learning_rate": 3.0331490582811826e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1770264208316803,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2845.9,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 2.7726075504828795,
|
|
"grad_norm": 0.6358761576635404,
|
|
"learning_rate": 3.025648347799651e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1723857969045639,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3254.6,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 2.781387181738367,
|
|
"grad_norm": 0.7499833522365696,
|
|
"learning_rate": 3.018128012450491e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19977837800979614,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2824.9,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.790166812993854,
|
|
"grad_norm": 0.832597725890587,
|
|
"learning_rate": 3.01058819612862e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21979543566703796,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2422.8,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 2.7989464442493417,
|
|
"grad_norm": 0.7306386865706785,
|
|
"learning_rate": 3.0030290431017088e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15561433136463165,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2134.2,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 2.8077260755048288,
|
|
"grad_norm": 0.882607650674962,
|
|
"learning_rate": 2.995450698007418e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1960475593805313,
|
|
"step": 1600,
|
|
"valid_targets_mean": 2221.2,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 2.8165057067603163,
|
|
"grad_norm": 0.6836834331337271,
|
|
"learning_rate": 2.9878533058506306e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12447787821292877,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2320.9,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 2.8252853380158034,
|
|
"grad_norm": 0.6269855399345545,
|
|
"learning_rate": 2.980237012000679e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12269158661365509,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2350.8,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 2.8340649692712905,
|
|
"grad_norm": 0.6481580872891736,
|
|
"learning_rate": 2.972601962188563e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16077366471290588,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2871.8,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 2.842844600526778,
|
|
"grad_norm": 0.815434069979512,
|
|
"learning_rate": 2.9649483025041602e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20713718235492706,
|
|
"step": 1620,
|
|
"valid_targets_mean": 2663.2,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 2.851624231782265,
|
|
"grad_norm": 0.6406039651312607,
|
|
"learning_rate": 2.957276179393432e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18860052525997162,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4025.0,
|
|
"valid_targets_min": 2472
|
|
},
|
|
{
|
|
"epoch": 2.860403863037752,
|
|
"grad_norm": 0.6627793586224608,
|
|
"learning_rate": 2.9495857396556203e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13482844829559326,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2634.2,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 2.8691834942932397,
|
|
"grad_norm": 0.6668355344984993,
|
|
"learning_rate": 2.9418771304404408e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13088376820087433,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2870.5,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 2.877963125548727,
|
|
"grad_norm": 0.8694246526720847,
|
|
"learning_rate": 2.9341504992452642e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21429385244846344,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2322.9,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 2.8867427568042143,
|
|
"grad_norm": 0.6836442304510056,
|
|
"learning_rate": 2.9264059939122967e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12531031668186188,
|
|
"step": 1645,
|
|
"valid_targets_mean": 2444.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 2.8955223880597014,
|
|
"grad_norm": 0.6472009472418728,
|
|
"learning_rate": 2.9186437626257506e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17106696963310242,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3177.9,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 2.904302019315189,
|
|
"grad_norm": 0.8040497876648083,
|
|
"learning_rate": 2.910863953909008e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1079612672328949,
|
|
"step": 1655,
|
|
"valid_targets_mean": 2471.8,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 2.913081650570676,
|
|
"grad_norm": 0.6604289359346474,
|
|
"learning_rate": 2.903066716621779e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.165900319814682,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3262.6,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 2.921861281826163,
|
|
"grad_norm": 0.9037523594698488,
|
|
"learning_rate": 2.8952521999572548e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1632218360900879,
|
|
"step": 1665,
|
|
"valid_targets_mean": 1943.5,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 2.9306409130816506,
|
|
"grad_norm": 0.692263890236419,
|
|
"learning_rate": 2.887420553439252e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09210682660341263,
|
|
"step": 1670,
|
|
"valid_targets_mean": 1597.8,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 2.9394205443371377,
|
|
"grad_norm": 0.6996278875201919,
|
|
"learning_rate": 2.8795719269193503e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22036971151828766,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3300.5,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 2.9482001755926253,
|
|
"grad_norm": 0.6886583940629769,
|
|
"learning_rate": 2.8717064705740284e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15170887112617493,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2790.6,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 2.9569798068481123,
|
|
"grad_norm": 0.6401537905580414,
|
|
"learning_rate": 2.8638243349017884e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14476624131202698,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3032.0,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 2.9657594381036,
|
|
"grad_norm": 0.6606606858412375,
|
|
"learning_rate": 2.8559256707202755e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16166868805885315,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2992.8,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 2.974539069359087,
|
|
"grad_norm": 0.6768423783330191,
|
|
"learning_rate": 2.8480106291633937e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13472245633602142,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3002.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 2.983318700614574,
|
|
"grad_norm": 0.7818056003477117,
|
|
"learning_rate": 2.840079361678414e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12717778980731964,
|
|
"step": 1700,
|
|
"valid_targets_mean": 1817.1,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 2.9920983318700616,
|
|
"grad_norm": 0.6968791516222846,
|
|
"learning_rate": 2.832132020023075e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15311039984226227,
|
|
"step": 1705,
|
|
"valid_targets_mean": 2335.8,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.906029385805221,
|
|
"learning_rate": 2.8241687562626814e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30455461144447327,
|
|
"step": 1710,
|
|
"valid_targets_mean": 2508.0,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.008779631255487,
|
|
"grad_norm": 0.6528173943815724,
|
|
"learning_rate": 2.8161897227671927e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19603696465492249,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3719.8,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 3.0175592625109746,
|
|
"grad_norm": 0.6877592906959695,
|
|
"learning_rate": 2.8081950722083075e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13602039217948914,
|
|
"step": 1720,
|
|
"valid_targets_mean": 2923.6,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 3.0263388937664617,
|
|
"grad_norm": 0.7632442544162501,
|
|
"learning_rate": 2.800184957556545e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14628666639328003,
|
|
"step": 1725,
|
|
"valid_targets_mean": 2021.0,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 3.0351185250219492,
|
|
"grad_norm": 0.7306546001662461,
|
|
"learning_rate": 2.792159532078314e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10861854255199432,
|
|
"step": 1730,
|
|
"valid_targets_mean": 2592.0,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 3.0438981562774363,
|
|
"grad_norm": 0.7305399933658803,
|
|
"learning_rate": 2.784118949332983e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1272508203983307,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2632.4,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 3.0526777875329234,
|
|
"grad_norm": 0.7539449582456732,
|
|
"learning_rate": 2.7760633631699415e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15514838695526123,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2476.9,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 3.061457418788411,
|
|
"grad_norm": 0.8006095899181362,
|
|
"learning_rate": 2.7679929277256562e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15670982003211975,
|
|
"step": 1745,
|
|
"valid_targets_mean": 2279.1,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 3.070237050043898,
|
|
"grad_norm": 0.7573492369114504,
|
|
"learning_rate": 2.75990779742072e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14672686159610748,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2408.0,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 3.0790166812993855,
|
|
"grad_norm": 0.6769500565811741,
|
|
"learning_rate": 2.7518081269569023e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15137916803359985,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3361.5,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 3.0877963125548726,
|
|
"grad_norm": 0.7121849733959531,
|
|
"learning_rate": 2.743694071314182e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20783770084381104,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3720.2,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 3.09657594381036,
|
|
"grad_norm": 0.56880048066459,
|
|
"learning_rate": 2.735565785747787e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11025272309780121,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3735.1,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 3.1053555750658473,
|
|
"grad_norm": 0.6338953072202468,
|
|
"learning_rate": 2.7274234257852218e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06821245700120926,
|
|
"step": 1770,
|
|
"valid_targets_mean": 1538.0,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.1141352063213343,
|
|
"grad_norm": 0.6607672053099521,
|
|
"learning_rate": 2.7192671472232935e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14984208345413208,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2830.2,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 3.122914837576822,
|
|
"grad_norm": 0.6975109199450099,
|
|
"learning_rate": 2.711097106125127e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1469304859638214,
|
|
"step": 1780,
|
|
"valid_targets_mean": 2440.0,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 3.131694468832309,
|
|
"grad_norm": 0.7758331617542179,
|
|
"learning_rate": 2.7029134588171816e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10406479984521866,
|
|
"step": 1785,
|
|
"valid_targets_mean": 1412.4,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 3.1404741000877965,
|
|
"grad_norm": 0.7850156043698459,
|
|
"learning_rate": 2.6947163618862604e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09685981273651123,
|
|
"step": 1790,
|
|
"valid_targets_mean": 1575.8,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 3.1492537313432836,
|
|
"grad_norm": 0.6394577921161971,
|
|
"learning_rate": 2.6865059721765115e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14530502259731293,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2996.0,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 3.1580333625987707,
|
|
"grad_norm": 0.6923338466894932,
|
|
"learning_rate": 2.6782824467864296e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16780512034893036,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3936.0,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.166812993854258,
|
|
"grad_norm": 0.7299270925412854,
|
|
"learning_rate": 2.6700459430658497e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1248893067240715,
|
|
"step": 1805,
|
|
"valid_targets_mean": 2540.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.1755926251097453,
|
|
"grad_norm": 0.7197762531343881,
|
|
"learning_rate": 2.6617966186129332e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09952201694250107,
|
|
"step": 1810,
|
|
"valid_targets_mean": 1852.5,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 3.184372256365233,
|
|
"grad_norm": 0.7778247716107792,
|
|
"learning_rate": 2.653534631271157e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14819130301475525,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2348.9,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 3.19315188762072,
|
|
"grad_norm": 0.7503702117563548,
|
|
"learning_rate": 2.6452601391262896e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130610853433609,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2097.0,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 3.201931518876207,
|
|
"grad_norm": 0.7361135423364917,
|
|
"learning_rate": 2.6369733005033693e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18000631034374237,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3482.2,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 3.2107111501316945,
|
|
"grad_norm": 0.6410413340677296,
|
|
"learning_rate": 2.6286742739636717e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13205955922603607,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2551.2,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 3.2194907813871816,
|
|
"grad_norm": 0.5509625820177909,
|
|
"learning_rate": 2.6203632183016778e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18069367110729218,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4945.2,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 3.228270412642669,
|
|
"grad_norm": 0.7023316129920258,
|
|
"learning_rate": 2.6120402925420355e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08991207927465439,
|
|
"step": 1840,
|
|
"valid_targets_mean": 1417.9,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 3.237050043898156,
|
|
"grad_norm": 0.790882572427964,
|
|
"learning_rate": 2.603705655936517e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655176430940628,
|
|
"step": 1845,
|
|
"valid_targets_mean": 2694.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 3.2458296751536437,
|
|
"grad_norm": 0.6825121522903894,
|
|
"learning_rate": 2.5953594679609694e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13159725069999695,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2917.4,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 3.254609306409131,
|
|
"grad_norm": 0.6335278054499885,
|
|
"learning_rate": 2.5870018883122664e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11341814696788788,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3224.6,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.263388937664618,
|
|
"grad_norm": 0.6933474760766404,
|
|
"learning_rate": 2.5786330769052515e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15624383091926575,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3866.4,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 3.2721685689201054,
|
|
"grad_norm": 0.6546625656662219,
|
|
"learning_rate": 2.570253193869676e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1615406572818756,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3517.5,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 3.2809482001755925,
|
|
"grad_norm": 0.7201288327895353,
|
|
"learning_rate": 2.5618623995471394e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19307760894298553,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3809.1,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 3.28972783143108,
|
|
"grad_norm": 0.7632791427368979,
|
|
"learning_rate": 2.553460854488016e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15899285674095154,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2430.0,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 3.298507462686567,
|
|
"grad_norm": 0.6738359699217487,
|
|
"learning_rate": 2.5450487194483895e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11192188411951065,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2538.5,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 3.3072870939420547,
|
|
"grad_norm": 0.6565521445573096,
|
|
"learning_rate": 2.53662615538697e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12946268916130066,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2590.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 3.3160667251975418,
|
|
"grad_norm": 0.6740972144735248,
|
|
"learning_rate": 2.5281933234620194e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356610357761383,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2970.6,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 3.324846356453029,
|
|
"grad_norm": 0.6367360377621945,
|
|
"learning_rate": 2.519750385028267e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11617624759674072,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2733.0,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 3.3336259877085164,
|
|
"grad_norm": 0.6936079952331635,
|
|
"learning_rate": 2.511297501633818e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11958629637956619,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2372.8,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 3.3424056189640035,
|
|
"grad_norm": 0.7255111705113595,
|
|
"learning_rate": 2.502834835017069e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09466154873371124,
|
|
"step": 1905,
|
|
"valid_targets_mean": 1770.2,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 3.3511852502194905,
|
|
"grad_norm": 1.22620625108739,
|
|
"learning_rate": 2.494362547103608e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15390850603580475,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2303.2,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 3.359964881474978,
|
|
"grad_norm": 0.6413848401173828,
|
|
"learning_rate": 2.4858808000031186e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1162773072719574,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2372.9,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 3.368744512730465,
|
|
"grad_norm": 0.6600307681181364,
|
|
"learning_rate": 2.477389756006276e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289825439453125,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2793.9,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 3.3775241439859527,
|
|
"grad_norm": 0.7280502851359809,
|
|
"learning_rate": 2.4688895775816453e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18088440597057343,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3029.0,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 3.3863037752414398,
|
|
"grad_norm": 0.7219375871734786,
|
|
"learning_rate": 2.4603804273725698e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10799458622932434,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2289.1,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 3.3950834064969273,
|
|
"grad_norm": 0.6867974682234831,
|
|
"learning_rate": 2.45186246819406e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1022118553519249,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2208.4,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 3.4038630377524144,
|
|
"grad_norm": 0.6712379040786958,
|
|
"learning_rate": 2.4433358630296776e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07032618671655655,
|
|
"step": 1940,
|
|
"valid_targets_mean": 1582.2,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 3.4126426690079015,
|
|
"grad_norm": 0.7209901836153916,
|
|
"learning_rate": 2.434800775028419e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10483679175376892,
|
|
"step": 1945,
|
|
"valid_targets_mean": 1918.1,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 3.421422300263389,
|
|
"grad_norm": 0.7160118382361751,
|
|
"learning_rate": 2.4262573675015907e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15090754628181458,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3027.4,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 3.430201931518876,
|
|
"grad_norm": 0.9768660268890813,
|
|
"learning_rate": 2.4177058039196864e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24464115500450134,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2027.8,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 3.4389815627743636,
|
|
"grad_norm": 0.7600977959322206,
|
|
"learning_rate": 2.409146247909259e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1299198865890503,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2868.1,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 3.4477611940298507,
|
|
"grad_norm": 0.7355732805281157,
|
|
"learning_rate": 2.400578863249789e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11989326775074005,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2383.8,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 3.4565408252853382,
|
|
"grad_norm": 0.7253720301411531,
|
|
"learning_rate": 2.392003813870551e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07000908255577087,
|
|
"step": 1970,
|
|
"valid_targets_mean": 1503.0,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 3.4653204565408253,
|
|
"grad_norm": 0.6495789773872109,
|
|
"learning_rate": 2.3834212638474773e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17093773186206818,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3516.5,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 3.4741000877963124,
|
|
"grad_norm": 0.6063927056850842,
|
|
"learning_rate": 2.3748313774000188e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10629069805145264,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3250.8,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 3.4828797190518,
|
|
"grad_norm": 0.647339553147698,
|
|
"learning_rate": 2.366234318888002e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14240597188472748,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2587.6,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 3.491659350307287,
|
|
"grad_norm": 0.6462595773735463,
|
|
"learning_rate": 2.357630252808484e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1366937756538391,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3140.8,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 3.500438981562774,
|
|
"grad_norm": 0.6628886535109224,
|
|
"learning_rate": 2.3490193437926058e-05,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13515321910381317,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3315.9,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 3.5092186128182616,
|
|
"grad_norm": 0.6582731991389928,
|
|
"learning_rate": 2.3404017566024435e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17607031762599945,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3346.8,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 3.517998244073749,
|
|
"grad_norm": 0.7188528428362767,
|
|
"learning_rate": 2.331777656127851e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10719118267297745,
|
|
"step": 2005,
|
|
"valid_targets_mean": 1754.9,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 3.5267778753292363,
|
|
"grad_norm": 1.2704977656864123,
|
|
"learning_rate": 2.32314720738331e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0975673645734787,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2367.5,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 3.5355575065847233,
|
|
"grad_norm": 0.6775402761606267,
|
|
"learning_rate": 2.314510575504771e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18409329652786255,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3623.1,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 3.544337137840211,
|
|
"grad_norm": 4.0231562047505,
|
|
"learning_rate": 2.3058679257464923e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14902639389038086,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2766.2,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 3.553116769095698,
|
|
"grad_norm": 0.672661668565957,
|
|
"learning_rate": 2.2972194234778794e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11060202866792679,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2751.2,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 3.561896400351185,
|
|
"grad_norm": 0.6869748550844412,
|
|
"learning_rate": 2.28856523418032e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19246426224708557,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3731.2,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 3.5706760316066726,
|
|
"grad_norm": 0.8541677470800584,
|
|
"learning_rate": 2.2799055234440194e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04141032323241234,
|
|
"step": 2035,
|
|
"valid_targets_mean": 751.6,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 3.5794556628621597,
|
|
"grad_norm": 0.6478353764436875,
|
|
"learning_rate": 2.2712404569648282e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12456832826137543,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2769.4,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 3.588235294117647,
|
|
"grad_norm": 0.693663752075535,
|
|
"learning_rate": 2.2625702005410767e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.166174054145813,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3585.0,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 3.5970149253731343,
|
|
"grad_norm": 0.7280439930739538,
|
|
"learning_rate": 2.2538949200703995e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16141310334205627,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3090.5,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 3.605794556628622,
|
|
"grad_norm": 0.7092006603343005,
|
|
"learning_rate": 2.245214781546561e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15805339813232422,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3881.9,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 3.614574187884109,
|
|
"grad_norm": 0.6184694306839825,
|
|
"learning_rate": 2.236529951056281e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15235979855060577,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3185.4,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 3.623353819139596,
|
|
"grad_norm": 0.6630922438512215,
|
|
"learning_rate": 2.227840594776056e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2044130563735962,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3474.5,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.6321334503950835,
|
|
"grad_norm": 0.7394342646202355,
|
|
"learning_rate": 2.2191468789689794e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15666668117046356,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2878.1,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 3.6409130816505706,
|
|
"grad_norm": 0.6379959213387669,
|
|
"learning_rate": 2.2104489699815587e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1134752705693245,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2776.6,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 3.6496927129060577,
|
|
"grad_norm": 0.6490141275963978,
|
|
"learning_rate": 2.201747034240537e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12108826637268066,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2621.5,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 3.658472344161545,
|
|
"grad_norm": 0.8485870834155551,
|
|
"learning_rate": 2.193041238249703e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17688895761966705,
|
|
"step": 2085,
|
|
"valid_targets_mean": 2229.5,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 3.6672519754170327,
|
|
"grad_norm": 0.6594026868058694,
|
|
"learning_rate": 2.18433174858671e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09432391822338104,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2203.1,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 3.67603160667252,
|
|
"grad_norm": 0.6958992206798327,
|
|
"learning_rate": 2.175618731899885e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15622082352638245,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3529.2,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 3.684811237928007,
|
|
"grad_norm": 0.6569958593713059,
|
|
"learning_rate": 2.1669023549050414e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552051603794098,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3034.1,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 3.6935908691834944,
|
|
"grad_norm": 0.8064608857298108,
|
|
"learning_rate": 2.1581827843822914e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17251284420490265,
|
|
"step": 2105,
|
|
"valid_targets_mean": 1919.6,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 3.7023705004389815,
|
|
"grad_norm": 0.579481427692816,
|
|
"learning_rate": 2.149460187172849e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13674451410770416,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3313.4,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.7111501316944686,
|
|
"grad_norm": 0.6251099320786633,
|
|
"learning_rate": 2.1407347301758438e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11723032593727112,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2754.8,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 3.719929762949956,
|
|
"grad_norm": 0.6271657702476694,
|
|
"learning_rate": 2.132006580345124e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17898300290107727,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4212.1,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 3.7287093942054432,
|
|
"grad_norm": 1.2063212412872577,
|
|
"learning_rate": 2.123275904686062e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14543330669403076,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2322.8,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 3.7374890254609308,
|
|
"grad_norm": 0.6524063415409936,
|
|
"learning_rate": 2.1145428702523606e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13566343486309052,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3723.2,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 3.746268656716418,
|
|
"grad_norm": 0.745766853095709,
|
|
"learning_rate": 2.1058076441428566e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14592021703720093,
|
|
"step": 2135,
|
|
"valid_targets_mean": 2859.5,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 3.7550482879719054,
|
|
"grad_norm": 0.6211239459782089,
|
|
"learning_rate": 2.0970703934983208e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14989614486694336,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3311.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 3.7638279192273925,
|
|
"grad_norm": 0.7039894945382092,
|
|
"learning_rate": 2.0883312854982622e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10648761689662933,
|
|
"step": 2145,
|
|
"valid_targets_mean": 1820.6,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 3.7726075504828795,
|
|
"grad_norm": 0.776412032824677,
|
|
"learning_rate": 2.0795904873577286e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1812201291322708,
|
|
"step": 2150,
|
|
"valid_targets_mean": 2610.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 3.781387181738367,
|
|
"grad_norm": 0.6928618595080953,
|
|
"learning_rate": 2.0708481663241083e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1698172688484192,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4289.9,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 3.790166812993854,
|
|
"grad_norm": 0.6162785417942847,
|
|
"learning_rate": 2.0621044896739265e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10653495788574219,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2876.8,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 3.7989464442493417,
|
|
"grad_norm": 0.8494097654129158,
|
|
"learning_rate": 2.0533596247096494e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13757789134979248,
|
|
"step": 2165,
|
|
"valid_targets_mean": 1408.2,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 3.8077260755048288,
|
|
"grad_norm": 0.7584488129341072,
|
|
"learning_rate": 2.044613738756479e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19383209943771362,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3003.4,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 3.8165057067603163,
|
|
"grad_norm": 0.6892644645538327,
|
|
"learning_rate": 2.0358669991591528e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1747075617313385,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3437.2,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 3.8252853380158034,
|
|
"grad_norm": 0.7029113425577824,
|
|
"learning_rate": 2.0271195732787433e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15954367816448212,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2964.6,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 3.8340649692712905,
|
|
"grad_norm": 0.5568520693021984,
|
|
"learning_rate": 2.0183716284894533e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09529216587543488,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3077.4,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 3.842844600526778,
|
|
"grad_norm": 0.5893262728047062,
|
|
"learning_rate": 2.009623332175415e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13605013489723206,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3435.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 3.851624231782265,
|
|
"grad_norm": 0.6610842592068026,
|
|
"learning_rate": 2.000874851727487e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1244860291481018,
|
|
"step": 2195,
|
|
"valid_targets_mean": 2527.6,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 3.860403863037752,
|
|
"grad_norm": 0.7588669770678698,
|
|
"learning_rate": 1.99212635454005e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18913835287094116,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2793.2,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 3.8691834942932397,
|
|
"grad_norm": 0.7043789547607171,
|
|
"learning_rate": 1.9833780080078063e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14726075530052185,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3010.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.877963125548727,
|
|
"grad_norm": 0.7439353958943266,
|
|
"learning_rate": 1.974629979522575e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07626357674598694,
|
|
"step": 2210,
|
|
"valid_targets_mean": 1294.8,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 3.8867427568042143,
|
|
"grad_norm": 0.6691861414106299,
|
|
"learning_rate": 1.9658824364700885e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18047642707824707,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4364.6,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 3.8955223880597014,
|
|
"grad_norm": 0.6554848683089958,
|
|
"learning_rate": 1.9571355462267944e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555202752351761,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2805.5,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 3.904302019315189,
|
|
"grad_norm": 0.7032652446219116,
|
|
"learning_rate": 1.9483894761566444e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12984727323055267,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2371.9,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 3.913081650570676,
|
|
"grad_norm": 0.7085784164441687,
|
|
"learning_rate": 1.939644393607901e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10806065797805786,
|
|
"step": 2230,
|
|
"valid_targets_mean": 1751.9,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 3.921861281826163,
|
|
"grad_norm": 0.584747436827857,
|
|
"learning_rate": 1.9309004659099293e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1404782384634018,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3760.2,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 3.9306409130816506,
|
|
"grad_norm": 0.7431653151997626,
|
|
"learning_rate": 1.9221578603699988e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16026908159255981,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2453.6,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 3.9394205443371377,
|
|
"grad_norm": 0.6953565059065165,
|
|
"learning_rate": 1.913416744270078e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1488170325756073,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2772.5,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 3.9482001755926253,
|
|
"grad_norm": 0.7302731272285674,
|
|
"learning_rate": 1.90467728486364e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455080509185791,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2555.9,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 3.9569798068481123,
|
|
"grad_norm": 0.6455104190645108,
|
|
"learning_rate": 1.8959396493724554e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445830762386322,
|
|
"step": 2255,
|
|
"valid_targets_mean": 2830.9,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 3.9657594381036,
|
|
"grad_norm": 0.6410265586778108,
|
|
"learning_rate": 1.8872040049833978e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14826273918151855,
|
|
"step": 2260,
|
|
"valid_targets_mean": 2757.5,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 3.974539069359087,
|
|
"grad_norm": 0.6117163419902112,
|
|
"learning_rate": 1.878470518845241e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16641898453235626,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3956.6,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 3.983318700614574,
|
|
"grad_norm": 0.597694266427677,
|
|
"learning_rate": 1.8697393580654654e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1622926890850067,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3878.5,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 3.9920983318700616,
|
|
"grad_norm": 0.8532684194280901,
|
|
"learning_rate": 1.861010689707054e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1656481921672821,
|
|
"step": 2275,
|
|
"valid_targets_mean": 2321.5,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.9790532102809725,
|
|
"learning_rate": 1.8522846807853036e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2912352681159973,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3215.4,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.0087796312554875,
|
|
"grad_norm": 0.6392252494182268,
|
|
"learning_rate": 1.8435614982646207e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09085570275783539,
|
|
"step": 2285,
|
|
"valid_targets_mean": 1737.1,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 4.017559262510974,
|
|
"grad_norm": 0.7286653971421071,
|
|
"learning_rate": 1.8348413090553356e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11699149757623672,
|
|
"step": 2290,
|
|
"valid_targets_mean": 2551.0,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 4.026338893766462,
|
|
"grad_norm": 0.6823741889167877,
|
|
"learning_rate": 1.8261242800105007e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15201464295387268,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3257.4,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 4.035118525021949,
|
|
"grad_norm": 0.6705005981342328,
|
|
"learning_rate": 1.8174105779227038e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.100313201546669,
|
|
"step": 2300,
|
|
"valid_targets_mean": 2275.0,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.043898156277437,
|
|
"grad_norm": 0.701024971544971,
|
|
"learning_rate": 1.8087003695208725e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1007407009601593,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2350.0,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 4.052677787532923,
|
|
"grad_norm": 0.7208846241166222,
|
|
"learning_rate": 1.7999938214670876e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13938146829605103,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2802.0,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 4.061457418788411,
|
|
"grad_norm": 0.6204986932650736,
|
|
"learning_rate": 1.79129110035339e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0741071030497551,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2757.0,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 4.0702370500438985,
|
|
"grad_norm": 0.7859762819167806,
|
|
"learning_rate": 1.7825923726985988e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16365136206150055,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2803.0,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 4.079016681299385,
|
|
"grad_norm": 0.6780058759632916,
|
|
"learning_rate": 1.773897804945119e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1530340313911438,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3254.8,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 4.087796312554873,
|
|
"grad_norm": 0.7020001697658454,
|
|
"learning_rate": 1.7652075634557614e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309017539024353,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2455.6,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 4.09657594381036,
|
|
"grad_norm": 0.6705300953226101,
|
|
"learning_rate": 1.7565218145105555e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11941774189472198,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3112.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 4.105355575065847,
|
|
"grad_norm": 0.6214190326364721,
|
|
"learning_rate": 1.747840724303573e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07592988014221191,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2224.6,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 4.114135206321334,
|
|
"grad_norm": 0.6418403341699956,
|
|
"learning_rate": 1.7391644589397415e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1077946275472641,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3065.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 4.122914837576822,
|
|
"grad_norm": 0.79570753302041,
|
|
"learning_rate": 1.7304931844316725e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1317206174135208,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2216.9,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 4.131694468832309,
|
|
"grad_norm": 0.6269102739221749,
|
|
"learning_rate": 1.721827066696479e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14147669076919556,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3818.5,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 4.140474100087796,
|
|
"grad_norm": 0.6893942855615022,
|
|
"learning_rate": 1.7131662715526078e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10011757910251617,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2585.5,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 4.149253731343284,
|
|
"grad_norm": 0.7375728051981385,
|
|
"learning_rate": 1.704510964716657e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11609027534723282,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2346.4,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 4.158033362598771,
|
|
"grad_norm": 0.6604230326378752,
|
|
"learning_rate": 1.6958613118002162e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10893774777650833,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2711.4,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 4.166812993854258,
|
|
"grad_norm": 0.7504657714615721,
|
|
"learning_rate": 1.6872174783066892e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11733004450798035,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2687.0,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 4.175592625109745,
|
|
"grad_norm": 0.7854578672276143,
|
|
"learning_rate": 1.6785796296281317e-05,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12715031206607819,
|
|
"step": 2380,
|
|
"valid_targets_mean": 2342.8,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 4.184372256365233,
|
|
"grad_norm": 0.7135212993985506,
|
|
"learning_rate": 1.6699479310420832e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10799732059240341,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2352.4,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 4.19315188762072,
|
|
"grad_norm": 0.7768478830718591,
|
|
"learning_rate": 1.6613225477084105e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11679384112358093,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2532.5,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 4.201931518876207,
|
|
"grad_norm": 0.7119006162102695,
|
|
"learning_rate": 1.6527036446661396e-05,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09228062629699707,
|
|
"step": 2395,
|
|
"valid_targets_mean": 1934.5,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 4.2107111501316945,
|
|
"grad_norm": 0.8846751301704164,
|
|
"learning_rate": 1.644091386830305e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11417409032583237,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3120.9,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 4.219490781387182,
|
|
"grad_norm": 0.8276181656189628,
|
|
"learning_rate": 1.635485938988788e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16342443227767944,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3246.5,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 4.228270412642669,
|
|
"grad_norm": 0.6756310370616314,
|
|
"learning_rate": 1.6268874657991705e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14770348370075226,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3341.6,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 4.237050043898156,
|
|
"grad_norm": 0.6771410270255209,
|
|
"learning_rate": 1.6182961317855772e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10546547919511795,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2947.1,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.245829675153644,
|
|
"grad_norm": 0.6604923172481514,
|
|
"learning_rate": 1.6097121013355334e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1368059515953064,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3151.1,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.254609306409131,
|
|
"grad_norm": 0.8747748555449678,
|
|
"learning_rate": 1.6011355386968155e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11443006992340088,
|
|
"step": 2425,
|
|
"valid_targets_mean": 1556.0,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 4.263388937664618,
|
|
"grad_norm": 0.7431622641560798,
|
|
"learning_rate": 1.5925666079743123e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09761566668748856,
|
|
"step": 2430,
|
|
"valid_targets_mean": 1788.5,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 4.272168568920105,
|
|
"grad_norm": 0.6403974548281689,
|
|
"learning_rate": 1.5840054731268787e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.116920605301857,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3454.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 4.280948200175593,
|
|
"grad_norm": 0.7179048334435277,
|
|
"learning_rate": 1.575452297964207e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15091872215270996,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3041.9,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 4.28972783143108,
|
|
"grad_norm": 0.6590286480714271,
|
|
"learning_rate": 1.566907246143685e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13773122429847717,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3258.5,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 4.298507462686567,
|
|
"grad_norm": 0.8083733162700167,
|
|
"learning_rate": 1.5583704811672692e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14040136337280273,
|
|
"step": 2450,
|
|
"valid_targets_mean": 2158.8,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 4.307287093942055,
|
|
"grad_norm": 0.5859490763103692,
|
|
"learning_rate": 1.5498421663783527e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.175615131855011,
|
|
"step": 2455,
|
|
"valid_targets_mean": 5350.4,
|
|
"valid_targets_min": 4215
|
|
},
|
|
{
|
|
"epoch": 4.316066725197541,
|
|
"grad_norm": 0.7682461192990887,
|
|
"learning_rate": 1.541322464958645e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924751102924347,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2125.2,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 4.324846356453029,
|
|
"grad_norm": 0.6975523334633853,
|
|
"learning_rate": 1.532811539925043e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1547008752822876,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3794.1,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 4.333625987708516,
|
|
"grad_norm": 0.8385096043959949,
|
|
"learning_rate": 1.5243095541265181e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13358618319034576,
|
|
"step": 2470,
|
|
"valid_targets_mean": 2688.4,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 4.342405618964004,
|
|
"grad_norm": 0.7427657051494995,
|
|
"learning_rate": 1.5158166702409944e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12327609211206436,
|
|
"step": 2475,
|
|
"valid_targets_mean": 2138.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 4.3511852502194905,
|
|
"grad_norm": 0.6284973642423284,
|
|
"learning_rate": 1.507333050772243e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.136694997549057,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4143.6,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 4.359964881474978,
|
|
"grad_norm": 0.8014010892311436,
|
|
"learning_rate": 1.4988588580467652e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16160011291503906,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2347.2,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 4.368744512730466,
|
|
"grad_norm": 0.7781907413752885,
|
|
"learning_rate": 1.490394254210691e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09884107112884521,
|
|
"step": 2490,
|
|
"valid_targets_mean": 1908.1,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 4.377524143985952,
|
|
"grad_norm": 0.656944302494803,
|
|
"learning_rate": 1.4819394012266761e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11444749683141708,
|
|
"step": 2495,
|
|
"valid_targets_mean": 3649.0,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 4.38630377524144,
|
|
"grad_norm": 0.7271695303903445,
|
|
"learning_rate": 1.4734944608708022e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10323821008205414,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2477.2,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 4.395083406496927,
|
|
"grad_norm": 0.6178056239054158,
|
|
"learning_rate": 1.4650595947294802e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15808966755867004,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4097.6,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.403863037752414,
|
|
"grad_norm": 0.7367532418281286,
|
|
"learning_rate": 1.4566349641963623e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16247165203094482,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3132.9,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 4.4126426690079015,
|
|
"grad_norm": 0.6180729813031709,
|
|
"learning_rate": 1.4482207304692499e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1650177538394928,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3827.1,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 4.421422300263389,
|
|
"grad_norm": 0.8647698386769249,
|
|
"learning_rate": 1.4398170545470116e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10669723153114319,
|
|
"step": 2520,
|
|
"valid_targets_mean": 1769.5,
|
|
"valid_targets_min": 176
|
|
},
|
|
{
|
|
"epoch": 4.4302019315188765,
|
|
"grad_norm": 0.6235563624819768,
|
|
"learning_rate": 1.4314240972264997e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10156850516796112,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2913.8,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 4.438981562774363,
|
|
"grad_norm": 0.768324822269462,
|
|
"learning_rate": 1.4230420190994794e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14304345846176147,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2697.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 4.447761194029851,
|
|
"grad_norm": 0.6882005904128705,
|
|
"learning_rate": 1.4146709805495484e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1533280313014984,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3195.8,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 4.456540825285338,
|
|
"grad_norm": 0.6596939525524491,
|
|
"learning_rate": 1.4063111417490748e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10306426882743835,
|
|
"step": 2540,
|
|
"valid_targets_mean": 2782.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 4.465320456540825,
|
|
"grad_norm": 0.7569604781931635,
|
|
"learning_rate": 1.3979626626561271e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19596469402313232,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3228.6,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 4.474100087796312,
|
|
"grad_norm": 0.6685484849888578,
|
|
"learning_rate": 1.3896257030114186e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14732101559638977,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3051.2,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.4828797190518,
|
|
"grad_norm": 0.7082642550779661,
|
|
"learning_rate": 1.3813004223352448e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12138574570417404,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2587.4,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 4.4916593503072875,
|
|
"grad_norm": 0.7057406538200612,
|
|
"learning_rate": 1.3729869799244382e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14132285118103027,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3056.2,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 4.500438981562774,
|
|
"grad_norm": 0.6880514476850277,
|
|
"learning_rate": 1.3646855348493132e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11824949085712433,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2404.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 4.509218612818262,
|
|
"grad_norm": 0.9650986934176399,
|
|
"learning_rate": 1.356396245950629e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1354825794696808,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2611.9,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 4.517998244073749,
|
|
"grad_norm": 0.7086491500122325,
|
|
"learning_rate": 1.3481192718365446e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1408531367778778,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2771.4,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 4.526777875329236,
|
|
"grad_norm": 0.6278341574797347,
|
|
"learning_rate": 1.3398547708795885e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11719765514135361,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3049.8,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 4.535557506584723,
|
|
"grad_norm": 0.6274236653303449,
|
|
"learning_rate": 1.3316029012136251e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13181564211845398,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3290.0,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 4.544337137840211,
|
|
"grad_norm": 0.6942267163635538,
|
|
"learning_rate": 1.3233638207308314e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13729172945022583,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3127.4,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 4.553116769095698,
|
|
"grad_norm": 0.7229354694579719,
|
|
"learning_rate": 1.3151376870786731e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138467937707901,
|
|
"step": 2595,
|
|
"valid_targets_mean": 2960.4,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 4.561896400351185,
|
|
"grad_norm": 0.7074926380400757,
|
|
"learning_rate": 1.3069246576568926e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12922507524490356,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3241.1,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 4.570676031606673,
|
|
"grad_norm": 0.7599271325837954,
|
|
"learning_rate": 1.2987248896144915e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07306542992591858,
|
|
"step": 2605,
|
|
"valid_targets_mean": 1332.4,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 4.57945566286216,
|
|
"grad_norm": 0.6764837603356156,
|
|
"learning_rate": 1.2905385398467288e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07156918942928314,
|
|
"step": 2610,
|
|
"valid_targets_mean": 1427.8,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 4.588235294117647,
|
|
"grad_norm": 0.6960238939271556,
|
|
"learning_rate": 1.2823657649921151e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13218584656715393,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2626.9,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 4.597014925373134,
|
|
"grad_norm": 0.7160071114217643,
|
|
"learning_rate": 1.2742067214294193e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11780767142772675,
|
|
"step": 2620,
|
|
"valid_targets_mean": 2101.5,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 4.605794556628622,
|
|
"grad_norm": 0.6664647765491197,
|
|
"learning_rate": 1.2660615652746718e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15881705284118652,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3631.8,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 4.614574187884109,
|
|
"grad_norm": 0.7364684940644115,
|
|
"learning_rate": 1.2579304523781821e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16874048113822937,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2910.0,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 4.623353819139596,
|
|
"grad_norm": 0.6515212457593923,
|
|
"learning_rate": 1.2498135383215527e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13133281469345093,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2961.9,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 4.6321334503950835,
|
|
"grad_norm": 0.6071757254078883,
|
|
"learning_rate": 1.2417109784147056e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12967140972614288,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3554.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.640913081650571,
|
|
"grad_norm": 0.6519480287922598,
|
|
"learning_rate": 1.2336229276929062e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16746513545513153,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3352.6,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 4.649692712906058,
|
|
"grad_norm": 0.6513947546076504,
|
|
"learning_rate": 1.2255495409138031e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14567255973815918,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3192.1,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 4.658472344161545,
|
|
"grad_norm": 0.7208905002616658,
|
|
"learning_rate": 1.2174909725544608e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10772593319416046,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2845.0,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 4.667251975417033,
|
|
"grad_norm": 0.6673726702940969,
|
|
"learning_rate": 1.2094473768084078e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14405329525470734,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3214.5,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 4.676031606672519,
|
|
"grad_norm": 0.7030442075023986,
|
|
"learning_rate": 1.2014189075826836e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10953042656183243,
|
|
"step": 2665,
|
|
"valid_targets_mean": 2419.8,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.684811237928007,
|
|
"grad_norm": 0.6362013376818757,
|
|
"learning_rate": 1.1934057184948977e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10492923855781555,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2947.4,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 4.693590869183494,
|
|
"grad_norm": 0.7151441888636099,
|
|
"learning_rate": 1.1854079628702853e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1462075114250183,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3638.5,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 4.702370500438981,
|
|
"grad_norm": 0.7790554912915496,
|
|
"learning_rate": 1.1774257937387774e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09585490822792053,
|
|
"step": 2680,
|
|
"valid_targets_mean": 1553.5,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.711150131694469,
|
|
"grad_norm": 0.7438717267311862,
|
|
"learning_rate": 1.1694593638320701e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13532283902168274,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2421.6,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 4.719929762949956,
|
|
"grad_norm": 0.7372756557789699,
|
|
"learning_rate": 1.1615088255807052e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06966099143028259,
|
|
"step": 2690,
|
|
"valid_targets_mean": 1160.6,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 4.728709394205444,
|
|
"grad_norm": 0.6617478381593381,
|
|
"learning_rate": 1.1535743311111503e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14491809904575348,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3587.5,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 4.73748902546093,
|
|
"grad_norm": 0.6592967200321452,
|
|
"learning_rate": 1.145656032242891e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11863560229539871,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3069.0,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 4.746268656716418,
|
|
"grad_norm": 0.7279123332794823,
|
|
"learning_rate": 1.1377540804855216e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1743847131729126,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3329.8,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 4.755048287971905,
|
|
"grad_norm": 0.6994091860911027,
|
|
"learning_rate": 1.1298686270358542e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12104357033967972,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2895.9,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 4.763827919227392,
|
|
"grad_norm": 0.7242830532594046,
|
|
"learning_rate": 1.1219998227750141e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12763358652591705,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3106.4,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.7726075504828795,
|
|
"grad_norm": 0.7775802986907712,
|
|
"learning_rate": 1.1141478182655643e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12781354784965515,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2978.6,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 4.781387181738367,
|
|
"grad_norm": 0.6840960445709268,
|
|
"learning_rate": 1.106312763748617e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13776427507400513,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3079.0,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 4.790166812993855,
|
|
"grad_norm": 0.7792917901924852,
|
|
"learning_rate": 1.0984948091409617e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0997919887304306,
|
|
"step": 2730,
|
|
"valid_targets_mean": 1797.4,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 4.798946444249341,
|
|
"grad_norm": 0.6529345631105553,
|
|
"learning_rate": 1.0906941040321973e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12581142783164978,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3234.2,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 4.807726075504829,
|
|
"grad_norm": 0.7951275057972814,
|
|
"learning_rate": 1.0829107976818686e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07908165454864502,
|
|
"step": 2740,
|
|
"valid_targets_mean": 1721.8,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 4.816505706760316,
|
|
"grad_norm": 0.7853661169344167,
|
|
"learning_rate": 1.0751450390166091e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19675883650779724,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3397.2,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 4.825285338015803,
|
|
"grad_norm": 0.8256039301460016,
|
|
"learning_rate": 1.0673969766272947e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22708779573440552,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3265.2,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 4.8340649692712905,
|
|
"grad_norm": 0.6485744970528148,
|
|
"learning_rate": 1.0596667587661983e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07782207429409027,
|
|
"step": 2755,
|
|
"valid_targets_mean": 1831.2,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 4.842844600526778,
|
|
"grad_norm": 0.567624514834398,
|
|
"learning_rate": 1.0519545333441541e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12676523625850677,
|
|
"step": 2760,
|
|
"valid_targets_mean": 4776.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.8516242317822655,
|
|
"grad_norm": 0.8461074468231907,
|
|
"learning_rate": 1.044260447927726e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11474597454071045,
|
|
"step": 2765,
|
|
"valid_targets_mean": 1504.0,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 4.860403863037752,
|
|
"grad_norm": 0.743796895581844,
|
|
"learning_rate": 1.0365846497363868e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657620966434479,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2540.2,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 4.86918349429324,
|
|
"grad_norm": 0.6375354187707435,
|
|
"learning_rate": 1.0289272856396954e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10665443539619446,
|
|
"step": 2775,
|
|
"valid_targets_mean": 2397.8,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 4.877963125548727,
|
|
"grad_norm": 0.6660462059435743,
|
|
"learning_rate": 1.0212885021544978e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11572909355163574,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2923.1,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 4.886742756804214,
|
|
"grad_norm": 0.7323725907423241,
|
|
"learning_rate": 1.0136684454421091e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.165201336145401,
|
|
"step": 2785,
|
|
"valid_targets_mean": 2881.6,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 4.895522388059701,
|
|
"grad_norm": 0.6802383596942915,
|
|
"learning_rate": 1.006067261305529e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15335635840892792,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3068.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 4.904302019315189,
|
|
"grad_norm": 0.7389877833611693,
|
|
"learning_rate": 9.984850951866454e-06,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15105405449867249,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2633.4,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 4.9130816505706765,
|
|
"grad_norm": 0.6002229536366065,
|
|
"learning_rate": 9.90922092163455e-06,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13461145758628845,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4392.9,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 4.921861281826163,
|
|
"grad_norm": 0.7433510126902383,
|
|
"learning_rate": 9.833783969472821e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11660341918468475,
|
|
"step": 2805,
|
|
"valid_targets_mean": 1956.2,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 4.930640913081651,
|
|
"grad_norm": 0.9762562614985235,
|
|
"learning_rate": 9.758541538800187e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15162953734397888,
|
|
"step": 2810,
|
|
"valid_targets_mean": 1716.1,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 4.939420544337138,
|
|
"grad_norm": 0.7711855542008039,
|
|
"learning_rate": 9.683495069313527e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13835689425468445,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2482.9,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 4.948200175592625,
|
|
"grad_norm": 0.7735837195307252,
|
|
"learning_rate": 9.60864599696019e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1575012505054474,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2434.6,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 4.956979806848112,
|
|
"grad_norm": 0.8610664565828234,
|
|
"learning_rate": 9.533995753910521e-06,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10658472776412964,
|
|
"step": 2825,
|
|
"valid_targets_mean": 1145.9,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 4.9657594381036,
|
|
"grad_norm": 0.6899727880651705,
|
|
"learning_rate": 9.459545768530425e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14374637603759766,
|
|
"step": 2830,
|
|
"valid_targets_mean": 2807.2,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 4.974539069359087,
|
|
"grad_norm": 0.6498392723648464,
|
|
"learning_rate": 9.385297465354067e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10918396711349487,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2290.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 4.983318700614574,
|
|
"grad_norm": 0.769474823441188,
|
|
"learning_rate": 9.311252265056605e-06,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15733110904693604,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3429.8,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.992098331870062,
|
|
"grad_norm": 0.6529635619644126,
|
|
"learning_rate": 9.237411584426971e-06,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11474388837814331,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2989.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.9362690269289725,
|
|
"learning_rate": 9.163776836340858e-06,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711550295352936,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3220.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 5.0087796312554875,
|
|
"grad_norm": 0.6845626008945171,
|
|
"learning_rate": 9.09034942973356e-06,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06309908628463745,
|
|
"step": 2855,
|
|
"valid_targets_mean": 1544.2,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 5.017559262510974,
|
|
"grad_norm": 0.8150310377472015,
|
|
"learning_rate": 9.017130769573113e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406450867652893,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2278.2,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 5.026338893766462,
|
|
"grad_norm": 0.6953793946146135,
|
|
"learning_rate": 8.944122256833366e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1170356273651123,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2486.8,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 5.035118525021949,
|
|
"grad_norm": 0.7700596833215524,
|
|
"learning_rate": 8.871325288467188e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1443384885787964,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2233.2,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 5.043898156277437,
|
|
"grad_norm": 0.8213616692792995,
|
|
"learning_rate": 8.798741257379714e-06,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11816003173589706,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2084.4,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 5.052677787532923,
|
|
"grad_norm": 0.9981609155998604,
|
|
"learning_rate": 8.726371552401758e-06,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13712292909622192,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3371.9,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 5.061457418788411,
|
|
"grad_norm": 0.7623517095508452,
|
|
"learning_rate": 8.654217558263146e-06,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08934280276298523,
|
|
"step": 2885,
|
|
"valid_targets_mean": 2201.5,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 5.0702370500438985,
|
|
"grad_norm": 0.6364311991592593,
|
|
"learning_rate": 8.582280655566302e-06,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11488009244203568,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3247.0,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 5.079016681299385,
|
|
"grad_norm": 0.6805290789871049,
|
|
"learning_rate": 8.510562220759782e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08681486546993256,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2223.4,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 5.087796312554873,
|
|
"grad_norm": 0.6356412201349562,
|
|
"learning_rate": 8.439063626111961e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13955260813236237,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3573.1,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.09657594381036,
|
|
"grad_norm": 3.150680588324152,
|
|
"learning_rate": 8.367786239684767e-06,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08566398918628693,
|
|
"step": 2905,
|
|
"valid_targets_mean": 1695.6,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 5.105355575065847,
|
|
"grad_norm": 0.7689304734920328,
|
|
"learning_rate": 8.296731425307514e-06,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15702858567237854,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3077.1,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 5.114135206321334,
|
|
"grad_norm": 0.7266699420994135,
|
|
"learning_rate": 8.225900542550764e-06,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07435964792966843,
|
|
"step": 2915,
|
|
"valid_targets_mean": 1564.0,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 5.122914837576822,
|
|
"grad_norm": 0.7062557760028177,
|
|
"learning_rate": 8.155294946700402e-06,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10005843639373779,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2280.4,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 5.131694468832309,
|
|
"grad_norm": 0.6879816183032202,
|
|
"learning_rate": 8.084915988731604e-06,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13793812692165375,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3276.4,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 5.140474100087796,
|
|
"grad_norm": 0.5967070736402589,
|
|
"learning_rate": 8.014765015283043e-06,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08186718821525574,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3325.8,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 5.149253731343284,
|
|
"grad_norm": 0.8196312857687569,
|
|
"learning_rate": 7.94484336863112e-06,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.100481316447258,
|
|
"step": 2935,
|
|
"valid_targets_mean": 1987.5,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 5.158033362598771,
|
|
"grad_norm": 0.6631677410499952,
|
|
"learning_rate": 7.87515238666428e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1075897216796875,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3093.8,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 5.166812993854258,
|
|
"grad_norm": 0.6928306880325763,
|
|
"learning_rate": 7.805693402857362e-06,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1369820088148117,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3389.5,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.175592625109745,
|
|
"grad_norm": 0.7607144265056875,
|
|
"learning_rate": 7.736467746246188e-06,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10200274735689163,
|
|
"step": 2950,
|
|
"valid_targets_mean": 2421.4,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 5.184372256365233,
|
|
"grad_norm": 0.646197068812678,
|
|
"learning_rate": 7.66747674140202e-06,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14131632447242737,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4108.2,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 5.19315188762072,
|
|
"grad_norm": 0.7442510939352529,
|
|
"learning_rate": 7.598721708406296e-06,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12199489772319794,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3189.9,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 5.201931518876207,
|
|
"grad_norm": 0.8642320898557906,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09635886549949646,
|
|
"step": 2965,
|
|
"valid_targets_mean": 1704.1,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 5.2107111501316945,
|
|
"grad_norm": 0.6901658950423678,
|
|
"learning_rate": 7.4619248156851645e-06,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09930352121591568,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2562.6,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 5.219490781387182,
|
|
"grad_norm": 0.7277828905339693,
|
|
"learning_rate": 7.393885573446462e-06,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11548199504613876,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2300.6,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 5.228270412642669,
|
|
"grad_norm": 0.7083307080331135,
|
|
"learning_rate": 7.326087537979532e-06,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14739534258842468,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3126.5,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.237050043898156,
|
|
"grad_norm": 0.8246522545933862,
|
|
"learning_rate": 7.258532006539374e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15068992972373962,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2610.6,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 5.245829675153644,
|
|
"grad_norm": 0.7413255099421658,
|
|
"learning_rate": 7.191220271740944e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0977272093296051,
|
|
"step": 2990,
|
|
"valid_targets_mean": 2218.4,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.254609306409131,
|
|
"grad_norm": 0.7232117319083464,
|
|
"learning_rate": 7.124153621534313e-06,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13027220964431763,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2758.8,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 5.263388937664618,
|
|
"grad_norm": 0.7676239499993709,
|
|
"learning_rate": 7.0573333391801056e-06,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12074615806341171,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2032.2,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 5.272168568920105,
|
|
"grad_norm": 0.9268221235527632,
|
|
"learning_rate": 6.990760703224912e-06,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12312954664230347,
|
|
"step": 3005,
|
|
"valid_targets_mean": 1181.1,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 5.280948200175593,
|
|
"grad_norm": 0.765930804081211,
|
|
"learning_rate": 6.924436987476833e-06,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12311273068189621,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2562.8,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 5.28972783143108,
|
|
"grad_norm": 0.7168685776214427,
|
|
"learning_rate": 6.858363460981077e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10748713463544846,
|
|
"step": 3015,
|
|
"valid_targets_mean": 2147.4,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 5.298507462686567,
|
|
"grad_norm": 0.7165105500301602,
|
|
"learning_rate": 6.79254138799575e-06,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14707905054092407,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3254.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 5.307287093942055,
|
|
"grad_norm": 0.792930817122442,
|
|
"learning_rate": 6.7269720279675755e-06,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1160925030708313,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2005.0,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 5.316066725197541,
|
|
"grad_norm": 0.7095383576527031,
|
|
"learning_rate": 6.661656635507865e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12982550263404846,
|
|
"step": 3030,
|
|
"valid_targets_mean": 2502.9,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 5.324846356453029,
|
|
"grad_norm": 0.8538866098925912,
|
|
"learning_rate": 6.596596460368476e-06,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12829452753067017,
|
|
"step": 3035,
|
|
"valid_targets_mean": 1773.8,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 5.333625987708516,
|
|
"grad_norm": 0.7248819968515889,
|
|
"learning_rate": 6.531792747417916e-06,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09272044897079468,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2204.5,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 5.342405618964004,
|
|
"grad_norm": 0.8696301589108733,
|
|
"learning_rate": 6.467246736617511e-06,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15029314160346985,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2057.4,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 5.3511852502194905,
|
|
"grad_norm": 0.706837837816733,
|
|
"learning_rate": 6.4029596629977006e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386263519525528,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3030.8,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 5.359964881474978,
|
|
"grad_norm": 0.7428563639815615,
|
|
"learning_rate": 6.338932756634355e-06,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11363402009010315,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2213.0,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 5.368744512730466,
|
|
"grad_norm": 0.7000612016938114,
|
|
"learning_rate": 6.275167242625331e-06,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12018229812383652,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2763.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.377524143985952,
|
|
"grad_norm": 0.6877906020678198,
|
|
"learning_rate": 6.211664341066925e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09447683393955231,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2308.2,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 5.38630377524144,
|
|
"grad_norm": 0.6029038472671969,
|
|
"learning_rate": 6.148425267030606e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11764822155237198,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4519.4,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 5.395083406496927,
|
|
"grad_norm": 0.7016651046550577,
|
|
"learning_rate": 6.085451230539729e-06,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12614849209785461,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3153.5,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 5.403863037752414,
|
|
"grad_norm": 0.8023475235639694,
|
|
"learning_rate": 6.0227434365464036e-06,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10818611830472946,
|
|
"step": 3080,
|
|
"valid_targets_mean": 2189.6,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.4126426690079015,
|
|
"grad_norm": 0.733640170560249,
|
|
"learning_rate": 5.960303084908395e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11381937563419342,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2243.4,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 5.421422300263389,
|
|
"grad_norm": 0.8282082833946843,
|
|
"learning_rate": 5.898131370366242e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10247036069631577,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2079.9,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 5.4302019315188765,
|
|
"grad_norm": 0.7220495146510695,
|
|
"learning_rate": 5.836229482520312e-06,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12418776750564575,
|
|
"step": 3095,
|
|
"valid_targets_mean": 2606.5,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 5.438981562774363,
|
|
"grad_norm": 0.7149710439339285,
|
|
"learning_rate": 5.774598605808101e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14423775672912598,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2986.2,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 5.447761194029851,
|
|
"grad_norm": 0.6920309820438574,
|
|
"learning_rate": 5.713239919481544e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.111282117664814,
|
|
"step": 3105,
|
|
"valid_targets_mean": 2442.9,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 5.456540825285338,
|
|
"grad_norm": 0.749167801436116,
|
|
"learning_rate": 5.6521545975844496e-06,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12038739025592804,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3060.4,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 5.465320456540825,
|
|
"grad_norm": 0.7483025619839044,
|
|
"learning_rate": 5.591343808930052e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08093801140785217,
|
|
"step": 3115,
|
|
"valid_targets_mean": 1761.9,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 5.474100087796312,
|
|
"grad_norm": 0.8339294881234727,
|
|
"learning_rate": 5.530808717078631e-06,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17357511818408966,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2423.1,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 5.4828797190518,
|
|
"grad_norm": 0.8716479926341156,
|
|
"learning_rate": 5.470550480315244e-06,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15192192792892456,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2680.6,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.4916593503072875,
|
|
"grad_norm": 0.8074922397007114,
|
|
"learning_rate": 5.410570251627587e-06,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09964276850223541,
|
|
"step": 3130,
|
|
"valid_targets_mean": 1747.1,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 5.500438981562774,
|
|
"grad_norm": 0.810278256373859,
|
|
"learning_rate": 5.350869178683913e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13908156752586365,
|
|
"step": 3135,
|
|
"valid_targets_mean": 2567.0,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 5.509218612818262,
|
|
"grad_norm": 0.771808245184934,
|
|
"learning_rate": 5.291448403811082e-06,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11983799934387207,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2369.5,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 5.517998244073749,
|
|
"grad_norm": 0.727911713732212,
|
|
"learning_rate": 5.232309063972691e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11078071594238281,
|
|
"step": 3145,
|
|
"valid_targets_mean": 2130.0,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.526777875329236,
|
|
"grad_norm": 0.7129394851822094,
|
|
"learning_rate": 5.173452290747349e-06,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12691935896873474,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2838.5,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 5.535557506584723,
|
|
"grad_norm": 0.7232413542279253,
|
|
"learning_rate": 5.114879210306967e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05897502601146698,
|
|
"step": 3155,
|
|
"valid_targets_mean": 1377.9,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 5.544337137840211,
|
|
"grad_norm": 0.777920253090825,
|
|
"learning_rate": 5.056590943395294e-06,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14894390106201172,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2085.4,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.553116769095698,
|
|
"grad_norm": 0.7307002789690921,
|
|
"learning_rate": 4.998588605306387e-06,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15054315328598022,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3311.1,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 5.561896400351185,
|
|
"grad_norm": 0.7044024793475986,
|
|
"learning_rate": 4.9408733058633295e-06,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.146099254488945,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3117.4,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 5.570676031606673,
|
|
"grad_norm": 0.7332364406237776,
|
|
"learning_rate": 4.883446149396971e-06,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13157638907432556,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3245.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.57945566286216,
|
|
"grad_norm": 0.6511965126099875,
|
|
"learning_rate": 4.826308234724808e-06,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12790685892105103,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3451.6,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 0.6974718621500186,
|
|
"learning_rate": 4.769460655129931e-06,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10919584333896637,
|
|
"step": 3185,
|
|
"valid_targets_mean": 2691.6,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 5.597014925373134,
|
|
"grad_norm": 0.8661988799996094,
|
|
"learning_rate": 4.712904498340167e-06,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11948838084936142,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2594.1,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 5.605794556628622,
|
|
"grad_norm": 0.7160618961579391,
|
|
"learning_rate": 4.656640846507185e-06,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09667355567216873,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2234.8,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 5.614574187884109,
|
|
"grad_norm": 0.8031122544548323,
|
|
"learning_rate": 4.600670776185858e-06,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.116808220744133,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3002.0,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 5.623353819139596,
|
|
"grad_norm": 0.7248897629831066,
|
|
"learning_rate": 4.544995358313631e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12569546699523926,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2447.4,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 5.6321334503950835,
|
|
"grad_norm": 0.6334525029159978,
|
|
"learning_rate": 4.489615658190034e-06,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10597018152475357,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4071.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 5.640913081650571,
|
|
"grad_norm": 0.6196674806808649,
|
|
"learning_rate": 4.434532735456307e-06,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308397650718689,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3558.8,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 5.649692712906058,
|
|
"grad_norm": 0.8126522616587258,
|
|
"learning_rate": 4.379747644075123e-06,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13204844295978546,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2186.0,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 5.658472344161545,
|
|
"grad_norm": 0.6707748195196616,
|
|
"learning_rate": 4.3252614323103924e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09661723673343658,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2315.2,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 5.667251975417033,
|
|
"grad_norm": 0.9202942195351633,
|
|
"learning_rate": 4.27107514270727e-06,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13739491999149323,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2023.8,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 5.676031606672519,
|
|
"grad_norm": 0.6613109056464578,
|
|
"learning_rate": 4.217189812072131e-06,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10393986105918884,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2684.2,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 5.684811237928007,
|
|
"grad_norm": 0.7581049077251178,
|
|
"learning_rate": 4.163606471452785e-06,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10509642213582993,
|
|
"step": 3240,
|
|
"valid_targets_mean": 1715.0,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 5.693590869183494,
|
|
"grad_norm": 0.682522326339099,
|
|
"learning_rate": 4.110326146118737e-06,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13832658529281616,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3423.6,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 5.702370500438981,
|
|
"grad_norm": 0.7707934729266745,
|
|
"learning_rate": 4.057349855541557e-06,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13524311780929565,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2916.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 5.711150131694469,
|
|
"grad_norm": 0.8480637936316099,
|
|
"learning_rate": 4.004678613375365e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06236063688993454,
|
|
"step": 3255,
|
|
"valid_targets_mean": 1185.2,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 5.719929762949956,
|
|
"grad_norm": 0.7396931297503908,
|
|
"learning_rate": 3.952313427437493e-06,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13160938024520874,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3061.6,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 5.728709394205444,
|
|
"grad_norm": 0.9127986010736439,
|
|
"learning_rate": 3.900255299689115e-06,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10916270315647125,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3747.2,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 5.73748902546093,
|
|
"grad_norm": 0.7064409787504479,
|
|
"learning_rate": 3.848505226216146e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09848909825086594,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2758.4,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 5.746268656716418,
|
|
"grad_norm": 0.6605609569944353,
|
|
"learning_rate": 3.797064197210152e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09406692534685135,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2742.4,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 5.755048287971905,
|
|
"grad_norm": 0.9115754810363103,
|
|
"learning_rate": 3.7459331969494004e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10581913590431213,
|
|
"step": 3280,
|
|
"valid_targets_mean": 2075.5,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 5.763827919227392,
|
|
"grad_norm": 0.7730041647713429,
|
|
"learning_rate": 3.6951132037800476e-06,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10891464352607727,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2306.1,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 5.7726075504828795,
|
|
"grad_norm": 0.9407400388552039,
|
|
"learning_rate": 3.644605190097401e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273517608642578,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3353.6,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 5.781387181738367,
|
|
"grad_norm": 0.6780782464305192,
|
|
"learning_rate": 3.594410122327301e-06,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0939575582742691,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2265.0,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 5.790166812993855,
|
|
"grad_norm": 0.7129593355821223,
|
|
"learning_rate": 3.544528960907685e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11338739097118378,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2751.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 5.798946444249341,
|
|
"grad_norm": 0.6940876653977383,
|
|
"learning_rate": 3.4949626602701337e-06,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09998984634876251,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2515.9,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 5.807726075504829,
|
|
"grad_norm": 0.8312604260531397,
|
|
"learning_rate": 3.4457121688216664e-06,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11648060381412506,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2855.4,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 5.816505706760316,
|
|
"grad_norm": 1.043995623602021,
|
|
"learning_rate": 3.3967784289265726e-06,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14934533834457397,
|
|
"step": 3315,
|
|
"valid_targets_mean": 1986.5,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 5.825285338015803,
|
|
"grad_norm": 0.7995169322637671,
|
|
"learning_rate": 3.3481623768883888e-06,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13963598012924194,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2312.9,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 5.8340649692712905,
|
|
"grad_norm": 0.660217814950819,
|
|
"learning_rate": 3.299864942931952e-06,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13991346955299377,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3623.9,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 5.842844600526778,
|
|
"grad_norm": 0.6927058182347668,
|
|
"learning_rate": 3.251887051185665e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11998097598552704,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2350.2,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 5.8516242317822655,
|
|
"grad_norm": 0.9010450080103095,
|
|
"learning_rate": 3.204229619663739e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19559866189956665,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2341.6,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 5.860403863037752,
|
|
"grad_norm": 0.7493608670109543,
|
|
"learning_rate": 3.156893560248688e-06,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11450110375881195,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2780.1,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 5.86918349429324,
|
|
"grad_norm": 0.73998910463322,
|
|
"learning_rate": 3.1098797786738433e-06,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06091228872537613,
|
|
"step": 3345,
|
|
"valid_targets_mean": 1382.2,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 5.877963125548727,
|
|
"grad_norm": 0.8270076104599803,
|
|
"learning_rate": 3.063189174506047e-06,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09963943064212799,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2994.9,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.886742756804214,
|
|
"grad_norm": 0.6140697991374259,
|
|
"learning_rate": 3.0168226411284207e-06,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14505399763584137,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3885.5,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 5.895522388059701,
|
|
"grad_norm": 0.8230700484357936,
|
|
"learning_rate": 2.9707810657232893e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456972360610962,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2731.4,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 5.904302019315189,
|
|
"grad_norm": 0.7023650821952465,
|
|
"learning_rate": 2.925065329255177e-06,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12229949980974197,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3071.6,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 5.9130816505706765,
|
|
"grad_norm": 0.8592461617709489,
|
|
"learning_rate": 2.8796763064540003e-06,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09128420054912567,
|
|
"step": 3370,
|
|
"valid_targets_mean": 1685.4,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 5.921861281826163,
|
|
"grad_norm": 0.8552689723020216,
|
|
"learning_rate": 2.834614865798275e-06,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1374320387840271,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2341.2,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 5.930640913081651,
|
|
"grad_norm": 0.6368206817625013,
|
|
"learning_rate": 2.7898818694985343e-06,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06457676738500595,
|
|
"step": 3380,
|
|
"valid_targets_mean": 1865.9,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 5.939420544337138,
|
|
"grad_norm": 1.127798606311695,
|
|
"learning_rate": 2.74547817348082e-06,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10669637471437454,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2143.0,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.948200175592625,
|
|
"grad_norm": 0.6694722928880568,
|
|
"learning_rate": 2.7014046273703144e-06,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08792329579591751,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2293.8,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 5.956979806848112,
|
|
"grad_norm": 0.801383250022421,
|
|
"learning_rate": 2.6576620744750536e-06,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14679785072803497,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3045.5,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 5.9657594381036,
|
|
"grad_norm": 0.755465909862208,
|
|
"learning_rate": 2.614251351769843e-06,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06720340996980667,
|
|
"step": 3400,
|
|
"valid_targets_mean": 1471.6,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.974539069359087,
|
|
"grad_norm": 0.8006863584077799,
|
|
"learning_rate": 2.5711732898801846e-06,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10917264968156815,
|
|
"step": 3405,
|
|
"valid_targets_mean": 1864.8,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 5.983318700614574,
|
|
"grad_norm": 0.6611483188020353,
|
|
"learning_rate": 2.5284287130664308e-06,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1663253903388977,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4600.1,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 5.992098331870062,
|
|
"grad_norm": 0.6705004419208503,
|
|
"learning_rate": 2.486018439207987e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10869936645030975,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3060.1,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.9768593569117328,
|
|
"learning_rate": 2.443943279787668e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22519835829734802,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2416.0,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 6.0087796312554875,
|
|
"grad_norm": 0.6885370377847849,
|
|
"learning_rate": 2.4022040398761793e-06,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1587909311056137,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3244.5,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 6.017559262510974,
|
|
"grad_norm": 0.8430863853012266,
|
|
"learning_rate": 2.360801518116702e-06,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12957683205604553,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2080.2,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 6.026338893766462,
|
|
"grad_norm": 0.6984256300365793,
|
|
"learning_rate": 2.3197365067096024e-06,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08601324260234833,
|
|
"step": 3435,
|
|
"valid_targets_mean": 1751.5,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 6.035118525021949,
|
|
"grad_norm": 0.8398024070419722,
|
|
"learning_rate": 2.2790097913973154e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1461915373802185,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3281.2,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 6.043898156277437,
|
|
"grad_norm": 0.815611124657908,
|
|
"learning_rate": 2.2386221514492502e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10512498766183853,
|
|
"step": 3445,
|
|
"valid_targets_mean": 2078.0,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 6.052677787532923,
|
|
"grad_norm": 0.7010556143177307,
|
|
"learning_rate": 2.1985743596469344e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442563384771347,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3700.4,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 6.061457418788411,
|
|
"grad_norm": 0.7153878534486958,
|
|
"learning_rate": 2.158867182269191e-06,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08420448750257492,
|
|
"step": 3455,
|
|
"valid_targets_mean": 2221.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 6.0702370500438985,
|
|
"grad_norm": 0.77166681050214,
|
|
"learning_rate": 2.1195013790775e-06,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13577553629875183,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2782.2,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 6.079016681299385,
|
|
"grad_norm": 0.6574041016417845,
|
|
"learning_rate": 2.080477703301429e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1018177717924118,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3371.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 6.087796312554873,
|
|
"grad_norm": 0.707507981512508,
|
|
"learning_rate": 2.0417969016242757e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10029999166727066,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2692.9,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 6.09657594381036,
|
|
"grad_norm": 1.0073404503181405,
|
|
"learning_rate": 2.0034597141687164e-06,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14564797282218933,
|
|
"step": 3475,
|
|
"valid_targets_mean": 2583.0,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 6.105355575065847,
|
|
"grad_norm": 0.6581586823860129,
|
|
"learning_rate": 1.965466874482689e-06,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269952952861786,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3602.5,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 6.114135206321334,
|
|
"grad_norm": 0.6859511665750566,
|
|
"learning_rate": 1.927819109525346e-06,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12032485008239746,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3602.2,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 6.122914837576822,
|
|
"grad_norm": 0.8703699557036584,
|
|
"learning_rate": 1.8905171396531363e-06,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438479721546173,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2371.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 6.131694468832309,
|
|
"grad_norm": 0.7729541583551431,
|
|
"learning_rate": 1.8535616786060284e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426084041595459,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3437.5,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 6.140474100087796,
|
|
"grad_norm": 0.7878930934474074,
|
|
"learning_rate": 1.816953433493862e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060608599334955215,
|
|
"step": 3500,
|
|
"valid_targets_mean": 1292.5,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 6.149253731343284,
|
|
"grad_norm": 0.8706334695764008,
|
|
"learning_rate": 1.7806931047827914e-06,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1064988523721695,
|
|
"step": 3505,
|
|
"valid_targets_mean": 1954.0,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 6.158033362598771,
|
|
"grad_norm": 0.7588105849335235,
|
|
"learning_rate": 1.7447813862819153e-06,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09260734915733337,
|
|
"step": 3510,
|
|
"valid_targets_mean": 1974.4,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.166812993854258,
|
|
"grad_norm": 0.7707172599244738,
|
|
"learning_rate": 1.7092189651299818e-06,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14388254284858704,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3286.0,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 6.175592625109745,
|
|
"grad_norm": 0.8021283912135745,
|
|
"learning_rate": 1.6740065217822453e-06,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0838511735200882,
|
|
"step": 3520,
|
|
"valid_targets_mean": 1693.5,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 6.184372256365233,
|
|
"grad_norm": 0.6692415719529806,
|
|
"learning_rate": 1.6391447299974506e-06,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0797465592622757,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2080.6,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 6.19315188762072,
|
|
"grad_norm": 0.8229344654657074,
|
|
"learning_rate": 1.6046342568249307e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1573319286108017,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2809.6,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 6.201931518876207,
|
|
"grad_norm": 0.6984032972795241,
|
|
"learning_rate": 1.5704757625918454e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10845072567462921,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2928.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 6.2107111501316945,
|
|
"grad_norm": 0.8541130883528398,
|
|
"learning_rate": 1.5366699008905727e-06,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05587174743413925,
|
|
"step": 3540,
|
|
"valid_targets_mean": 941.6,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 6.219490781387182,
|
|
"grad_norm": 0.7515951301149579,
|
|
"learning_rate": 1.503217318566157e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14807815849781036,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3097.8,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 6.228270412642669,
|
|
"grad_norm": 0.6034220829639785,
|
|
"learning_rate": 1.4701186557039648e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311323344707489,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4673.1,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 6.237050043898156,
|
|
"grad_norm": 0.6235394563804212,
|
|
"learning_rate": 1.4373745456174359e-06,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08958350121974945,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3506.0,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 6.245829675153644,
|
|
"grad_norm": 0.7874619904238769,
|
|
"learning_rate": 1.4049856148359542e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378445327281952,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2566.9,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 6.254609306409131,
|
|
"grad_norm": 0.8266759770430957,
|
|
"learning_rate": 1.3729524830928508e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11242090165615082,
|
|
"step": 3565,
|
|
"valid_targets_mean": 2005.4,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 6.263388937664618,
|
|
"grad_norm": 0.7007626569046193,
|
|
"learning_rate": 1.3412757633135854e-06,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07441128045320511,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2732.9,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 6.272168568920105,
|
|
"grad_norm": 0.7214329525164257,
|
|
"learning_rate": 1.3099560616039674e-06,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0957804024219513,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2559.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 6.280948200175593,
|
|
"grad_norm": 0.6661081397573556,
|
|
"learning_rate": 1.2789939772386007e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11385385692119598,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3202.0,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 6.28972783143108,
|
|
"grad_norm": 0.7191504587312675,
|
|
"learning_rate": 1.2483901026493861e-06,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10483043640851974,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3336.0,
|
|
"valid_targets_min": 174
|
|
},
|
|
{
|
|
"epoch": 6.298507462686567,
|
|
"grad_norm": 0.7361266633333147,
|
|
"learning_rate": 1.2181450234142122e-06,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10227947682142258,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2499.1,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 6.307287093942055,
|
|
"grad_norm": 0.6992696440665829,
|
|
"learning_rate": 1.188259318245728e-06,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13242176175117493,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3065.9,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 6.316066725197541,
|
|
"grad_norm": 0.791401572430832,
|
|
"learning_rate": 1.1587335589802918e-06,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10407061874866486,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2056.0,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 6.324846356453029,
|
|
"grad_norm": 0.7386217162298632,
|
|
"learning_rate": 1.129568310566993e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13897651433944702,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2768.1,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.333625987708516,
|
|
"grad_norm": 0.625026064933491,
|
|
"learning_rate": 1.100764131056904e-06,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12321414798498154,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3780.2,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 6.342405618964004,
|
|
"grad_norm": 0.7694815234440899,
|
|
"learning_rate": 1.0723215715923296e-06,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13807536661624908,
|
|
"step": 3615,
|
|
"valid_targets_mean": 2807.6,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 6.3511852502194905,
|
|
"grad_norm": 0.6989039928019662,
|
|
"learning_rate": 1.0442411763963188e-06,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13113310933113098,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3766.5,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 6.359964881474978,
|
|
"grad_norm": 0.790816983131548,
|
|
"learning_rate": 1.0165234827622216e-06,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10573418438434601,
|
|
"step": 3625,
|
|
"valid_targets_mean": 2126.1,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.368744512730466,
|
|
"grad_norm": 0.6613177086559875,
|
|
"learning_rate": 9.891690210434235e-07,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08005513995885849,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2872.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 6.377524143985952,
|
|
"grad_norm": 0.667840530726726,
|
|
"learning_rate": 9.621783146431741e-07,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12524107098579407,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3424.5,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 6.38630377524144,
|
|
"grad_norm": 0.7055368242277743,
|
|
"learning_rate": 9.355518800046171e-07,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10701599717140198,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2713.0,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.395083406496927,
|
|
"grad_norm": 0.7133595590266818,
|
|
"learning_rate": 9.092902266008496e-07,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1703338772058487,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3455.9,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.403863037752414,
|
|
"grad_norm": 0.627697457021527,
|
|
"learning_rate": 8.833938569252276e-07,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10767993330955505,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4283.5,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 6.4126426690079015,
|
|
"grad_norm": 0.5943556618491385,
|
|
"learning_rate": 8.578632664817177e-07,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07428453862667084,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2392.8,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 6.421422300263389,
|
|
"grad_norm": 0.6600423448516453,
|
|
"learning_rate": 8.326989437754274e-07,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14013662934303284,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 6.4302019315188765,
|
|
"grad_norm": 0.6910269498421224,
|
|
"learning_rate": 8.079013703032612e-07,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10757927596569061,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3238.4,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 6.438981562774363,
|
|
"grad_norm": 0.7446875368323815,
|
|
"learning_rate": 7.834710205447038e-07,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13762310147285461,
|
|
"step": 3670,
|
|
"valid_targets_mean": 2652.5,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 6.447761194029851,
|
|
"grad_norm": 0.7318570677608963,
|
|
"learning_rate": 7.59408361952727e-07,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09591084718704224,
|
|
"step": 3675,
|
|
"valid_targets_mean": 1971.2,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 6.456540825285338,
|
|
"grad_norm": 0.721976514613169,
|
|
"learning_rate": 7.357138549448861e-07,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16027334332466125,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3696.0,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 6.465320456540825,
|
|
"grad_norm": 0.8064607330817087,
|
|
"learning_rate": 7.1238795289446e-07,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10751006007194519,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2231.1,
|
|
"valid_targets_min": 188
|
|
},
|
|
{
|
|
"epoch": 6.474100087796312,
|
|
"grad_norm": 0.8571563067398605,
|
|
"learning_rate": 6.894311021218115e-07,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12169455736875534,
|
|
"step": 3690,
|
|
"valid_targets_mean": 2142.5,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 6.4828797190518,
|
|
"grad_norm": 0.7181368825731264,
|
|
"learning_rate": 6.66843741885832e-07,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08929352462291718,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2246.6,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 6.4916593503072875,
|
|
"grad_norm": 0.6576235760786786,
|
|
"learning_rate": 6.446263043755441e-07,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251218318939209,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3336.1,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.500438981562774,
|
|
"grad_norm": 0.7914084282260981,
|
|
"learning_rate": 6.22779214701823e-07,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11329592764377594,
|
|
"step": 3705,
|
|
"valid_targets_mean": 2212.2,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 6.509218612818262,
|
|
"grad_norm": 0.7887057088017461,
|
|
"learning_rate": 6.013028908892749e-07,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11884763836860657,
|
|
"step": 3710,
|
|
"valid_targets_mean": 2091.9,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 6.517998244073749,
|
|
"grad_norm": 0.640039245389866,
|
|
"learning_rate": 5.801977438682271e-07,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12221872806549072,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3463.0,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 6.526777875329236,
|
|
"grad_norm": 0.7096836765450981,
|
|
"learning_rate": 5.594641774668663e-07,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1190369725227356,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3604.4,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 6.535557506584723,
|
|
"grad_norm": 0.8528925953115493,
|
|
"learning_rate": 5.391025884035239e-07,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13277220726013184,
|
|
"step": 3725,
|
|
"valid_targets_mean": 2051.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 6.544337137840211,
|
|
"grad_norm": 0.7767043126503024,
|
|
"learning_rate": 5.191133662790693e-07,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1179177463054657,
|
|
"step": 3730,
|
|
"valid_targets_mean": 2684.8,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 6.553116769095698,
|
|
"grad_norm": 0.9366306054167101,
|
|
"learning_rate": 4.994968935694644e-07,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07106693089008331,
|
|
"step": 3735,
|
|
"valid_targets_mean": 1271.0,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 6.561896400351185,
|
|
"grad_norm": 0.7273552140122239,
|
|
"learning_rate": 4.802535456184431e-07,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10659492015838623,
|
|
"step": 3740,
|
|
"valid_targets_mean": 2653.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.570676031606673,
|
|
"grad_norm": 0.7690199563755111,
|
|
"learning_rate": 4.6138369063032815e-07,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14221766591072083,
|
|
"step": 3745,
|
|
"valid_targets_mean": 3319.8,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 6.57945566286216,
|
|
"grad_norm": 1.016264207523438,
|
|
"learning_rate": 4.428876896629897e-07,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07530868053436279,
|
|
"step": 3750,
|
|
"valid_targets_mean": 1157.9,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 6.588235294117647,
|
|
"grad_norm": 0.7127332953033202,
|
|
"learning_rate": 4.2476589662093116e-07,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14323770999908447,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3564.8,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 6.597014925373134,
|
|
"grad_norm": 0.6247370132533101,
|
|
"learning_rate": 4.070186582485214e-07,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10359257459640503,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3836.8,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 6.605794556628622,
|
|
"grad_norm": 0.7060519964370161,
|
|
"learning_rate": 3.8964631412336195e-07,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09632022678852081,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2485.0,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 6.614574187884109,
|
|
"grad_norm": 0.8120154453229218,
|
|
"learning_rate": 3.726491966497858e-07,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09183450788259506,
|
|
"step": 3770,
|
|
"valid_targets_mean": 1766.5,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 6.623353819139596,
|
|
"grad_norm": 0.6716123828771584,
|
|
"learning_rate": 3.560276310524913e-07,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11318805813789368,
|
|
"step": 3775,
|
|
"valid_targets_mean": 2781.9,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 6.6321334503950835,
|
|
"grad_norm": 0.8582161352032508,
|
|
"learning_rate": 3.397819353703402e-07,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12848101556301117,
|
|
"step": 3780,
|
|
"valid_targets_mean": 1999.9,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 6.640913081650571,
|
|
"grad_norm": 0.829924782958819,
|
|
"learning_rate": 3.2391242045024305e-07,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14838725328445435,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3002.9,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 6.649692712906058,
|
|
"grad_norm": 0.6708006166174201,
|
|
"learning_rate": 3.0841938994123467e-07,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0861341580748558,
|
|
"step": 3790,
|
|
"valid_targets_mean": 2055.5,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 6.658472344161545,
|
|
"grad_norm": 0.6551841431356863,
|
|
"learning_rate": 2.933031402886588e-07,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13714119791984558,
|
|
"step": 3795,
|
|
"valid_targets_mean": 2971.5,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 6.667251975417033,
|
|
"grad_norm": 1.0272569655925115,
|
|
"learning_rate": 2.78563960728484e-07,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17977437376976013,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2960.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 6.676031606672519,
|
|
"grad_norm": 0.8165734270614932,
|
|
"learning_rate": 2.642021332817879e-07,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10762757807970047,
|
|
"step": 3805,
|
|
"valid_targets_mean": 2613.1,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 6.684811237928007,
|
|
"grad_norm": 0.7307648618264396,
|
|
"learning_rate": 2.502179327493437e-07,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1097162663936615,
|
|
"step": 3810,
|
|
"valid_targets_mean": 2767.6,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 6.693590869183494,
|
|
"grad_norm": 0.7018610527447973,
|
|
"learning_rate": 2.366116267063756e-07,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.122999407351017,
|
|
"step": 3815,
|
|
"valid_targets_mean": 2550.8,
|
|
"valid_targets_min": 214
|
|
},
|
|
{
|
|
"epoch": 6.702370500438981,
|
|
"grad_norm": 0.8137362103988176,
|
|
"learning_rate": 2.2338347549742956e-07,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1029479131102562,
|
|
"step": 3820,
|
|
"valid_targets_mean": 2387.0,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 6.711150131694469,
|
|
"grad_norm": 0.6320847621507237,
|
|
"learning_rate": 2.105337322313994e-07,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1221184954047203,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4413.8,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 6.719929762949956,
|
|
"grad_norm": 0.7558940007760717,
|
|
"learning_rate": 1.980626427766752e-07,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12101258337497711,
|
|
"step": 3830,
|
|
"valid_targets_mean": 2558.4,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 6.728709394205444,
|
|
"grad_norm": 0.7457655218251878,
|
|
"learning_rate": 1.8597044575644708e-07,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14594140648841858,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3724.6,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 6.73748902546093,
|
|
"grad_norm": 0.8671965865991125,
|
|
"learning_rate": 1.7425737254413544e-07,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11601061373949051,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2074.1,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 6.746268656716418,
|
|
"grad_norm": 0.6773277742272612,
|
|
"learning_rate": 1.6292364725896127e-07,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09854355454444885,
|
|
"step": 3845,
|
|
"valid_targets_mean": 2333.1,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 6.755048287971905,
|
|
"grad_norm": 0.8217673538248672,
|
|
"learning_rate": 1.5196948676166946e-07,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14113372564315796,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2715.1,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 6.763827919227392,
|
|
"grad_norm": 0.7299244747128262,
|
|
"learning_rate": 1.4139510065035888e-07,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07521846890449524,
|
|
"step": 3855,
|
|
"valid_targets_mean": 1934.5,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 6.7726075504828795,
|
|
"grad_norm": 0.7135541426935954,
|
|
"learning_rate": 1.3120069125649005e-07,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10608363151550293,
|
|
"step": 3860,
|
|
"valid_targets_mean": 2584.9,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 6.781387181738367,
|
|
"grad_norm": 0.812793635692517,
|
|
"learning_rate": 1.2138645364101032e-07,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14473184943199158,
|
|
"step": 3865,
|
|
"valid_targets_mean": 2222.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.790166812993855,
|
|
"grad_norm": 0.77502721579719,
|
|
"learning_rate": 1.1195257559061257e-07,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11466886848211288,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2619.4,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 6.798946444249341,
|
|
"grad_norm": 0.6761276472092218,
|
|
"learning_rate": 1.0289923761415355e-07,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11029920727014542,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2862.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 6.807726075504829,
|
|
"grad_norm": 0.6799806147614018,
|
|
"learning_rate": 9.422661293918777e-08,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09013142436742783,
|
|
"step": 3880,
|
|
"valid_targets_mean": 2754.4,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 6.816505706760316,
|
|
"grad_norm": 0.8683247753256154,
|
|
"learning_rate": 8.593486750866797e-08,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12953446805477142,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2155.0,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 6.825285338015803,
|
|
"grad_norm": 0.6880324221863211,
|
|
"learning_rate": 7.802415997775425e-08,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0993674099445343,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3105.6,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 6.8340649692712905,
|
|
"grad_norm": 0.7165674230554778,
|
|
"learning_rate": 7.049464171079212e-08,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09922979772090912,
|
|
"step": 3895,
|
|
"valid_targets_mean": 2504.4,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 6.842844600526778,
|
|
"grad_norm": 0.7444146015206777,
|
|
"learning_rate": 6.334645677840811e-08,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07099230587482452,
|
|
"step": 3900,
|
|
"valid_targets_mean": 1686.6,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 6.8516242317822655,
|
|
"grad_norm": 0.7892942623659744,
|
|
"learning_rate": 5.657974195475202e-08,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10610377788543701,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2148.1,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 6.860403863037752,
|
|
"grad_norm": 0.6932669661864974,
|
|
"learning_rate": 5.019462671488562e-08,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10635873675346375,
|
|
"step": 3910,
|
|
"valid_targets_mean": 3097.5,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 6.86918349429324,
|
|
"grad_norm": 0.707915486632403,
|
|
"learning_rate": 4.4191233232300235e-08,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08999186754226685,
|
|
"step": 3915,
|
|
"valid_targets_mean": 2164.9,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 6.877963125548727,
|
|
"grad_norm": 0.8965323489882348,
|
|
"learning_rate": 3.856967637658748e-08,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12418054044246674,
|
|
"step": 3920,
|
|
"valid_targets_mean": 1721.1,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 6.886742756804214,
|
|
"grad_norm": 0.7136898190990361,
|
|
"learning_rate": 3.333006371122993e-08,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10464788973331451,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2912.9,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.895522388059701,
|
|
"grad_norm": 0.7650587690409318,
|
|
"learning_rate": 2.847249549154496e-08,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291654109954834,
|
|
"step": 3930,
|
|
"valid_targets_mean": 2245.8,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 6.904302019315189,
|
|
"grad_norm": 0.8094160406106423,
|
|
"learning_rate": 2.3997064662779624e-08,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14951848983764648,
|
|
"step": 3935,
|
|
"valid_targets_mean": 2740.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 6.9130816505706765,
|
|
"grad_norm": 0.7615406908775477,
|
|
"learning_rate": 1.990385685831431e-08,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13741973042488098,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3031.4,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 6.921861281826163,
|
|
"grad_norm": 0.6920562774726263,
|
|
"learning_rate": 1.6192950398032924e-08,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14877726137638092,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4001.2,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 6.930640913081651,
|
|
"grad_norm": 0.8003855056538967,
|
|
"learning_rate": 1.286441628682633e-08,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08416864275932312,
|
|
"step": 3950,
|
|
"valid_targets_mean": 1574.9,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 6.939420544337138,
|
|
"grad_norm": 0.7231442370627252,
|
|
"learning_rate": 9.918318213231193e-09,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08636089414358139,
|
|
"step": 3955,
|
|
"valid_targets_mean": 2426.6,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 6.948200175592625,
|
|
"grad_norm": 0.7946514716083326,
|
|
"learning_rate": 7.354712548210963e-09,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11088542640209198,
|
|
"step": 3960,
|
|
"valid_targets_mean": 1965.1,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 6.956979806848112,
|
|
"grad_norm": 1.1546385449434307,
|
|
"learning_rate": 5.173648344074522e-09,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13096728920936584,
|
|
"step": 3965,
|
|
"valid_targets_mean": 2620.1,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 6.9657594381036,
|
|
"grad_norm": 0.6716515618108556,
|
|
"learning_rate": 3.3751673335458147e-09,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1208876296877861,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3534.8,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 6.974539069359087,
|
|
"grad_norm": 0.7619961996661716,
|
|
"learning_rate": 1.959303928953382e-09,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14055666327476501,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3317.6,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 6.983318700614574,
|
|
"grad_norm": 0.7575730319309756,
|
|
"learning_rate": 9.260852215842165e-10,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07773347198963165,
|
|
"step": 3980,
|
|
"valid_targets_mean": 1942.1,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 6.992098331870062,
|
|
"grad_norm": 0.9667345884929217,
|
|
"learning_rate": 2.755309811575124e-10,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10300040990114212,
|
|
"step": 3985,
|
|
"valid_targets_mean": 1167.1,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 1.1457179950949647,
|
|
"learning_rate": 7.653655451633768e-12,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23403209447860718,
|
|
"step": 3990,
|
|
"valid_targets_mean": 2128.6,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23403209447860718,
|
|
"step": 3990,
|
|
"total_flos": 1.6286243582157783e+18,
|
|
"train_loss": 0.29326175941261734,
|
|
"train_runtime": 44635.9087,
|
|
"train_samples_per_second": 1.429,
|
|
"train_steps_per_second": 0.089,
|
|
"valid_targets_mean": 2128.6,
|
|
"valid_targets_min": 1017
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3990,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.6286243582157783e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|