8760 lines
243 KiB
JSON
8760 lines
243 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3962,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008841732979664015,
|
|
"grad_norm": 16.43759145601646,
|
|
"learning_rate": 4.030226700251889e-07,
|
|
"loss": 0.9913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4914407730102539,
|
|
"step": 5,
|
|
"valid_targets_mean": 3975.9,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.01768346595932803,
|
|
"grad_norm": 17.984303937601393,
|
|
"learning_rate": 9.068010075566751e-07,
|
|
"loss": 1.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5117278099060059,
|
|
"step": 10,
|
|
"valid_targets_mean": 2293.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 0.026525198938992044,
|
|
"grad_norm": 17.430054844624365,
|
|
"learning_rate": 1.4105793450881613e-06,
|
|
"loss": 1.0664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48715919256210327,
|
|
"step": 15,
|
|
"valid_targets_mean": 3742.1,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.03536693191865606,
|
|
"grad_norm": 13.157985816910156,
|
|
"learning_rate": 1.9143576826196476e-06,
|
|
"loss": 1.0244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48635387420654297,
|
|
"step": 20,
|
|
"valid_targets_mean": 3909.9,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 0.04420866489832007,
|
|
"grad_norm": 7.039459793103061,
|
|
"learning_rate": 2.4181360201511335e-06,
|
|
"loss": 0.9523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5950973629951477,
|
|
"step": 25,
|
|
"valid_targets_mean": 4854.0,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 0.05305039787798409,
|
|
"grad_norm": 4.37502689672069,
|
|
"learning_rate": 2.92191435768262e-06,
|
|
"loss": 0.8882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4377889037132263,
|
|
"step": 30,
|
|
"valid_targets_mean": 3904.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.0618921308576481,
|
|
"grad_norm": 2.2984916066569143,
|
|
"learning_rate": 3.425692695214106e-06,
|
|
"loss": 0.8596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3730544149875641,
|
|
"step": 35,
|
|
"valid_targets_mean": 4253.4,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.07073386383731212,
|
|
"grad_norm": 1.749502864868188,
|
|
"learning_rate": 3.9294710327455925e-06,
|
|
"loss": 0.8071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37501755356788635,
|
|
"step": 40,
|
|
"valid_targets_mean": 3304.0,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 0.07957559681697612,
|
|
"grad_norm": 1.4297619383408833,
|
|
"learning_rate": 4.433249370277078e-06,
|
|
"loss": 0.7654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531526982784271,
|
|
"step": 45,
|
|
"valid_targets_mean": 2443.0,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 0.08841732979664015,
|
|
"grad_norm": 1.2107916623135075,
|
|
"learning_rate": 4.937027707808565e-06,
|
|
"loss": 0.7661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26174238324165344,
|
|
"step": 50,
|
|
"valid_targets_mean": 1962.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.09725906277630415,
|
|
"grad_norm": 1.1177863382586022,
|
|
"learning_rate": 5.440806045340051e-06,
|
|
"loss": 0.7088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2441769391298294,
|
|
"step": 55,
|
|
"valid_targets_mean": 1324.0,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 0.10610079575596817,
|
|
"grad_norm": 1.3240485836200855,
|
|
"learning_rate": 5.944584382871537e-06,
|
|
"loss": 0.7418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45804762840270996,
|
|
"step": 60,
|
|
"valid_targets_mean": 2281.6,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 0.11494252873563218,
|
|
"grad_norm": 0.9887476322800939,
|
|
"learning_rate": 6.448362720403023e-06,
|
|
"loss": 0.7786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3559702932834625,
|
|
"step": 65,
|
|
"valid_targets_mean": 2576.8,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 0.1237842617152962,
|
|
"grad_norm": 0.8313614619463878,
|
|
"learning_rate": 6.9521410579345095e-06,
|
|
"loss": 0.7667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32869428396224976,
|
|
"step": 70,
|
|
"valid_targets_mean": 2681.1,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.13262599469496023,
|
|
"grad_norm": 1.2928448614693209,
|
|
"learning_rate": 7.455919395465996e-06,
|
|
"loss": 0.7316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4524758458137512,
|
|
"step": 75,
|
|
"valid_targets_mean": 2086.6,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 0.14146772767462423,
|
|
"grad_norm": 0.9039110142607959,
|
|
"learning_rate": 7.959697732997482e-06,
|
|
"loss": 0.7216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41223078966140747,
|
|
"step": 80,
|
|
"valid_targets_mean": 2522.0,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 0.15030946065428824,
|
|
"grad_norm": 1.1531713116573887,
|
|
"learning_rate": 8.463476070528968e-06,
|
|
"loss": 0.6962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31595444679260254,
|
|
"step": 85,
|
|
"valid_targets_mean": 2714.1,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 0.15915119363395225,
|
|
"grad_norm": 0.7279561415401078,
|
|
"learning_rate": 8.967254408060454e-06,
|
|
"loss": 0.7161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45249640941619873,
|
|
"step": 90,
|
|
"valid_targets_mean": 4901.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.16799292661361626,
|
|
"grad_norm": 0.7881029215112537,
|
|
"learning_rate": 9.47103274559194e-06,
|
|
"loss": 0.706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31990885734558105,
|
|
"step": 95,
|
|
"valid_targets_mean": 2807.6,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 0.1768346595932803,
|
|
"grad_norm": 0.7086656082236659,
|
|
"learning_rate": 9.974811083123427e-06,
|
|
"loss": 0.6583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270596981048584,
|
|
"step": 100,
|
|
"valid_targets_mean": 2979.0,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.1856763925729443,
|
|
"grad_norm": 0.8874052957709296,
|
|
"learning_rate": 1.0478589420654912e-05,
|
|
"loss": 0.6697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24225851893424988,
|
|
"step": 105,
|
|
"valid_targets_mean": 1624.0,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 0.1945181255526083,
|
|
"grad_norm": 0.8682741626307907,
|
|
"learning_rate": 1.0982367758186399e-05,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38384777307510376,
|
|
"step": 110,
|
|
"valid_targets_mean": 2603.8,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 0.20335985853227231,
|
|
"grad_norm": 0.7893830841438993,
|
|
"learning_rate": 1.1486146095717885e-05,
|
|
"loss": 0.6651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3616026043891907,
|
|
"step": 115,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.21220159151193635,
|
|
"grad_norm": 0.8435136463460109,
|
|
"learning_rate": 1.1989924433249371e-05,
|
|
"loss": 0.7056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35327407717704773,
|
|
"step": 120,
|
|
"valid_targets_mean": 3276.9,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 0.22104332449160036,
|
|
"grad_norm": 0.8326030890475514,
|
|
"learning_rate": 1.2493702770780859e-05,
|
|
"loss": 0.6327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29393622279167175,
|
|
"step": 125,
|
|
"valid_targets_mean": 3015.4,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 0.22988505747126436,
|
|
"grad_norm": 0.5791447791458552,
|
|
"learning_rate": 1.2997481108312344e-05,
|
|
"loss": 0.6477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31838011741638184,
|
|
"step": 130,
|
|
"valid_targets_mean": 4220.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.23872679045092837,
|
|
"grad_norm": 0.7833412749104234,
|
|
"learning_rate": 1.350125944584383e-05,
|
|
"loss": 0.6188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3599586486816406,
|
|
"step": 135,
|
|
"valid_targets_mean": 3493.0,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 0.2475685234305924,
|
|
"grad_norm": 0.6930132817315054,
|
|
"learning_rate": 1.4005037783375318e-05,
|
|
"loss": 0.6729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3735961318016052,
|
|
"step": 140,
|
|
"valid_targets_mean": 3788.9,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 0.2564102564102564,
|
|
"grad_norm": 0.6900812305728492,
|
|
"learning_rate": 1.4508816120906802e-05,
|
|
"loss": 0.5868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900860905647278,
|
|
"step": 145,
|
|
"valid_targets_mean": 3795.8,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 0.26525198938992045,
|
|
"grad_norm": 0.668587653722343,
|
|
"learning_rate": 1.5012594458438288e-05,
|
|
"loss": 0.6371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23740538954734802,
|
|
"step": 150,
|
|
"valid_targets_mean": 3166.9,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 0.27409372236958446,
|
|
"grad_norm": 0.6857654288467885,
|
|
"learning_rate": 1.5516372795969776e-05,
|
|
"loss": 0.6119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49299874901771545,
|
|
"step": 155,
|
|
"valid_targets_mean": 5421.9,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 0.28293545534924847,
|
|
"grad_norm": 0.7011124721817551,
|
|
"learning_rate": 1.602015113350126e-05,
|
|
"loss": 0.6135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2792966365814209,
|
|
"step": 160,
|
|
"valid_targets_mean": 3557.1,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.2917771883289125,
|
|
"grad_norm": 0.7181568311757064,
|
|
"learning_rate": 1.6523929471032747e-05,
|
|
"loss": 0.6163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3261794447898865,
|
|
"step": 165,
|
|
"valid_targets_mean": 3577.6,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 0.3006189213085765,
|
|
"grad_norm": 0.6383260621735827,
|
|
"learning_rate": 1.7027707808564233e-05,
|
|
"loss": 0.6208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658522427082062,
|
|
"step": 170,
|
|
"valid_targets_mean": 4118.4,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.3094606542882405,
|
|
"grad_norm": 0.873748558543848,
|
|
"learning_rate": 1.753148614609572e-05,
|
|
"loss": 0.6582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27786922454833984,
|
|
"step": 175,
|
|
"valid_targets_mean": 2538.9,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 0.3183023872679045,
|
|
"grad_norm": 0.7741218070486372,
|
|
"learning_rate": 1.8035264483627205e-05,
|
|
"loss": 0.661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3300468325614929,
|
|
"step": 180,
|
|
"valid_targets_mean": 3783.0,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.3271441202475685,
|
|
"grad_norm": 0.7153501560841902,
|
|
"learning_rate": 1.8539042821158694e-05,
|
|
"loss": 0.613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30669140815734863,
|
|
"step": 185,
|
|
"valid_targets_mean": 2831.9,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.3359858532272325,
|
|
"grad_norm": 0.8733028655615769,
|
|
"learning_rate": 1.9042821158690177e-05,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4316086769104004,
|
|
"step": 190,
|
|
"valid_targets_mean": 3605.1,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 0.3448275862068966,
|
|
"grad_norm": 0.8909617924940159,
|
|
"learning_rate": 1.9546599496221663e-05,
|
|
"loss": 0.7227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29832324385643005,
|
|
"step": 195,
|
|
"valid_targets_mean": 2360.1,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 0.3536693191865606,
|
|
"grad_norm": 0.7676692325940812,
|
|
"learning_rate": 2.0050377833753152e-05,
|
|
"loss": 0.6012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.349293053150177,
|
|
"step": 200,
|
|
"valid_targets_mean": 3419.4,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 0.3625110521662246,
|
|
"grad_norm": 0.6966533272502462,
|
|
"learning_rate": 2.0554156171284634e-05,
|
|
"loss": 0.597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34834665060043335,
|
|
"step": 205,
|
|
"valid_targets_mean": 4089.6,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 0.3713527851458886,
|
|
"grad_norm": 0.9033467905392003,
|
|
"learning_rate": 2.1057934508816124e-05,
|
|
"loss": 0.6181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3413087725639343,
|
|
"step": 210,
|
|
"valid_targets_mean": 2691.6,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 0.3801945181255526,
|
|
"grad_norm": 1.709473285401154,
|
|
"learning_rate": 2.156171284634761e-05,
|
|
"loss": 0.6068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899201512336731,
|
|
"step": 215,
|
|
"valid_targets_mean": 3180.1,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 0.3890362511052166,
|
|
"grad_norm": 0.7521210734695193,
|
|
"learning_rate": 2.2065491183879092e-05,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3372876048088074,
|
|
"step": 220,
|
|
"valid_targets_mean": 3230.0,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.3978779840848806,
|
|
"grad_norm": 0.8333691908125227,
|
|
"learning_rate": 2.256926952141058e-05,
|
|
"loss": 0.6837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3745100498199463,
|
|
"step": 225,
|
|
"valid_targets_mean": 2964.1,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 0.40671971706454463,
|
|
"grad_norm": 0.6656840184265772,
|
|
"learning_rate": 2.3073047858942067e-05,
|
|
"loss": 0.603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19232776761054993,
|
|
"step": 230,
|
|
"valid_targets_mean": 2728.2,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 0.4155614500442087,
|
|
"grad_norm": 0.7255450538392068,
|
|
"learning_rate": 2.3576826196473553e-05,
|
|
"loss": 0.597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33167585730552673,
|
|
"step": 235,
|
|
"valid_targets_mean": 3975.6,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 0.4244031830238727,
|
|
"grad_norm": 0.7811965432597004,
|
|
"learning_rate": 2.408060453400504e-05,
|
|
"loss": 0.6116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3258014917373657,
|
|
"step": 240,
|
|
"valid_targets_mean": 4064.9,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 0.4332449160035367,
|
|
"grad_norm": 0.7283721698739326,
|
|
"learning_rate": 2.4584382871536528e-05,
|
|
"loss": 0.6241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866038680076599,
|
|
"step": 245,
|
|
"valid_targets_mean": 2983.4,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 0.4420866489832007,
|
|
"grad_norm": 0.8218129094311453,
|
|
"learning_rate": 2.508816120906801e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33566445112228394,
|
|
"step": 250,
|
|
"valid_targets_mean": 3558.0,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 0.4509283819628647,
|
|
"grad_norm": 0.7072939560249456,
|
|
"learning_rate": 2.55919395465995e-05,
|
|
"loss": 0.5453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31181544065475464,
|
|
"step": 255,
|
|
"valid_targets_mean": 3208.2,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.45977011494252873,
|
|
"grad_norm": 1.0523349102236121,
|
|
"learning_rate": 2.6095717884130986e-05,
|
|
"loss": 0.6377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4069092869758606,
|
|
"step": 260,
|
|
"valid_targets_mean": 3098.2,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 0.46861184792219274,
|
|
"grad_norm": 0.6450257152535478,
|
|
"learning_rate": 2.659949622166247e-05,
|
|
"loss": 0.623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27830231189727783,
|
|
"step": 265,
|
|
"valid_targets_mean": 4357.8,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 0.47745358090185674,
|
|
"grad_norm": 0.7522066599890925,
|
|
"learning_rate": 2.7103274559193958e-05,
|
|
"loss": 0.5872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18803122639656067,
|
|
"step": 270,
|
|
"valid_targets_mean": 2124.2,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.48629531388152075,
|
|
"grad_norm": 0.884412534841973,
|
|
"learning_rate": 2.760705289672544e-05,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3839704692363739,
|
|
"step": 275,
|
|
"valid_targets_mean": 2501.4,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 0.4951370468611848,
|
|
"grad_norm": 0.8891118328744042,
|
|
"learning_rate": 2.811083123425693e-05,
|
|
"loss": 0.6361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3874850273132324,
|
|
"step": 280,
|
|
"valid_targets_mean": 2678.1,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 0.5039787798408488,
|
|
"grad_norm": 0.8262092882520712,
|
|
"learning_rate": 2.8614609571788415e-05,
|
|
"loss": 0.605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38311082124710083,
|
|
"step": 285,
|
|
"valid_targets_mean": 3422.0,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 0.5128205128205128,
|
|
"grad_norm": 0.709275574520186,
|
|
"learning_rate": 2.91183879093199e-05,
|
|
"loss": 0.6046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18932947516441345,
|
|
"step": 290,
|
|
"valid_targets_mean": 2332.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.5216622458001768,
|
|
"grad_norm": 0.7786332977338201,
|
|
"learning_rate": 2.9622166246851387e-05,
|
|
"loss": 0.6052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35691413283348083,
|
|
"step": 295,
|
|
"valid_targets_mean": 3540.5,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 0.5305039787798409,
|
|
"grad_norm": 0.7101179367848214,
|
|
"learning_rate": 3.0125944584382876e-05,
|
|
"loss": 0.6084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17877447605133057,
|
|
"step": 300,
|
|
"valid_targets_mean": 2138.0,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 0.5393457117595049,
|
|
"grad_norm": 0.7234460485976838,
|
|
"learning_rate": 3.062972292191436e-05,
|
|
"loss": 0.5926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23484167456626892,
|
|
"step": 305,
|
|
"valid_targets_mean": 2718.2,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 0.5481874447391689,
|
|
"grad_norm": 0.8316290022850806,
|
|
"learning_rate": 3.113350125944585e-05,
|
|
"loss": 0.5576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605739235877991,
|
|
"step": 310,
|
|
"valid_targets_mean": 2342.1,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 0.5570291777188329,
|
|
"grad_norm": 0.7443583580419373,
|
|
"learning_rate": 3.1637279596977334e-05,
|
|
"loss": 0.5824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3526822626590729,
|
|
"step": 315,
|
|
"valid_targets_mean": 2949.9,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 0.5658709106984969,
|
|
"grad_norm": 0.9191837030143825,
|
|
"learning_rate": 3.214105793450882e-05,
|
|
"loss": 0.5984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35114723443984985,
|
|
"step": 320,
|
|
"valid_targets_mean": 2730.0,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.5747126436781609,
|
|
"grad_norm": 0.7133394084017918,
|
|
"learning_rate": 3.2644836272040306e-05,
|
|
"loss": 0.5448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3304479122161865,
|
|
"step": 325,
|
|
"valid_targets_mean": 4008.5,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 0.583554376657825,
|
|
"grad_norm": 0.7542186085539615,
|
|
"learning_rate": 3.314861460957179e-05,
|
|
"loss": 0.5912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35879507660865784,
|
|
"step": 330,
|
|
"valid_targets_mean": 3446.0,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 0.5923961096374889,
|
|
"grad_norm": 0.8806283364510236,
|
|
"learning_rate": 3.365239294710328e-05,
|
|
"loss": 0.546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2994898855686188,
|
|
"step": 335,
|
|
"valid_targets_mean": 2490.4,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 0.601237842617153,
|
|
"grad_norm": 0.7925231456510854,
|
|
"learning_rate": 3.415617128463476e-05,
|
|
"loss": 0.5375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27049198746681213,
|
|
"step": 340,
|
|
"valid_targets_mean": 3216.2,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 0.610079575596817,
|
|
"grad_norm": 0.8151714993878474,
|
|
"learning_rate": 3.465994962216625e-05,
|
|
"loss": 0.5658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3359305262565613,
|
|
"step": 345,
|
|
"valid_targets_mean": 2819.4,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.618921308576481,
|
|
"grad_norm": 0.6987780960519617,
|
|
"learning_rate": 3.516372795969774e-05,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22273704409599304,
|
|
"step": 350,
|
|
"valid_targets_mean": 3495.1,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 0.627763041556145,
|
|
"grad_norm": 0.82782268216302,
|
|
"learning_rate": 3.566750629722922e-05,
|
|
"loss": 0.5694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.282134473323822,
|
|
"step": 355,
|
|
"valid_targets_mean": 2781.4,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 0.636604774535809,
|
|
"grad_norm": 0.7171815954941675,
|
|
"learning_rate": 3.617128463476071e-05,
|
|
"loss": 0.542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24987182021141052,
|
|
"step": 360,
|
|
"valid_targets_mean": 2726.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.6454465075154731,
|
|
"grad_norm": 0.6554762528349989,
|
|
"learning_rate": 3.667506297229219e-05,
|
|
"loss": 0.5764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21046540141105652,
|
|
"step": 365,
|
|
"valid_targets_mean": 2741.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.654288240495137,
|
|
"grad_norm": 0.7169452163324568,
|
|
"learning_rate": 3.717884130982368e-05,
|
|
"loss": 0.5865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35952502489089966,
|
|
"step": 370,
|
|
"valid_targets_mean": 4276.4,
|
|
"valid_targets_min": 1909
|
|
},
|
|
{
|
|
"epoch": 0.6631299734748011,
|
|
"grad_norm": 0.7498682876453387,
|
|
"learning_rate": 3.7682619647355165e-05,
|
|
"loss": 0.6103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.398249089717865,
|
|
"step": 375,
|
|
"valid_targets_mean": 3914.2,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 0.671971706454465,
|
|
"grad_norm": 0.7431702929941265,
|
|
"learning_rate": 3.8186397984886654e-05,
|
|
"loss": 0.5234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36680591106414795,
|
|
"step": 380,
|
|
"valid_targets_mean": 3600.5,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 0.6808134394341291,
|
|
"grad_norm": 0.7032240193017438,
|
|
"learning_rate": 3.8690176322418137e-05,
|
|
"loss": 0.5644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25154560804367065,
|
|
"step": 385,
|
|
"valid_targets_mean": 3544.6,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 0.6896551724137931,
|
|
"grad_norm": 0.7824728334071819,
|
|
"learning_rate": 3.9193954659949626e-05,
|
|
"loss": 0.587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563400864601135,
|
|
"step": 390,
|
|
"valid_targets_mean": 2525.2,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 0.6984969053934571,
|
|
"grad_norm": 0.5938754028594158,
|
|
"learning_rate": 3.969773299748111e-05,
|
|
"loss": 0.594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36213839054107666,
|
|
"step": 395,
|
|
"valid_targets_mean": 4965.6,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 0.7073386383731212,
|
|
"grad_norm": 0.8560713693271218,
|
|
"learning_rate": 3.999996893720319e-05,
|
|
"loss": 0.5368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28493037819862366,
|
|
"step": 400,
|
|
"valid_targets_mean": 2209.5,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.7161803713527851,
|
|
"grad_norm": 0.6574999517551925,
|
|
"learning_rate": 3.999961948184714e-05,
|
|
"loss": 0.5683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27876657247543335,
|
|
"step": 405,
|
|
"valid_targets_mean": 4695.0,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.7250221043324492,
|
|
"grad_norm": 0.7910350694289134,
|
|
"learning_rate": 3.999888174944605e-05,
|
|
"loss": 0.543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29521095752716064,
|
|
"step": 410,
|
|
"valid_targets_mean": 2854.2,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 0.7338638373121131,
|
|
"grad_norm": 0.7327617592282828,
|
|
"learning_rate": 3.9997755754322415e-05,
|
|
"loss": 0.5316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23895704746246338,
|
|
"step": 415,
|
|
"valid_targets_mean": 3236.0,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 0.7427055702917772,
|
|
"grad_norm": 0.683762113730083,
|
|
"learning_rate": 3.9996241518336554e-05,
|
|
"loss": 0.5805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622902989387512,
|
|
"step": 420,
|
|
"valid_targets_mean": 3360.2,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 0.7515473032714411,
|
|
"grad_norm": 0.6712193589006633,
|
|
"learning_rate": 3.999433907088618e-05,
|
|
"loss": 0.5688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33563822507858276,
|
|
"step": 425,
|
|
"valid_targets_mean": 4434.6,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 0.7603890362511052,
|
|
"grad_norm": 0.8978969987784136,
|
|
"learning_rate": 3.999204844890582e-05,
|
|
"loss": 0.5326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21699407696723938,
|
|
"step": 430,
|
|
"valid_targets_mean": 1812.0,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.7936562082241674,
|
|
"learning_rate": 3.998936969686614e-05,
|
|
"loss": 0.565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2873769700527191,
|
|
"step": 435,
|
|
"valid_targets_mean": 2613.8,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.7780725022104332,
|
|
"grad_norm": 0.6494725254339784,
|
|
"learning_rate": 3.9986302866773e-05,
|
|
"loss": 0.561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27636170387268066,
|
|
"step": 440,
|
|
"valid_targets_mean": 3525.8,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 0.7869142351900973,
|
|
"grad_norm": 0.7239196043024896,
|
|
"learning_rate": 3.998284801816654e-05,
|
|
"loss": 0.5455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27213937044143677,
|
|
"step": 445,
|
|
"valid_targets_mean": 2831.9,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.7957559681697612,
|
|
"grad_norm": 0.6995801380039492,
|
|
"learning_rate": 3.997900521811995e-05,
|
|
"loss": 0.5635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28841322660446167,
|
|
"step": 450,
|
|
"valid_targets_mean": 2727.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.8045977011494253,
|
|
"grad_norm": 0.7321413221468167,
|
|
"learning_rate": 3.997477454123818e-05,
|
|
"loss": 0.5781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708323299884796,
|
|
"step": 455,
|
|
"valid_targets_mean": 3125.6,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 0.8134394341290893,
|
|
"grad_norm": 0.834232877873331,
|
|
"learning_rate": 3.997015606965656e-05,
|
|
"loss": 0.5392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3525821566581726,
|
|
"step": 460,
|
|
"valid_targets_mean": 2664.1,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 0.8222811671087533,
|
|
"grad_norm": 0.8321360896222851,
|
|
"learning_rate": 3.996514989303911e-05,
|
|
"loss": 0.6028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32734864950180054,
|
|
"step": 465,
|
|
"valid_targets_mean": 2494.2,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 0.8311229000884174,
|
|
"grad_norm": 0.6935337345612683,
|
|
"learning_rate": 3.9959756108576853e-05,
|
|
"loss": 0.5558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24893325567245483,
|
|
"step": 470,
|
|
"valid_targets_mean": 2565.8,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 0.8399646330680813,
|
|
"grad_norm": 0.8571991620574829,
|
|
"learning_rate": 3.995397482098591e-05,
|
|
"loss": 0.5884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36881160736083984,
|
|
"step": 475,
|
|
"valid_targets_mean": 2929.9,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 0.8488063660477454,
|
|
"grad_norm": 0.7459479118876244,
|
|
"learning_rate": 3.994780614250548e-05,
|
|
"loss": 0.5396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047880232334137,
|
|
"step": 480,
|
|
"valid_targets_mean": 3779.6,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 0.8576480990274093,
|
|
"grad_norm": 0.7142768197579014,
|
|
"learning_rate": 3.994125019289566e-05,
|
|
"loss": 0.5359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945425808429718,
|
|
"step": 485,
|
|
"valid_targets_mean": 3228.9,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 0.8664898320070734,
|
|
"grad_norm": 0.6309840561705765,
|
|
"learning_rate": 3.99343070994351e-05,
|
|
"loss": 0.5466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996477782726288,
|
|
"step": 490,
|
|
"valid_targets_mean": 4352.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 0.8753315649867374,
|
|
"grad_norm": 0.72422812308097,
|
|
"learning_rate": 3.992697699691857e-05,
|
|
"loss": 0.5754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3740094304084778,
|
|
"step": 495,
|
|
"valid_targets_mean": 3836.0,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 0.8841732979664014,
|
|
"grad_norm": 0.8938896424491545,
|
|
"learning_rate": 3.9919260027654304e-05,
|
|
"loss": 0.612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3498684763908386,
|
|
"step": 500,
|
|
"valid_targets_mean": 2197.9,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 0.8930150309460654,
|
|
"grad_norm": 0.6203004592548081,
|
|
"learning_rate": 3.991115634146123e-05,
|
|
"loss": 0.5209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26198410987854004,
|
|
"step": 505,
|
|
"valid_targets_mean": 3834.1,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 0.9018567639257294,
|
|
"grad_norm": 0.8296110311313931,
|
|
"learning_rate": 3.990266609566614e-05,
|
|
"loss": 0.5822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23738741874694824,
|
|
"step": 510,
|
|
"valid_targets_mean": 1985.0,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 0.9106984969053935,
|
|
"grad_norm": 0.6830643774979811,
|
|
"learning_rate": 3.9893789455100514e-05,
|
|
"loss": 0.5182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690092921257019,
|
|
"step": 515,
|
|
"valid_targets_mean": 3141.5,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 0.9195402298850575,
|
|
"grad_norm": 0.6736971639546346,
|
|
"learning_rate": 3.988452659209745e-05,
|
|
"loss": 0.5723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22991755604743958,
|
|
"step": 520,
|
|
"valid_targets_mean": 2799.1,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 0.9283819628647215,
|
|
"grad_norm": 0.7173030371663041,
|
|
"learning_rate": 3.9874877686488197e-05,
|
|
"loss": 0.5628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34016090631484985,
|
|
"step": 525,
|
|
"valid_targets_mean": 3242.2,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 0.9372236958443855,
|
|
"grad_norm": 0.7040214824132874,
|
|
"learning_rate": 3.986484292559877e-05,
|
|
"loss": 0.5405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545274496078491,
|
|
"step": 530,
|
|
"valid_targets_mean": 3138.1,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 0.9460654288240495,
|
|
"grad_norm": 0.8218611125304961,
|
|
"learning_rate": 3.9854422504246224e-05,
|
|
"loss": 0.5626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32547083497047424,
|
|
"step": 535,
|
|
"valid_targets_mean": 2763.6,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.9549071618037135,
|
|
"grad_norm": 0.6214205266111335,
|
|
"learning_rate": 3.984361662473494e-05,
|
|
"loss": 0.5616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16753855347633362,
|
|
"step": 540,
|
|
"valid_targets_mean": 2641.5,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 0.9637488947833776,
|
|
"grad_norm": 0.5778657679333664,
|
|
"learning_rate": 3.9832425496852644e-05,
|
|
"loss": 0.5737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28100019693374634,
|
|
"step": 545,
|
|
"valid_targets_mean": 4093.9,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.9725906277630415,
|
|
"grad_norm": 0.7198881631279556,
|
|
"learning_rate": 3.982084933786639e-05,
|
|
"loss": 0.5622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683573067188263,
|
|
"step": 550,
|
|
"valid_targets_mean": 3153.2,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.9814323607427056,
|
|
"grad_norm": 0.7294048125531857,
|
|
"learning_rate": 3.9808888372518285e-05,
|
|
"loss": 0.523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22038066387176514,
|
|
"step": 555,
|
|
"valid_targets_mean": 2419.9,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.9902740937223696,
|
|
"grad_norm": 0.7554402239187559,
|
|
"learning_rate": 3.979654283302115e-05,
|
|
"loss": 0.561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33254435658454895,
|
|
"step": 560,
|
|
"valid_targets_mean": 3315.4,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 0.9991158267020336,
|
|
"grad_norm": 0.9531782119454033,
|
|
"learning_rate": 3.9783812959054054e-05,
|
|
"loss": 0.54,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633528709411621,
|
|
"step": 565,
|
|
"valid_targets_mean": 2802.5,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 1.0070733863837311,
|
|
"grad_norm": 0.7219142907133131,
|
|
"learning_rate": 3.9770698997757564e-05,
|
|
"loss": 0.5341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.360717236995697,
|
|
"step": 570,
|
|
"valid_targets_mean": 3532.2,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 1.0159151193633953,
|
|
"grad_norm": 0.6832964222244942,
|
|
"learning_rate": 3.975720120372905e-05,
|
|
"loss": 0.5604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3534933924674988,
|
|
"step": 575,
|
|
"valid_targets_mean": 4285.2,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 1.0247568523430592,
|
|
"grad_norm": 0.7104879022906077,
|
|
"learning_rate": 3.974331983901766e-05,
|
|
"loss": 0.5139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23602774739265442,
|
|
"step": 580,
|
|
"valid_targets_mean": 3051.0,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 1.0335985853227232,
|
|
"grad_norm": 1.0289475556516747,
|
|
"learning_rate": 3.972905517311929e-05,
|
|
"loss": 0.5025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2523854076862335,
|
|
"step": 585,
|
|
"valid_targets_mean": 4518.4,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 1.0424403183023874,
|
|
"grad_norm": 0.6070195547061199,
|
|
"learning_rate": 3.9714407482971335e-05,
|
|
"loss": 0.503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1908818781375885,
|
|
"step": 590,
|
|
"valid_targets_mean": 3155.4,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 1.0512820512820513,
|
|
"grad_norm": 0.8043171275547331,
|
|
"learning_rate": 3.969937705294728e-05,
|
|
"loss": 0.5078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33768388628959656,
|
|
"step": 595,
|
|
"valid_targets_mean": 3010.2,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 1.0601237842617153,
|
|
"grad_norm": 0.7749567786956729,
|
|
"learning_rate": 3.968396417485125e-05,
|
|
"loss": 0.5508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2937096357345581,
|
|
"step": 600,
|
|
"valid_targets_mean": 2740.4,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 1.0689655172413792,
|
|
"grad_norm": 0.6956637921840406,
|
|
"learning_rate": 3.966816914791226e-05,
|
|
"loss": 0.5343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3460909426212311,
|
|
"step": 605,
|
|
"valid_targets_mean": 4797.0,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 1.0778072502210434,
|
|
"grad_norm": 0.8116625470978949,
|
|
"learning_rate": 3.965199227877846e-05,
|
|
"loss": 0.5254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32285335659980774,
|
|
"step": 610,
|
|
"valid_targets_mean": 2429.6,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.0866489832007074,
|
|
"grad_norm": 0.7159169923970375,
|
|
"learning_rate": 3.963543388151117e-05,
|
|
"loss": 0.5176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541963458061218,
|
|
"step": 615,
|
|
"valid_targets_mean": 4086.0,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.0954907161803713,
|
|
"grad_norm": 0.851138769147014,
|
|
"learning_rate": 3.9618494277578806e-05,
|
|
"loss": 0.5254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655346155166626,
|
|
"step": 620,
|
|
"valid_targets_mean": 1852.5,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 1.1043324491600353,
|
|
"grad_norm": 0.7278081843736119,
|
|
"learning_rate": 3.960117379585057e-05,
|
|
"loss": 0.56,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3517983555793762,
|
|
"step": 625,
|
|
"valid_targets_mean": 4008.0,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 1.1131741821396994,
|
|
"grad_norm": 0.719616659459927,
|
|
"learning_rate": 3.958347277259015e-05,
|
|
"loss": 0.4971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31380581855773926,
|
|
"step": 630,
|
|
"valid_targets_mean": 3580.6,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 1.1220159151193634,
|
|
"grad_norm": 0.63160465301193,
|
|
"learning_rate": 3.956539155144912e-05,
|
|
"loss": 0.5234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2329138219356537,
|
|
"step": 635,
|
|
"valid_targets_mean": 4057.8,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 1.1308576480990273,
|
|
"grad_norm": 0.6825427065012454,
|
|
"learning_rate": 3.9546930483460326e-05,
|
|
"loss": 0.516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27975958585739136,
|
|
"step": 640,
|
|
"valid_targets_mean": 3343.4,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 1.1396993810786915,
|
|
"grad_norm": 0.699265105549181,
|
|
"learning_rate": 3.952808992703102e-05,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23706836998462677,
|
|
"step": 645,
|
|
"valid_targets_mean": 2710.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 1.1485411140583555,
|
|
"grad_norm": 0.8045984010228572,
|
|
"learning_rate": 3.9508870247935964e-05,
|
|
"loss": 0.5351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1919955015182495,
|
|
"step": 650,
|
|
"valid_targets_mean": 1751.8,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 1.1573828470380194,
|
|
"grad_norm": 0.6012250412087359,
|
|
"learning_rate": 3.948927181931024e-05,
|
|
"loss": 0.52,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263777494430542,
|
|
"step": 655,
|
|
"valid_targets_mean": 4245.5,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 1.1662245800176834,
|
|
"grad_norm": 0.5992821189180728,
|
|
"learning_rate": 3.94692950216421e-05,
|
|
"loss": 0.5235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23570390045642853,
|
|
"step": 660,
|
|
"valid_targets_mean": 3959.2,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 1.1750663129973475,
|
|
"grad_norm": 0.610845100602426,
|
|
"learning_rate": 3.944894024276552e-05,
|
|
"loss": 0.5026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1973716914653778,
|
|
"step": 665,
|
|
"valid_targets_mean": 3387.4,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.1839080459770115,
|
|
"grad_norm": 0.7207065986679495,
|
|
"learning_rate": 3.9428207877852684e-05,
|
|
"loss": 0.5115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17878985404968262,
|
|
"step": 670,
|
|
"valid_targets_mean": 2142.6,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.1927497789566754,
|
|
"grad_norm": 0.7934727448753174,
|
|
"learning_rate": 3.940709832940632e-05,
|
|
"loss": 0.5465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502514123916626,
|
|
"step": 675,
|
|
"valid_targets_mean": 2718.2,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 1.2015915119363396,
|
|
"grad_norm": 0.7998141403870273,
|
|
"learning_rate": 3.938561200725189e-05,
|
|
"loss": 0.513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2249201387166977,
|
|
"step": 680,
|
|
"valid_targets_mean": 2136.0,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 1.2104332449160036,
|
|
"grad_norm": 0.6227661283561698,
|
|
"learning_rate": 3.9363749328529594e-05,
|
|
"loss": 0.5482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259385883808136,
|
|
"step": 685,
|
|
"valid_targets_mean": 3826.6,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 1.2192749778956675,
|
|
"grad_norm": 0.6206992015741707,
|
|
"learning_rate": 3.934151071768634e-05,
|
|
"loss": 0.5537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900537848472595,
|
|
"step": 690,
|
|
"valid_targets_mean": 4350.5,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.2281167108753315,
|
|
"grad_norm": 0.5635939679566414,
|
|
"learning_rate": 3.931889660646744e-05,
|
|
"loss": 0.5482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819467782974243,
|
|
"step": 695,
|
|
"valid_targets_mean": 4096.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 1.2369584438549956,
|
|
"grad_norm": 0.6692534359398423,
|
|
"learning_rate": 3.9295907433908264e-05,
|
|
"loss": 0.5415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925758361816406,
|
|
"step": 700,
|
|
"valid_targets_mean": 4126.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.2458001768346596,
|
|
"grad_norm": 0.7190209534372782,
|
|
"learning_rate": 3.9272543646325703e-05,
|
|
"loss": 0.5317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19594435393810272,
|
|
"step": 705,
|
|
"valid_targets_mean": 2114.2,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 1.2546419098143236,
|
|
"grad_norm": 0.7405430028094008,
|
|
"learning_rate": 3.924880569730951e-05,
|
|
"loss": 0.5281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31587642431259155,
|
|
"step": 710,
|
|
"valid_targets_mean": 3316.2,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 1.2634836427939877,
|
|
"grad_norm": 0.7318705208252564,
|
|
"learning_rate": 3.9224694047713475e-05,
|
|
"loss": 0.51,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1906820833683014,
|
|
"step": 715,
|
|
"valid_targets_mean": 2082.6,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 1.2723253757736517,
|
|
"grad_norm": 0.6136614929326403,
|
|
"learning_rate": 3.920020916564652e-05,
|
|
"loss": 0.504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813029885292053,
|
|
"step": 720,
|
|
"valid_targets_mean": 3609.0,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 1.2811671087533156,
|
|
"grad_norm": 0.7619702273083326,
|
|
"learning_rate": 3.917535152646356e-05,
|
|
"loss": 0.57,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29840072989463806,
|
|
"step": 725,
|
|
"valid_targets_mean": 3075.8,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 1.2900088417329796,
|
|
"grad_norm": 0.6340553976724329,
|
|
"learning_rate": 3.915012161275633e-05,
|
|
"loss": 0.5579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35292381048202515,
|
|
"step": 730,
|
|
"valid_targets_mean": 4762.0,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 1.2988505747126438,
|
|
"grad_norm": 0.6314054240931711,
|
|
"learning_rate": 3.912451991434395e-05,
|
|
"loss": 0.5253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579632103443146,
|
|
"step": 735,
|
|
"valid_targets_mean": 3673.4,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.3076923076923077,
|
|
"grad_norm": 0.5775959962805894,
|
|
"learning_rate": 3.909854692826348e-05,
|
|
"loss": 0.5043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20548564195632935,
|
|
"step": 740,
|
|
"valid_targets_mean": 3846.4,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 1.3165340406719717,
|
|
"grad_norm": 0.655758917865089,
|
|
"learning_rate": 3.9072203158760215e-05,
|
|
"loss": 0.5015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677724361419678,
|
|
"step": 745,
|
|
"valid_targets_mean": 3850.2,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.3253757736516358,
|
|
"grad_norm": 0.8148294806624113,
|
|
"learning_rate": 3.904548911727793e-05,
|
|
"loss": 0.5228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3152320384979248,
|
|
"step": 750,
|
|
"valid_targets_mean": 3107.8,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 1.3342175066312998,
|
|
"grad_norm": 0.8146168712526004,
|
|
"learning_rate": 3.901840532244897e-05,
|
|
"loss": 0.4892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3347662687301636,
|
|
"step": 755,
|
|
"valid_targets_mean": 3066.6,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 1.3430592396109637,
|
|
"grad_norm": 0.7139126711376225,
|
|
"learning_rate": 3.899095230008411e-05,
|
|
"loss": 0.4955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2709360718727112,
|
|
"step": 760,
|
|
"valid_targets_mean": 3221.8,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 1.3519009725906277,
|
|
"grad_norm": 0.7341120923859575,
|
|
"learning_rate": 3.896313058316242e-05,
|
|
"loss": 0.5131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554779648780823,
|
|
"step": 765,
|
|
"valid_targets_mean": 3690.6,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 1.3607427055702916,
|
|
"grad_norm": 0.7351206776886251,
|
|
"learning_rate": 3.8934940711820876e-05,
|
|
"loss": 0.5282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886347770690918,
|
|
"step": 770,
|
|
"valid_targets_mean": 3220.2,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 1.3695844385499558,
|
|
"grad_norm": 0.6815412632856188,
|
|
"learning_rate": 3.8906383233343886e-05,
|
|
"loss": 0.5301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817658483982086,
|
|
"step": 775,
|
|
"valid_targets_mean": 3736.1,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 1.3784261715296198,
|
|
"grad_norm": 0.6083778824304598,
|
|
"learning_rate": 3.887745870215268e-05,
|
|
"loss": 0.5448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20323900878429413,
|
|
"step": 780,
|
|
"valid_targets_mean": 3170.0,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 1.387267904509284,
|
|
"grad_norm": 0.701861952740278,
|
|
"learning_rate": 3.884816767979449e-05,
|
|
"loss": 0.5251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28836336731910706,
|
|
"step": 785,
|
|
"valid_targets_mean": 3401.8,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 1.396109637488948,
|
|
"grad_norm": 0.6895304322708055,
|
|
"learning_rate": 3.881851073493174e-05,
|
|
"loss": 0.5549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803962528705597,
|
|
"step": 790,
|
|
"valid_targets_mean": 3533.6,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 1.4049513704686118,
|
|
"grad_norm": 0.8606543782305618,
|
|
"learning_rate": 3.878848844333091e-05,
|
|
"loss": 0.4985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970787286758423,
|
|
"step": 795,
|
|
"valid_targets_mean": 3216.2,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 1.4137931034482758,
|
|
"grad_norm": 0.748948338513349,
|
|
"learning_rate": 3.875810138785144e-05,
|
|
"loss": 0.517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20471307635307312,
|
|
"step": 800,
|
|
"valid_targets_mean": 2873.0,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 1.4226348364279398,
|
|
"grad_norm": 0.6282655364858861,
|
|
"learning_rate": 3.872735015843435e-05,
|
|
"loss": 0.5369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569165825843811,
|
|
"step": 805,
|
|
"valid_targets_mean": 4159.0,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 1.431476569407604,
|
|
"grad_norm": 0.6800159245193907,
|
|
"learning_rate": 3.8696235352090827e-05,
|
|
"loss": 0.4849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21237662434577942,
|
|
"step": 810,
|
|
"valid_targets_mean": 2913.5,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 1.4403183023872679,
|
|
"grad_norm": 0.7491849980098594,
|
|
"learning_rate": 3.8664757572890624e-05,
|
|
"loss": 0.5431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31936168670654297,
|
|
"step": 815,
|
|
"valid_targets_mean": 3183.8,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.449160035366932,
|
|
"grad_norm": 0.6726532976813931,
|
|
"learning_rate": 3.863291743195031e-05,
|
|
"loss": 0.5368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3247930407524109,
|
|
"step": 820,
|
|
"valid_targets_mean": 5290.1,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 1.458001768346596,
|
|
"grad_norm": 0.6414216904520894,
|
|
"learning_rate": 3.860071554742144e-05,
|
|
"loss": 0.5517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23261283338069916,
|
|
"step": 825,
|
|
"valid_targets_mean": 2761.5,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.46684350132626,
|
|
"grad_norm": 0.6133136558161559,
|
|
"learning_rate": 3.856815254447854e-05,
|
|
"loss": 0.538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27468377351760864,
|
|
"step": 830,
|
|
"valid_targets_mean": 4492.1,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.475685234305924,
|
|
"grad_norm": 0.6535282175604957,
|
|
"learning_rate": 3.853522905530698e-05,
|
|
"loss": 0.4972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28734225034713745,
|
|
"step": 835,
|
|
"valid_targets_mean": 4208.9,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 1.4845269672855879,
|
|
"grad_norm": 0.7163769356371759,
|
|
"learning_rate": 3.850194571909065e-05,
|
|
"loss": 0.5515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21620656549930573,
|
|
"step": 840,
|
|
"valid_targets_mean": 2672.6,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.493368700265252,
|
|
"grad_norm": 0.6339266507899239,
|
|
"learning_rate": 3.8468303181999625e-05,
|
|
"loss": 0.5241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22579747438430786,
|
|
"step": 845,
|
|
"valid_targets_mean": 3050.4,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 1.502210433244916,
|
|
"grad_norm": 0.6670589515749927,
|
|
"learning_rate": 3.843430209717758e-05,
|
|
"loss": 0.501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25417882204055786,
|
|
"step": 850,
|
|
"valid_targets_mean": 2978.0,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 1.5110521662245802,
|
|
"grad_norm": 0.6632769562515543,
|
|
"learning_rate": 3.839994312472911e-05,
|
|
"loss": 0.5055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.244868203997612,
|
|
"step": 855,
|
|
"valid_targets_mean": 3159.6,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 1.5198938992042441,
|
|
"grad_norm": 0.7476673252142214,
|
|
"learning_rate": 3.836522693170692e-05,
|
|
"loss": 0.5333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.322563499212265,
|
|
"step": 860,
|
|
"valid_targets_mean": 3074.9,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 1.528735632183908,
|
|
"grad_norm": 1.182699133656073,
|
|
"learning_rate": 3.833015419209888e-05,
|
|
"loss": 0.5195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.233692467212677,
|
|
"step": 865,
|
|
"valid_targets_mean": 2652.8,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 1.537577365163572,
|
|
"grad_norm": 0.5963050788873372,
|
|
"learning_rate": 3.8294725586814925e-05,
|
|
"loss": 0.5379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31656789779663086,
|
|
"step": 870,
|
|
"valid_targets_mean": 4504.1,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 1.546419098143236,
|
|
"grad_norm": 0.7076807714391373,
|
|
"learning_rate": 3.8258941803673845e-05,
|
|
"loss": 0.4895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21023747324943542,
|
|
"step": 875,
|
|
"valid_targets_mean": 2384.8,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 1.5552608311229,
|
|
"grad_norm": 0.7242194753984217,
|
|
"learning_rate": 3.822280353738995e-05,
|
|
"loss": 0.5278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15555477142333984,
|
|
"step": 880,
|
|
"valid_targets_mean": 1807.2,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 1.564102564102564,
|
|
"grad_norm": 0.680725926895947,
|
|
"learning_rate": 3.818631148955954e-05,
|
|
"loss": 0.5183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23022373020648956,
|
|
"step": 885,
|
|
"valid_targets_mean": 3061.0,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 1.5729442970822283,
|
|
"grad_norm": 0.6589551518144537,
|
|
"learning_rate": 3.814946636864732e-05,
|
|
"loss": 0.5016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3253025710582733,
|
|
"step": 890,
|
|
"valid_targets_mean": 4214.9,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.5817860300618922,
|
|
"grad_norm": 0.695071470979805,
|
|
"learning_rate": 3.8112268889972635e-05,
|
|
"loss": 0.5002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27869266271591187,
|
|
"step": 895,
|
|
"valid_targets_mean": 3458.9,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 1.5906277630415562,
|
|
"grad_norm": 0.6394253917274505,
|
|
"learning_rate": 3.807471977569558e-05,
|
|
"loss": 0.5247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28981101512908936,
|
|
"step": 900,
|
|
"valid_targets_mean": 3587.0,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.5994694960212201,
|
|
"grad_norm": 0.6407279388785145,
|
|
"learning_rate": 3.803681975480298e-05,
|
|
"loss": 0.5339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30930954217910767,
|
|
"step": 905,
|
|
"valid_targets_mean": 4241.2,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 1.608311229000884,
|
|
"grad_norm": 0.6262370337590383,
|
|
"learning_rate": 3.799856956309424e-05,
|
|
"loss": 0.5043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15924446284770966,
|
|
"step": 910,
|
|
"valid_targets_mean": 2515.5,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 1.617152961980548,
|
|
"grad_norm": 0.5799667981131503,
|
|
"learning_rate": 3.7959969943167064e-05,
|
|
"loss": 0.4945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2013358175754547,
|
|
"step": 915,
|
|
"valid_targets_mean": 3255.0,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 1.6259946949602122,
|
|
"grad_norm": 0.7839747219704393,
|
|
"learning_rate": 3.7921021644403024e-05,
|
|
"loss": 0.513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39778536558151245,
|
|
"step": 920,
|
|
"valid_targets_mean": 3479.8,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 1.6348364279398764,
|
|
"grad_norm": 1.0927382964698964,
|
|
"learning_rate": 3.788172542295303e-05,
|
|
"loss": 0.4882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3143307566642761,
|
|
"step": 925,
|
|
"valid_targets_mean": 3410.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 1.6436781609195403,
|
|
"grad_norm": 0.71590844445193,
|
|
"learning_rate": 3.784208204172262e-05,
|
|
"loss": 0.4901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3356863856315613,
|
|
"step": 930,
|
|
"valid_targets_mean": 3325.4,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 1.6525198938992043,
|
|
"grad_norm": 0.6973512644137526,
|
|
"learning_rate": 3.7802092270357196e-05,
|
|
"loss": 0.5329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272472620010376,
|
|
"step": 935,
|
|
"valid_targets_mean": 2601.2,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 1.6613616268788682,
|
|
"grad_norm": 0.6008785059006879,
|
|
"learning_rate": 3.7761756885227046e-05,
|
|
"loss": 0.491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2572758197784424,
|
|
"step": 940,
|
|
"valid_targets_mean": 4301.5,
|
|
"valid_targets_min": 2006
|
|
},
|
|
{
|
|
"epoch": 1.6702033598585322,
|
|
"grad_norm": 0.5744209498415186,
|
|
"learning_rate": 3.772107666941226e-05,
|
|
"loss": 0.5167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26439422369003296,
|
|
"step": 945,
|
|
"valid_targets_mean": 4495.4,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 1.6790450928381961,
|
|
"grad_norm": 0.6465740308526822,
|
|
"learning_rate": 3.768005241268757e-05,
|
|
"loss": 0.5639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25202465057373047,
|
|
"step": 950,
|
|
"valid_targets_mean": 3123.2,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 1.6878868258178603,
|
|
"grad_norm": 0.7051342522734189,
|
|
"learning_rate": 3.7638684911506993e-05,
|
|
"loss": 0.5478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1941746175289154,
|
|
"step": 955,
|
|
"valid_targets_mean": 2783.6,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 1.6967285587975243,
|
|
"grad_norm": 0.6946362639039605,
|
|
"learning_rate": 3.7596974968988366e-05,
|
|
"loss": 0.4773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674318552017212,
|
|
"step": 960,
|
|
"valid_targets_mean": 2750.2,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.7055702917771884,
|
|
"grad_norm": 0.6323893584576677,
|
|
"learning_rate": 3.755492339489775e-05,
|
|
"loss": 0.5122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2906394600868225,
|
|
"step": 965,
|
|
"valid_targets_mean": 4283.9,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 1.7144120247568524,
|
|
"grad_norm": 0.7054390689371448,
|
|
"learning_rate": 3.751253100563373e-05,
|
|
"loss": 0.4883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21445819735527039,
|
|
"step": 970,
|
|
"valid_targets_mean": 2264.2,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 1.7232537577365163,
|
|
"grad_norm": 0.7148446633987146,
|
|
"learning_rate": 3.746979862421154e-05,
|
|
"loss": 0.492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3135313391685486,
|
|
"step": 975,
|
|
"valid_targets_mean": 4694.2,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 1.7320954907161803,
|
|
"grad_norm": 0.7119677167199814,
|
|
"learning_rate": 3.742672708024711e-05,
|
|
"loss": 0.5171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30675750970840454,
|
|
"step": 980,
|
|
"valid_targets_mean": 2838.9,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.7409372236958442,
|
|
"grad_norm": 0.6482828478440643,
|
|
"learning_rate": 3.7383317209940936e-05,
|
|
"loss": 0.5151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1551651656627655,
|
|
"step": 985,
|
|
"valid_targets_mean": 1954.4,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 1.7497789566755084,
|
|
"grad_norm": 0.7314955066133645,
|
|
"learning_rate": 3.7339569856061864e-05,
|
|
"loss": 0.5084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22127586603164673,
|
|
"step": 990,
|
|
"valid_targets_mean": 2476.5,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.7586206896551724,
|
|
"grad_norm": 1.098815890414233,
|
|
"learning_rate": 3.7295485867930715e-05,
|
|
"loss": 0.509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23905551433563232,
|
|
"step": 995,
|
|
"valid_targets_mean": 2440.1,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 1.7674624226348365,
|
|
"grad_norm": 0.6764041908437947,
|
|
"learning_rate": 3.72510661014038e-05,
|
|
"loss": 0.5291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24814735352993011,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3079.6,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.7763041556145005,
|
|
"grad_norm": 0.5186517018593776,
|
|
"learning_rate": 3.720631141885633e-05,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20242926478385925,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4913.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.7851458885941645,
|
|
"grad_norm": 0.7772370190529831,
|
|
"learning_rate": 3.716122268916561e-05,
|
|
"loss": 0.4809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603060007095337,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3256.9,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.7939876215738284,
|
|
"grad_norm": 0.5469582973910361,
|
|
"learning_rate": 3.711580078769424e-05,
|
|
"loss": 0.5089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27079907059669495,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4628.6,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 1.8028293545534924,
|
|
"grad_norm": 0.6974198275003209,
|
|
"learning_rate": 3.707004659627308e-05,
|
|
"loss": 0.4978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31995758414268494,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3138.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.8116710875331565,
|
|
"grad_norm": 0.6920590455597191,
|
|
"learning_rate": 3.702396100318414e-05,
|
|
"loss": 0.5847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28330913186073303,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3097.5,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 1.8205128205128205,
|
|
"grad_norm": 0.7496027840560946,
|
|
"learning_rate": 3.697754490314335e-05,
|
|
"loss": 0.5644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2400278002023697,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2275.6,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.8293545534924847,
|
|
"grad_norm": 0.6856958367471065,
|
|
"learning_rate": 3.693079919728314e-05,
|
|
"loss": 0.4957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2846917510032654,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3595.0,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 1.8381962864721486,
|
|
"grad_norm": 0.5935125821966083,
|
|
"learning_rate": 3.688372479313503e-05,
|
|
"loss": 0.5339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36108407378196716,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4035.4,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 1.8470380194518126,
|
|
"grad_norm": 0.7416423484223358,
|
|
"learning_rate": 3.683632260461191e-05,
|
|
"loss": 0.5628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22433266043663025,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3904.5,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 1.8558797524314765,
|
|
"grad_norm": 0.6245382430385877,
|
|
"learning_rate": 3.6788593551990395e-05,
|
|
"loss": 0.4787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25612908601760864,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3262.2,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 1.8647214854111405,
|
|
"grad_norm": 0.6171431811555288,
|
|
"learning_rate": 3.674053856189289e-05,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35327455401420593,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4502.4,
|
|
"valid_targets_min": 2273
|
|
},
|
|
{
|
|
"epoch": 1.8735632183908046,
|
|
"grad_norm": 0.6939023212762249,
|
|
"learning_rate": 3.66921585672696e-05,
|
|
"loss": 0.5145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15852488577365875,
|
|
"step": 1060,
|
|
"valid_targets_mean": 1888.6,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 1.8824049513704686,
|
|
"grad_norm": 0.7982037605123552,
|
|
"learning_rate": 3.664345450738048e-05,
|
|
"loss": 0.4853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16701588034629822,
|
|
"step": 1065,
|
|
"valid_targets_mean": 1619.1,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 1.8912466843501328,
|
|
"grad_norm": 0.608658314659487,
|
|
"learning_rate": 3.659442732777694e-05,
|
|
"loss": 0.5163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004249930381775,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5308.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.9000884173297967,
|
|
"grad_norm": 0.7906418390790385,
|
|
"learning_rate": 3.6545077980283516e-05,
|
|
"loss": 0.5004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33155688643455505,
|
|
"step": 1075,
|
|
"valid_targets_mean": 2850.4,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.9089301503094607,
|
|
"grad_norm": 0.7032647160707013,
|
|
"learning_rate": 3.649540742297937e-05,
|
|
"loss": 0.5095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129090428352356,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2458.6,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.9177718832891246,
|
|
"grad_norm": 0.6238133398192949,
|
|
"learning_rate": 3.6445416620179715e-05,
|
|
"loss": 0.4988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209529310464859,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3219.8,
|
|
"valid_targets_min": 1912
|
|
},
|
|
{
|
|
"epoch": 1.9266136162687886,
|
|
"grad_norm": 0.7568137310829532,
|
|
"learning_rate": 3.6395106542417066e-05,
|
|
"loss": 0.5155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3271511495113373,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2758.9,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 1.9354553492484527,
|
|
"grad_norm": 0.6192848137574003,
|
|
"learning_rate": 3.6344478166422435e-05,
|
|
"loss": 0.4981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642071545124054,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3695.9,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 1.9442970822281167,
|
|
"grad_norm": 0.7309050802016251,
|
|
"learning_rate": 3.6293532475106325e-05,
|
|
"loss": 0.482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29965919256210327,
|
|
"step": 1100,
|
|
"valid_targets_mean": 2750.0,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 1.9531388152077809,
|
|
"grad_norm": 1.0623788457569725,
|
|
"learning_rate": 3.6242270457539695e-05,
|
|
"loss": 0.5106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2692793607711792,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2734.0,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 1.9619805481874448,
|
|
"grad_norm": 0.7151768641889232,
|
|
"learning_rate": 3.6190693108934685e-05,
|
|
"loss": 0.4966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22051319479942322,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2504.2,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.9708222811671088,
|
|
"grad_norm": 0.6957676386448085,
|
|
"learning_rate": 3.613880143062539e-05,
|
|
"loss": 0.5274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3472147583961487,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4914.9,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 1.9796640141467727,
|
|
"grad_norm": 0.5806190920195576,
|
|
"learning_rate": 3.6086596430048355e-05,
|
|
"loss": 0.544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17797411978244781,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2610.4,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 1.9885057471264367,
|
|
"grad_norm": 0.79939207014776,
|
|
"learning_rate": 3.603407912072303e-05,
|
|
"loss": 0.4664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3026364743709564,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2167.8,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 1.9973474801061006,
|
|
"grad_norm": 0.6158400263989293,
|
|
"learning_rate": 3.598125052223209e-05,
|
|
"loss": 0.4927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575317621231079,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3856.8,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 2.0053050397877983,
|
|
"grad_norm": 0.6506509574828292,
|
|
"learning_rate": 3.592811166020166e-05,
|
|
"loss": 0.461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540547847747803,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3808.8,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 2.0141467727674622,
|
|
"grad_norm": 0.7334124808950342,
|
|
"learning_rate": 3.5874663566281386e-05,
|
|
"loss": 0.4984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2256934642791748,
|
|
"step": 1140,
|
|
"valid_targets_mean": 2785.5,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 2.0229885057471266,
|
|
"grad_norm": 0.5825400538253857,
|
|
"learning_rate": 3.582090727812441e-05,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22117963433265686,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5034.1,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 2.0318302387267906,
|
|
"grad_norm": 0.7871465022788394,
|
|
"learning_rate": 3.576684383936721e-05,
|
|
"loss": 0.4825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28266459703445435,
|
|
"step": 1150,
|
|
"valid_targets_mean": 2724.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.0406719717064545,
|
|
"grad_norm": 0.6415857615517944,
|
|
"learning_rate": 3.571247429960939e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30939337611198425,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4437.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.0495137046861185,
|
|
"grad_norm": 0.6177273757478585,
|
|
"learning_rate": 3.5657799714393226e-05,
|
|
"loss": 0.4842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17025458812713623,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3223.8,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 2.0583554376657824,
|
|
"grad_norm": 0.833706903246219,
|
|
"learning_rate": 3.560282114518324e-05,
|
|
"loss": 0.4675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26239508390426636,
|
|
"step": 1165,
|
|
"valid_targets_mean": 2714.1,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 2.0671971706454464,
|
|
"grad_norm": 0.7225359837532538,
|
|
"learning_rate": 3.554753965934556e-05,
|
|
"loss": 0.4865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35689157247543335,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4220.5,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 2.0760389036251103,
|
|
"grad_norm": 0.7784056460093122,
|
|
"learning_rate": 3.5491956330127206e-05,
|
|
"loss": 0.5007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3038363456726074,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3015.6,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 2.0848806366047747,
|
|
"grad_norm": 0.6565097352886429,
|
|
"learning_rate": 3.543607223663524e-05,
|
|
"loss": 0.4824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24286240339279175,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3437.2,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 2.0937223695844387,
|
|
"grad_norm": 0.6492970939073816,
|
|
"learning_rate": 3.537988846381585e-05,
|
|
"loss": 0.4959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303530752658844,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4899.6,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 2.1025641025641026,
|
|
"grad_norm": 0.7925259880545755,
|
|
"learning_rate": 3.532340610243325e-05,
|
|
"loss": 0.5028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27291086316108704,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3204.2,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.1114058355437666,
|
|
"grad_norm": 0.6589724671449128,
|
|
"learning_rate": 3.526662624904852e-05,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2381266951560974,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3589.5,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 2.1202475685234305,
|
|
"grad_norm": 0.7528452910033151,
|
|
"learning_rate": 3.5209550005998314e-05,
|
|
"loss": 0.4621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670268714427948,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3119.5,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 2.1290893015030945,
|
|
"grad_norm": 0.5446738881024474,
|
|
"learning_rate": 3.515217848137347e-05,
|
|
"loss": 0.5064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26400917768478394,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5657.0,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 2.1379310344827585,
|
|
"grad_norm": 0.8894408640488581,
|
|
"learning_rate": 3.509451278899748e-05,
|
|
"loss": 0.4645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17728659510612488,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2654.2,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 2.146772767462423,
|
|
"grad_norm": 0.7198826676958434,
|
|
"learning_rate": 3.503655404840488e-05,
|
|
"loss": 0.4721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26327475905418396,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3134.6,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 2.155614500442087,
|
|
"grad_norm": 0.8407871800228818,
|
|
"learning_rate": 3.497830338481949e-05,
|
|
"loss": 0.4882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295600950717926,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2603.4,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 2.1644562334217508,
|
|
"grad_norm": 0.6328345124649135,
|
|
"learning_rate": 3.491976192913262e-05,
|
|
"loss": 0.4497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22263498604297638,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3946.1,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 2.1732979664014147,
|
|
"grad_norm": 0.7203331611468773,
|
|
"learning_rate": 3.486093081788106e-05,
|
|
"loss": 0.4413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181500494480133,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2403.6,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 2.1821396993810787,
|
|
"grad_norm": 1.2624507413601598,
|
|
"learning_rate": 3.480181119322504e-05,
|
|
"loss": 0.5526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21810586750507355,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3192.0,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 2.1909814323607426,
|
|
"grad_norm": 0.6145928910538749,
|
|
"learning_rate": 3.474240420292606e-05,
|
|
"loss": 0.4818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26425665616989136,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4262.5,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 2.1998231653404066,
|
|
"grad_norm": 0.7136638714717287,
|
|
"learning_rate": 3.4682711000324586e-05,
|
|
"loss": 0.4567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22414344549179077,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3129.0,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 2.2086648983200705,
|
|
"grad_norm": 0.6530586834174523,
|
|
"learning_rate": 3.462273274431769e-05,
|
|
"loss": 0.4573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845330536365509,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3753.6,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 2.217506631299735,
|
|
"grad_norm": 0.6049021187332466,
|
|
"learning_rate": 3.456247059933653e-05,
|
|
"loss": 0.4824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23369532823562622,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3603.4,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 2.226348364279399,
|
|
"grad_norm": 0.7452682692760721,
|
|
"learning_rate": 3.450192573532373e-05,
|
|
"loss": 0.5275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37186628580093384,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3171.0,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 2.235190097259063,
|
|
"grad_norm": 0.6365702923616391,
|
|
"learning_rate": 3.444109932771072e-05,
|
|
"loss": 0.4654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261175274848938,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4370.4,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 2.2440318302387268,
|
|
"grad_norm": 0.5681744054075202,
|
|
"learning_rate": 3.4379992557394846e-05,
|
|
"loss": 0.5126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20251402258872986,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4562.1,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 2.2528735632183907,
|
|
"grad_norm": 0.7410762502762586,
|
|
"learning_rate": 3.4318606610716484e-05,
|
|
"loss": 0.5167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20561201870441437,
|
|
"step": 1275,
|
|
"valid_targets_mean": 2180.5,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 2.2617152961980547,
|
|
"grad_norm": 0.5452558320658369,
|
|
"learning_rate": 3.4256942679436015e-05,
|
|
"loss": 0.4922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19533419609069824,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3569.5,
|
|
"valid_targets_min": 1534
|
|
},
|
|
{
|
|
"epoch": 2.270557029177719,
|
|
"grad_norm": 0.7468519348908569,
|
|
"learning_rate": 3.419500196071067e-05,
|
|
"loss": 0.4754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667611241340637,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 2.279398762157383,
|
|
"grad_norm": 0.6333342302390959,
|
|
"learning_rate": 3.413278565707128e-05,
|
|
"loss": 0.4993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15673427283763885,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2277.5,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 2.288240495137047,
|
|
"grad_norm": 0.6520958194331384,
|
|
"learning_rate": 3.407029497639896e-05,
|
|
"loss": 0.4577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2410448044538498,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3485.1,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 2.297082228116711,
|
|
"grad_norm": 0.7282805052878496,
|
|
"learning_rate": 3.400753113190164e-05,
|
|
"loss": 0.5239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.356065034866333,
|
|
"step": 1300,
|
|
"valid_targets_mean": 2978.1,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 2.305923961096375,
|
|
"grad_norm": 0.7494491120423273,
|
|
"learning_rate": 3.394449534209049e-05,
|
|
"loss": 0.475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870040535926819,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3106.1,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.314765694076039,
|
|
"grad_norm": 0.7866263743420345,
|
|
"learning_rate": 3.388118883075632e-05,
|
|
"loss": 0.4645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3140355348587036,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2766.9,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 2.323607427055703,
|
|
"grad_norm": 0.6490756330304519,
|
|
"learning_rate": 3.381761282694576e-05,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1746228039264679,
|
|
"step": 1315,
|
|
"valid_targets_mean": 1878.6,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 2.3324491600353667,
|
|
"grad_norm": 0.5903200212592301,
|
|
"learning_rate": 3.375376856493744e-05,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511587142944336,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4894.8,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 2.341290893015031,
|
|
"grad_norm": 2.254093889370584,
|
|
"learning_rate": 3.368965728421802e-05,
|
|
"loss": 0.4457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161946639418602,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2834.8,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 2.350132625994695,
|
|
"grad_norm": 0.9142370689095928,
|
|
"learning_rate": 3.362528022945811e-05,
|
|
"loss": 0.4635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2488413155078888,
|
|
"step": 1330,
|
|
"valid_targets_mean": 1759.2,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 2.358974358974359,
|
|
"grad_norm": 0.6231014207273252,
|
|
"learning_rate": 3.3560638650488116e-05,
|
|
"loss": 0.481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24347589910030365,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3643.8,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 2.367816091954023,
|
|
"grad_norm": 0.7798321013057438,
|
|
"learning_rate": 3.3495733802273973e-05,
|
|
"loss": 0.4947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27684855461120605,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2807.4,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 2.376657824933687,
|
|
"grad_norm": 0.6835229910495109,
|
|
"learning_rate": 3.3430566944892797e-05,
|
|
"loss": 0.4769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20981459319591522,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2786.4,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 2.385499557913351,
|
|
"grad_norm": 0.7027687967506704,
|
|
"learning_rate": 3.3365139343508394e-05,
|
|
"loss": 0.5036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24709592759609222,
|
|
"step": 1350,
|
|
"valid_targets_mean": 2751.4,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 2.394341290893015,
|
|
"grad_norm": 0.6262006292592385,
|
|
"learning_rate": 3.329945226834672e-05,
|
|
"loss": 0.4645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18077601492404938,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2843.1,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 2.4031830238726792,
|
|
"grad_norm": 0.6277485840526359,
|
|
"learning_rate": 3.3233506994671226e-05,
|
|
"loss": 0.4951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642451524734497,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3429.4,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 2.412024756852343,
|
|
"grad_norm": 0.6280596559682324,
|
|
"learning_rate": 3.316730480275804e-05,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22805391252040863,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3516.9,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 2.420866489832007,
|
|
"grad_norm": 0.6174264929077061,
|
|
"learning_rate": 3.3100846977871205e-05,
|
|
"loss": 0.4913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780839502811432,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4372.0,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 2.429708222811671,
|
|
"grad_norm": 0.656729892977582,
|
|
"learning_rate": 3.303413481023767e-05,
|
|
"loss": 0.4994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19668014347553253,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2811.8,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 2.438549955791335,
|
|
"grad_norm": 0.7226257651578697,
|
|
"learning_rate": 3.296716959502222e-05,
|
|
"loss": 0.4784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22153441607952118,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2547.9,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 2.447391688770999,
|
|
"grad_norm": 0.7651983874835208,
|
|
"learning_rate": 3.2899952632302406e-05,
|
|
"loss": 0.4922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565222978591919,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2316.4,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 2.456233421750663,
|
|
"grad_norm": 0.629453575504003,
|
|
"learning_rate": 3.2832485227043224e-05,
|
|
"loss": 0.4593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19831818342208862,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3157.9,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 2.465075154730327,
|
|
"grad_norm": 0.6047254462995167,
|
|
"learning_rate": 3.276476868907185e-05,
|
|
"loss": 0.4621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29224517941474915,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4613.8,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 2.4739168877099913,
|
|
"grad_norm": 0.7333279372759346,
|
|
"learning_rate": 3.2696804333052134e-05,
|
|
"loss": 0.497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1974330097436905,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2125.1,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 2.4827586206896552,
|
|
"grad_norm": 0.7776273687671615,
|
|
"learning_rate": 3.2628593478459166e-05,
|
|
"loss": 0.5055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529323399066925,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2489.1,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 2.491600353669319,
|
|
"grad_norm": 0.7273539498207023,
|
|
"learning_rate": 3.256013744955359e-05,
|
|
"loss": 0.4718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30455833673477173,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3809.0,
|
|
"valid_targets_min": 1703
|
|
},
|
|
{
|
|
"epoch": 2.500442086648983,
|
|
"grad_norm": 0.564269031629325,
|
|
"learning_rate": 3.249143757535593e-05,
|
|
"loss": 0.4583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21314361691474915,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3426.6,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 2.509283819628647,
|
|
"grad_norm": 0.5557251931105729,
|
|
"learning_rate": 3.242249518962075e-05,
|
|
"loss": 0.4994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18279360234737396,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3213.4,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 2.5181255526083115,
|
|
"grad_norm": 0.550481771742281,
|
|
"learning_rate": 3.2353311630810824e-05,
|
|
"loss": 0.4564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30488190054893494,
|
|
"step": 1425,
|
|
"valid_targets_mean": 5551.5,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 2.5269672855879755,
|
|
"grad_norm": 0.7806815136684526,
|
|
"learning_rate": 3.2283888242071084e-05,
|
|
"loss": 0.4889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23620276153087616,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2326.1,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.5358090185676394,
|
|
"grad_norm": 0.6636248793061827,
|
|
"learning_rate": 3.221422637120259e-05,
|
|
"loss": 0.4679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1364372968673706,
|
|
"step": 1435,
|
|
"valid_targets_mean": 1784.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 2.5446507515473034,
|
|
"grad_norm": 0.689385789889328,
|
|
"learning_rate": 3.2144327370636346e-05,
|
|
"loss": 0.454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20073749125003815,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3205.1,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 2.5534924845269673,
|
|
"grad_norm": 0.6328008971941544,
|
|
"learning_rate": 3.2074192597407035e-05,
|
|
"loss": 0.4613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022666573524475,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4333.4,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 2.5623342175066313,
|
|
"grad_norm": 0.59131850878343,
|
|
"learning_rate": 3.200382341312669e-05,
|
|
"loss": 0.4492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17799311876296997,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3369.5,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 2.571175950486295,
|
|
"grad_norm": 0.7805466539055157,
|
|
"learning_rate": 3.193322118395825e-05,
|
|
"loss": 0.4892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27352964878082275,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2186.5,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.580017683465959,
|
|
"grad_norm": 0.6876454265526983,
|
|
"learning_rate": 3.1862387280589035e-05,
|
|
"loss": 0.4779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3268851041793823,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3722.8,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 2.588859416445623,
|
|
"grad_norm": 0.6736850428355156,
|
|
"learning_rate": 3.179132307820415e-05,
|
|
"loss": 0.4415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23798906803131104,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2838.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.5977011494252875,
|
|
"grad_norm": 0.596929830688196,
|
|
"learning_rate": 3.1720029956459765e-05,
|
|
"loss": 0.4591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674519419670105,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4175.8,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 2.6065428824049515,
|
|
"grad_norm": 0.5748031569774971,
|
|
"learning_rate": 3.1648509299456354e-05,
|
|
"loss": 0.4262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2019733488559723,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3431.9,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 2.6153846153846154,
|
|
"grad_norm": 0.6572317898083503,
|
|
"learning_rate": 3.15767624957118e-05,
|
|
"loss": 0.5071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3238973915576935,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4004.2,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 2.6242263483642794,
|
|
"grad_norm": 0.6374727405441111,
|
|
"learning_rate": 3.150479093813444e-05,
|
|
"loss": 0.4698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257358580827713,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3410.9,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 2.6330680813439433,
|
|
"grad_norm": 0.7370301376235759,
|
|
"learning_rate": 3.143259602399605e-05,
|
|
"loss": 0.4813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24704976379871368,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2845.5,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 2.6419098143236073,
|
|
"grad_norm": 0.6458521957650634,
|
|
"learning_rate": 3.13601791549047e-05,
|
|
"loss": 0.4572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21648521721363068,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2931.2,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 2.6507515473032717,
|
|
"grad_norm": 0.6857098017912163,
|
|
"learning_rate": 3.128754173677753e-05,
|
|
"loss": 0.4732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17468172311782837,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2179.5,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 2.6595932802829356,
|
|
"grad_norm": 0.7760295420887048,
|
|
"learning_rate": 3.121468517981348e-05,
|
|
"loss": 0.4793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2126651257276535,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2069.2,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.6684350132625996,
|
|
"grad_norm": 0.7256165385608961,
|
|
"learning_rate": 3.1141610898465886e-05,
|
|
"loss": 0.4586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24016624689102173,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2405.6,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 2.6772767462422635,
|
|
"grad_norm": 0.8041492470671632,
|
|
"learning_rate": 3.106832031141505e-05,
|
|
"loss": 0.4882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537711560726166,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2232.1,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 2.6861184792219275,
|
|
"grad_norm": 0.6768740030703516,
|
|
"learning_rate": 3.099481484154066e-05,
|
|
"loss": 0.4694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770196199417114,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3394.1,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 2.6949602122015914,
|
|
"grad_norm": 0.7614885023952586,
|
|
"learning_rate": 3.092109591589421e-05,
|
|
"loss": 0.47,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24348753690719604,
|
|
"step": 1525,
|
|
"valid_targets_mean": 1985.0,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 2.7038019451812554,
|
|
"grad_norm": 0.6973530090116009,
|
|
"learning_rate": 3.084716496567125e-05,
|
|
"loss": 0.4691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2216261327266693,
|
|
"step": 1530,
|
|
"valid_targets_mean": 2607.2,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 2.7126436781609193,
|
|
"grad_norm": 0.6643269820883152,
|
|
"learning_rate": 3.0773023426183646e-05,
|
|
"loss": 0.5029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21378345787525177,
|
|
"step": 1535,
|
|
"valid_targets_mean": 2372.9,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 2.7214854111405833,
|
|
"grad_norm": 0.6302383408350807,
|
|
"learning_rate": 3.069867273683166e-05,
|
|
"loss": 0.4813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18604375422000885,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2405.8,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 2.7303271441202477,
|
|
"grad_norm": 0.7402043292567106,
|
|
"learning_rate": 3.062411434107607e-05,
|
|
"loss": 0.4961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27701494097709656,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3043.9,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 2.7391688770999116,
|
|
"grad_norm": 0.7333854416465081,
|
|
"learning_rate": 3.0549349686410086e-05,
|
|
"loss": 0.4807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1735975593328476,
|
|
"step": 1550,
|
|
"valid_targets_mean": 1787.8,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 2.7480106100795756,
|
|
"grad_norm": 0.5787318917413554,
|
|
"learning_rate": 3.0474380224331292e-05,
|
|
"loss": 0.4601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26284363865852356,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3658.6,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 2.7568523430592395,
|
|
"grad_norm": 0.5153363716994127,
|
|
"learning_rate": 3.039920741031342e-05,
|
|
"loss": 0.452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1932397484779358,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3985.8,
|
|
"valid_targets_min": 2070
|
|
},
|
|
{
|
|
"epoch": 2.7656940760389035,
|
|
"grad_norm": 0.7429137625185662,
|
|
"learning_rate": 3.0323832703778168e-05,
|
|
"loss": 0.477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2431662678718567,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2817.0,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 2.774535809018568,
|
|
"grad_norm": 0.6481250155554005,
|
|
"learning_rate": 3.024825756806677e-05,
|
|
"loss": 0.4784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212037980556488,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3536.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 2.783377541998232,
|
|
"grad_norm": 0.5946835818022652,
|
|
"learning_rate": 3.017248347041168e-05,
|
|
"loss": 0.4674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17257541418075562,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3216.1,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 2.792219274977896,
|
|
"grad_norm": 0.7079495479617642,
|
|
"learning_rate": 3.0096511881908007e-05,
|
|
"loss": 0.476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25472956895828247,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2929.1,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.8010610079575597,
|
|
"grad_norm": 0.6686876235075944,
|
|
"learning_rate": 3.0020344277485017e-05,
|
|
"loss": 0.48,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24332070350646973,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3509.1,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 2.8099027409372237,
|
|
"grad_norm": 0.5961265521760359,
|
|
"learning_rate": 2.994398213587746e-05,
|
|
"loss": 0.4652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16399900615215302,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2690.2,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 2.8187444739168876,
|
|
"grad_norm": 0.6932732714841346,
|
|
"learning_rate": 2.9867426939596876e-05,
|
|
"loss": 0.4838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23998896777629852,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2742.6,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 2.8275862068965516,
|
|
"grad_norm": 0.6373428022728593,
|
|
"learning_rate": 2.9790680174902818e-05,
|
|
"loss": 0.4958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28616029024124146,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3503.4,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 2.8364279398762156,
|
|
"grad_norm": 0.5701727639748247,
|
|
"learning_rate": 2.971374333177398e-05,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033827304840088,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3583.5,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 2.8452696728558795,
|
|
"grad_norm": 0.6938709515640729,
|
|
"learning_rate": 2.963661790387928e-05,
|
|
"loss": 0.4687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16415715217590332,
|
|
"step": 1610,
|
|
"valid_targets_mean": 1516.6,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 2.854111405835544,
|
|
"grad_norm": 0.6819822107535831,
|
|
"learning_rate": 2.9559305388548884e-05,
|
|
"loss": 0.4605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30489426851272583,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3739.5,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 2.862953138815208,
|
|
"grad_norm": 0.6256616922874445,
|
|
"learning_rate": 2.948180728674508e-05,
|
|
"loss": 0.4769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562219798564911,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3812.9,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 2.871794871794872,
|
|
"grad_norm": 0.6400236041741094,
|
|
"learning_rate": 2.9404125103033207e-05,
|
|
"loss": 0.4986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29524898529052734,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4371.4,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 2.8806366047745358,
|
|
"grad_norm": 0.6341608498880749,
|
|
"learning_rate": 2.9326260345552384e-05,
|
|
"loss": 0.4532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2372559905052185,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3315.1,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 2.8894783377541997,
|
|
"grad_norm": 0.6276072795012839,
|
|
"learning_rate": 2.924821452598628e-05,
|
|
"loss": 0.4596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32003140449523926,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4198.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 2.898320070733864,
|
|
"grad_norm": 0.7616482436393485,
|
|
"learning_rate": 2.916998915953373e-05,
|
|
"loss": 0.4898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20601674914360046,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2052.5,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 2.907161803713528,
|
|
"grad_norm": 0.5882065240627146,
|
|
"learning_rate": 2.9091585764879334e-05,
|
|
"loss": 0.4942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24223053455352783,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4031.1,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 2.916003536693192,
|
|
"grad_norm": 0.6054495841065836,
|
|
"learning_rate": 2.9013005864163967e-05,
|
|
"loss": 0.4683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18661022186279297,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3359.5,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 2.924845269672856,
|
|
"grad_norm": 0.6838185112738919,
|
|
"learning_rate": 2.8934250982955245e-05,
|
|
"loss": 0.5104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21030931174755096,
|
|
"step": 1655,
|
|
"valid_targets_mean": 2463.9,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 2.93368700265252,
|
|
"grad_norm": 0.5869513062851025,
|
|
"learning_rate": 2.8855322650217878e-05,
|
|
"loss": 0.4734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2261839509010315,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4131.2,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 2.942528735632184,
|
|
"grad_norm": 0.6635908796868752,
|
|
"learning_rate": 2.877622239828402e-05,
|
|
"loss": 0.4742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128895163536072,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3485.6,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 2.951370468611848,
|
|
"grad_norm": 0.5866931808918497,
|
|
"learning_rate": 2.8696951762823482e-05,
|
|
"loss": 0.4536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27576667070388794,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5840.1,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 2.9602122015915118,
|
|
"grad_norm": 0.7047207677935268,
|
|
"learning_rate": 2.8617512282813962e-05,
|
|
"loss": 0.4773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672806978225708,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3142.0,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 2.9690539345711757,
|
|
"grad_norm": 0.6805728312782533,
|
|
"learning_rate": 2.8537905500511115e-05,
|
|
"loss": 0.4409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22739487886428833,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2617.9,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 2.97789566755084,
|
|
"grad_norm": 0.6399928488222026,
|
|
"learning_rate": 2.845813296141867e-05,
|
|
"loss": 0.4841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33402326703071594,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3724.4,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 2.986737400530504,
|
|
"grad_norm": 0.657324357265603,
|
|
"learning_rate": 2.8378196214258368e-05,
|
|
"loss": 0.4642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23617523908615112,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3832.5,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 2.995579133510168,
|
|
"grad_norm": 0.7665365085077115,
|
|
"learning_rate": 2.8298096810939947e-05,
|
|
"loss": 0.4763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22332416474819183,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2495.6,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 3.0035366931918657,
|
|
"grad_norm": 0.6631145203555193,
|
|
"learning_rate": 2.821783630653097e-05,
|
|
"loss": 0.4496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41785138845443726,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4816.5,
|
|
"valid_targets_min": 2915
|
|
},
|
|
{
|
|
"epoch": 3.0123784261715296,
|
|
"grad_norm": 0.6337435624348,
|
|
"learning_rate": 2.8137416259226647e-05,
|
|
"loss": 0.4547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246091365814209,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3841.2,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 3.0212201591511936,
|
|
"grad_norm": 0.6936797200273987,
|
|
"learning_rate": 2.805683823031962e-05,
|
|
"loss": 0.4457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1735125035047531,
|
|
"step": 1710,
|
|
"valid_targets_mean": 2731.0,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 3.0300618921308575,
|
|
"grad_norm": 0.7891995315990228,
|
|
"learning_rate": 2.797610378416958e-05,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16492772102355957,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2113.0,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.0389036251105215,
|
|
"grad_norm": 0.7386808391583857,
|
|
"learning_rate": 2.789521448817297e-05,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26257234811782837,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3744.8,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 3.047745358090186,
|
|
"grad_norm": 0.8448422789924801,
|
|
"learning_rate": 2.7814171912732505e-05,
|
|
"loss": 0.4595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2198343276977539,
|
|
"step": 1725,
|
|
"valid_targets_mean": 2466.9,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 3.05658709106985,
|
|
"grad_norm": 0.5394050393186041,
|
|
"learning_rate": 2.7732977631226707e-05,
|
|
"loss": 0.4599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662464141845703,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3657.5,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 3.065428824049514,
|
|
"grad_norm": 0.7249259150149697,
|
|
"learning_rate": 2.7651633219979354e-05,
|
|
"loss": 0.4421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690415382385254,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3475.8,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 3.0742705570291777,
|
|
"grad_norm": 0.5613143824306901,
|
|
"learning_rate": 2.757014025822887e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1862303614616394,
|
|
"step": 1740,
|
|
"valid_targets_mean": 5056.2,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 3.0831122900088417,
|
|
"grad_norm": 0.6722692705575857,
|
|
"learning_rate": 2.7488500328097676e-05,
|
|
"loss": 0.4646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23648124933242798,
|
|
"step": 1745,
|
|
"valid_targets_mean": 2867.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 3.0919540229885056,
|
|
"grad_norm": 0.6814783628326924,
|
|
"learning_rate": 2.740671501456147e-05,
|
|
"loss": 0.4398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12536823749542236,
|
|
"step": 1750,
|
|
"valid_targets_mean": 1615.6,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 3.1007957559681696,
|
|
"grad_norm": 0.6898698530287688,
|
|
"learning_rate": 2.732478590541846e-05,
|
|
"loss": 0.4825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14612561464309692,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2145.4,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 3.109637488947834,
|
|
"grad_norm": 0.7057545284878672,
|
|
"learning_rate": 2.7242714591258515e-05,
|
|
"loss": 0.4489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2028021216392517,
|
|
"step": 1760,
|
|
"valid_targets_mean": 2730.9,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 3.118479221927498,
|
|
"grad_norm": 0.7145345954957985,
|
|
"learning_rate": 2.716050266543233e-05,
|
|
"loss": 0.447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2143976241350174,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2795.2,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 3.127320954907162,
|
|
"grad_norm": 0.6387454256981671,
|
|
"learning_rate": 2.707815172402045e-05,
|
|
"loss": 0.4511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2258991003036499,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4250.8,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 3.136162687886826,
|
|
"grad_norm": 0.6217741057104463,
|
|
"learning_rate": 2.6995663365802297e-05,
|
|
"loss": 0.4653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2483760118484497,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4210.9,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 3.14500442086649,
|
|
"grad_norm": 0.6457517067956914,
|
|
"learning_rate": 2.6913039192225143e-05,
|
|
"loss": 0.4716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32969963550567627,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4354.4,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 3.1538461538461537,
|
|
"grad_norm": 0.6429893214891368,
|
|
"learning_rate": 2.683028080737301e-05,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21806424856185913,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3157.8,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 3.1626878868258177,
|
|
"grad_norm": 0.692439432500108,
|
|
"learning_rate": 2.6747389817935513e-05,
|
|
"loss": 0.4697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31872475147247314,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3962.4,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 3.171529619805482,
|
|
"grad_norm": 0.655636164934992,
|
|
"learning_rate": 2.6664367833176706e-05,
|
|
"loss": 0.4435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24065600335597992,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4039.6,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.180371352785146,
|
|
"grad_norm": 0.6356274580947101,
|
|
"learning_rate": 2.6581216464903783e-05,
|
|
"loss": 0.447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21401917934417725,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3759.8,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 3.18921308576481,
|
|
"grad_norm": 0.6755187398673654,
|
|
"learning_rate": 2.649793732743585e-05,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2595488727092743,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4247.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.198054818744474,
|
|
"grad_norm": 0.6601036816629008,
|
|
"learning_rate": 2.641453203757253e-05,
|
|
"loss": 0.4575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24937765300273895,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5081.9,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 3.206896551724138,
|
|
"grad_norm": 0.7345675030768746,
|
|
"learning_rate": 2.6331002214562612e-05,
|
|
"loss": 0.4535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856788635253906,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3258.6,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 3.215738284703802,
|
|
"grad_norm": 0.6443854997962501,
|
|
"learning_rate": 2.624734948007259e-05,
|
|
"loss": 0.4782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19514860212802887,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2690.8,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 3.224580017683466,
|
|
"grad_norm": 0.6505251815752711,
|
|
"learning_rate": 2.616357545815518e-05,
|
|
"loss": 0.4341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18154998123645782,
|
|
"step": 1825,
|
|
"valid_targets_mean": 2853.5,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.2334217506631298,
|
|
"grad_norm": 0.639089706648105,
|
|
"learning_rate": 2.6079681775217833e-05,
|
|
"loss": 0.4494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1521425098180771,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2546.0,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 3.242263483642794,
|
|
"grad_norm": 0.8346126474494834,
|
|
"learning_rate": 2.5995670059991085e-05,
|
|
"loss": 0.4564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20812271535396576,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2109.9,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 3.251105216622458,
|
|
"grad_norm": 0.6974668474879332,
|
|
"learning_rate": 2.5911541943497e-05,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21064886450767517,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3080.6,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 3.259946949602122,
|
|
"grad_norm": 0.7071438987421578,
|
|
"learning_rate": 2.582729905901747e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3176460564136505,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3679.4,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 3.268788682581786,
|
|
"grad_norm": 0.8145462041803657,
|
|
"learning_rate": 2.574294304206254e-05,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27845755219459534,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2862.0,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 3.27763041556145,
|
|
"grad_norm": 0.7045266012648904,
|
|
"learning_rate": 2.56584755303386e-05,
|
|
"loss": 0.4439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2471604347229004,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3010.8,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 3.286472148541114,
|
|
"grad_norm": 0.7384311436163473,
|
|
"learning_rate": 2.5573898163716663e-05,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20069456100463867,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2391.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 3.2953138815207783,
|
|
"grad_norm": 0.766502608504117,
|
|
"learning_rate": 2.5489212584200446e-05,
|
|
"loss": 0.448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833523452281952,
|
|
"step": 1865,
|
|
"valid_targets_mean": 2948.2,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 3.3041556145004423,
|
|
"grad_norm": 0.9146044226587593,
|
|
"learning_rate": 2.5404420435894578e-05,
|
|
"loss": 0.449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265963613986969,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2752.6,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 3.312997347480106,
|
|
"grad_norm": 0.557404768668353,
|
|
"learning_rate": 2.5319523364972606e-05,
|
|
"loss": 0.447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22600683569908142,
|
|
"step": 1875,
|
|
"valid_targets_mean": 5350.0,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 3.32183908045977,
|
|
"grad_norm": 0.7993682463700219,
|
|
"learning_rate": 2.5234523019645086e-05,
|
|
"loss": 0.4362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509821057319641,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2375.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.330680813439434,
|
|
"grad_norm": 0.58659727071677,
|
|
"learning_rate": 2.5149421050127556e-05,
|
|
"loss": 0.4341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19044440984725952,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4073.1,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 3.339522546419098,
|
|
"grad_norm": 0.7845792945000336,
|
|
"learning_rate": 2.5064219108608525e-05,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1808401346206665,
|
|
"step": 1890,
|
|
"valid_targets_mean": 1915.0,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 3.348364279398762,
|
|
"grad_norm": 0.7289729636417263,
|
|
"learning_rate": 2.497891884921735e-05,
|
|
"loss": 0.4492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19839441776275635,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2404.8,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 3.357206012378426,
|
|
"grad_norm": 0.6820854233253595,
|
|
"learning_rate": 2.4893521927992182e-05,
|
|
"loss": 0.4703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27235740423202515,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2999.2,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 3.3660477453580904,
|
|
"grad_norm": 0.5855412571643762,
|
|
"learning_rate": 2.4808030002847775e-05,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19236628711223602,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3963.6,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 3.3748894783377543,
|
|
"grad_norm": 0.7338094054626427,
|
|
"learning_rate": 2.472244473354332e-05,
|
|
"loss": 0.4769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22928746044635773,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2708.8,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.3837312113174183,
|
|
"grad_norm": 0.7995982234481557,
|
|
"learning_rate": 2.4636767781650183e-05,
|
|
"loss": 0.4615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549368739128113,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2276.2,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 3.3925729442970822,
|
|
"grad_norm": 0.6376992232354394,
|
|
"learning_rate": 2.4551000810519714e-05,
|
|
"loss": 0.4899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908145785331726,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4513.8,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 3.401414677276746,
|
|
"grad_norm": 0.7776074545649672,
|
|
"learning_rate": 2.4465145485250888e-05,
|
|
"loss": 0.4519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23057112097740173,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3097.4,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 3.41025641025641,
|
|
"grad_norm": 0.7652952479222617,
|
|
"learning_rate": 2.437920347265802e-05,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17387378215789795,
|
|
"step": 1930,
|
|
"valid_targets_mean": 1660.1,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 3.419098143236074,
|
|
"grad_norm": 0.6605377290323325,
|
|
"learning_rate": 2.4293176441238396e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23506543040275574,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3702.8,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 3.4279398762157385,
|
|
"grad_norm": 0.5979410720887383,
|
|
"learning_rate": 2.420706606113986e-05,
|
|
"loss": 0.4189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637672424316406,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4603.4,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 3.4367816091954024,
|
|
"grad_norm": 0.5739576956888619,
|
|
"learning_rate": 2.4120874004128422e-05,
|
|
"loss": 0.4861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22790098190307617,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3478.2,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 3.4456233421750664,
|
|
"grad_norm": 0.6888223638237597,
|
|
"learning_rate": 2.4034601943555775e-05,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2896411418914795,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3563.5,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 3.4544650751547303,
|
|
"grad_norm": 0.6188357957447922,
|
|
"learning_rate": 2.3948251554326826e-05,
|
|
"loss": 0.4412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27495166659355164,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4352.5,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 3.4633068081343943,
|
|
"grad_norm": 0.6700510097036785,
|
|
"learning_rate": 2.3861824512867172e-05,
|
|
"loss": 0.421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003349304199219,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5187.4,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 3.4721485411140582,
|
|
"grad_norm": 0.6558839463348246,
|
|
"learning_rate": 2.377532249709054e-05,
|
|
"loss": 0.4557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35241687297821045,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4643.0,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 3.480990274093722,
|
|
"grad_norm": 0.6349474618857128,
|
|
"learning_rate": 2.368874718636625e-05,
|
|
"loss": 0.4472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1423875242471695,
|
|
"step": 1970,
|
|
"valid_targets_mean": 2537.1,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 3.489832007073386,
|
|
"grad_norm": 0.6813167822681871,
|
|
"learning_rate": 2.3602100261486557e-05,
|
|
"loss": 0.4754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27694791555404663,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4163.6,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 3.4986737400530505,
|
|
"grad_norm": 0.6514986467289062,
|
|
"learning_rate": 2.3515383404634075e-05,
|
|
"loss": 0.4378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1669061779975891,
|
|
"step": 1980,
|
|
"valid_targets_mean": 2230.2,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 3.5075154730327145,
|
|
"grad_norm": 0.7914161143585046,
|
|
"learning_rate": 2.3428598299349076e-05,
|
|
"loss": 0.4694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27049192786216736,
|
|
"step": 1985,
|
|
"valid_targets_mean": 1989.0,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 3.5163572060123784,
|
|
"grad_norm": 0.8656201914218805,
|
|
"learning_rate": 2.3341746630496828e-05,
|
|
"loss": 0.4454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25898832082748413,
|
|
"step": 1990,
|
|
"valid_targets_mean": 1998.6,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.5251989389920424,
|
|
"grad_norm": 0.7646102018521315,
|
|
"learning_rate": 2.3254830084234877e-05,
|
|
"loss": 0.4168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21087685227394104,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3901.9,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 3.5340406719717063,
|
|
"grad_norm": 0.7970932854441665,
|
|
"learning_rate": 2.316785034798032e-05,
|
|
"loss": 0.4506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256100058555603,
|
|
"step": 2000,
|
|
"valid_targets_mean": 2907.2,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 3.5428824049513707,
|
|
"grad_norm": 0.7147517419674159,
|
|
"learning_rate": 2.308080911037703e-05,
|
|
"loss": 0.4358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644729018211365,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3793.5,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 3.5517241379310347,
|
|
"grad_norm": 0.6331675123587539,
|
|
"learning_rate": 2.2993708061262886e-05,
|
|
"loss": 0.458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1513529121875763,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2805.9,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 3.5605658709106986,
|
|
"grad_norm": 0.7549622939379798,
|
|
"learning_rate": 2.2906548891636956e-05,
|
|
"loss": 0.4567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24897724390029907,
|
|
"step": 2015,
|
|
"valid_targets_mean": 2534.6,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 3.5694076038903626,
|
|
"grad_norm": 0.9923863265222221,
|
|
"learning_rate": 2.2819333293626686e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16775819659233093,
|
|
"step": 2020,
|
|
"valid_targets_mean": 1303.4,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 3.5782493368700266,
|
|
"grad_norm": 0.6137184109496221,
|
|
"learning_rate": 2.2732062960455014e-05,
|
|
"loss": 0.4373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15827752649784088,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3152.2,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 3.5870910698496905,
|
|
"grad_norm": 0.7316389088145041,
|
|
"learning_rate": 2.264473958640753e-05,
|
|
"loss": 0.412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19498181343078613,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2589.9,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 3.5959328028293545,
|
|
"grad_norm": 0.7269422076299851,
|
|
"learning_rate": 2.2557364866799572e-05,
|
|
"loss": 0.4285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3297356367111206,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3257.0,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 3.6047745358090184,
|
|
"grad_norm": 0.6557829256691635,
|
|
"learning_rate": 2.246994049794332e-05,
|
|
"loss": 0.4663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11620749533176422,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2108.5,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 3.6136162687886824,
|
|
"grad_norm": 0.6578106972416874,
|
|
"learning_rate": 2.2382468177114834e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800394058227539,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2848.1,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 3.6224580017683468,
|
|
"grad_norm": 0.6460106377859882,
|
|
"learning_rate": 2.2294949602521142e-05,
|
|
"loss": 0.4664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20682604610919952,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3096.0,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 3.6312997347480107,
|
|
"grad_norm": 0.7686693214243684,
|
|
"learning_rate": 2.2207386473267247e-05,
|
|
"loss": 0.4407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2174399495124817,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2397.0,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 3.6401414677276747,
|
|
"grad_norm": 0.7189899231337926,
|
|
"learning_rate": 2.2119780489323154e-05,
|
|
"loss": 0.4363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19445934891700745,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2458.8,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 3.6489832007073386,
|
|
"grad_norm": 0.7020436393545836,
|
|
"learning_rate": 2.2032133351490835e-05,
|
|
"loss": 0.4571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30607905983924866,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3775.8,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 3.6578249336870026,
|
|
"grad_norm": 0.7444068107077655,
|
|
"learning_rate": 2.1944446761371264e-05,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22787776589393616,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2930.4,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 3.6666666666666665,
|
|
"grad_norm": 0.6968317634420128,
|
|
"learning_rate": 2.1856722421331322e-05,
|
|
"loss": 0.444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15648722648620605,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2251.6,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 3.675508399646331,
|
|
"grad_norm": 0.6959983172874498,
|
|
"learning_rate": 2.1768962034470803e-05,
|
|
"loss": 0.4696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24266773462295532,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3423.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 3.684350132625995,
|
|
"grad_norm": 0.7788541548960379,
|
|
"learning_rate": 2.1681167304589295e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1628911793231964,
|
|
"step": 2085,
|
|
"valid_targets_mean": 2365.0,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 3.693191865605659,
|
|
"grad_norm": 0.7039562542774168,
|
|
"learning_rate": 2.1593339936153148e-05,
|
|
"loss": 0.424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17437390983104706,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2311.9,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 3.7020335985853228,
|
|
"grad_norm": 0.5741843896967402,
|
|
"learning_rate": 2.1505481634262354e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22777321934700012,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4313.0,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 3.7108753315649867,
|
|
"grad_norm": 0.7672460830420991,
|
|
"learning_rate": 2.141759410461746e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350998878479004,
|
|
"step": 2100,
|
|
"valid_targets_mean": 1726.9,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 3.7197170645446507,
|
|
"grad_norm": 0.6710207783728802,
|
|
"learning_rate": 2.1329679053486444e-05,
|
|
"loss": 0.4478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29351454973220825,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3874.4,
|
|
"valid_targets_min": 1654
|
|
},
|
|
{
|
|
"epoch": 3.7285587975243146,
|
|
"grad_norm": 0.6423318027598373,
|
|
"learning_rate": 2.1241738187671595e-05,
|
|
"loss": 0.4299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19800537824630737,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2980.0,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 3.7374005305039786,
|
|
"grad_norm": 0.6914935831642892,
|
|
"learning_rate": 2.115377321447637e-05,
|
|
"loss": 0.4992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1961551457643509,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3261.6,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 3.7462422634836425,
|
|
"grad_norm": 0.6625084903349797,
|
|
"learning_rate": 2.106578584167225e-05,
|
|
"loss": 0.4114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2027662992477417,
|
|
"step": 2120,
|
|
"valid_targets_mean": 2802.2,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 3.755083996463307,
|
|
"grad_norm": 0.644453283873008,
|
|
"learning_rate": 2.0977777777465594e-05,
|
|
"loss": 0.4229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17548662424087524,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3109.0,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 3.763925729442971,
|
|
"grad_norm": 0.7667734390353328,
|
|
"learning_rate": 2.0889750730464474e-05,
|
|
"loss": 0.4612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23370791971683502,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2261.5,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 3.772767462422635,
|
|
"grad_norm": 0.738277857442572,
|
|
"learning_rate": 2.0801706409645473e-05,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21809722483158112,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3180.4,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.781609195402299,
|
|
"grad_norm": 0.8463179804355774,
|
|
"learning_rate": 2.0713646524320557e-05,
|
|
"loss": 0.4408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22101402282714844,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2733.5,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 3.7904509283819627,
|
|
"grad_norm": 0.7086110797536909,
|
|
"learning_rate": 2.0625572784103855e-05,
|
|
"loss": 0.471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23744723200798035,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2938.8,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 3.799292661361627,
|
|
"grad_norm": 0.6251176123735659,
|
|
"learning_rate": 2.053748689887848e-05,
|
|
"loss": 0.4261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20360612869262695,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3229.5,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 3.808134394341291,
|
|
"grad_norm": 0.6958759696778948,
|
|
"learning_rate": 2.0449390578763336e-05,
|
|
"loss": 0.4283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2020493447780609,
|
|
"step": 2155,
|
|
"valid_targets_mean": 2590.4,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 3.816976127320955,
|
|
"grad_norm": 0.7006461931706444,
|
|
"learning_rate": 2.036128553407989e-05,
|
|
"loss": 0.4411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2114584743976593,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2864.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.825817860300619,
|
|
"grad_norm": 0.6980121359538078,
|
|
"learning_rate": 2.027317347531902e-05,
|
|
"loss": 0.4583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2123686671257019,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3107.0,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 3.834659593280283,
|
|
"grad_norm": 0.7098528867513164,
|
|
"learning_rate": 2.0185056113107763e-05,
|
|
"loss": 0.495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23799559473991394,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3973.5,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 3.843501326259947,
|
|
"grad_norm": 0.9443823890149703,
|
|
"learning_rate": 2.0096935158176105e-05,
|
|
"loss": 0.434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21758808195590973,
|
|
"step": 2175,
|
|
"valid_targets_mean": 1717.0,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 3.852343059239611,
|
|
"grad_norm": 0.8056251979105057,
|
|
"learning_rate": 2.000881232132381e-05,
|
|
"loss": 0.4657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17145347595214844,
|
|
"step": 2180,
|
|
"valid_targets_mean": 1844.9,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 3.861184792219275,
|
|
"grad_norm": 0.631753151558534,
|
|
"learning_rate": 1.9920689313387148e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23968084156513214,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4621.9,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 3.8700265251989387,
|
|
"grad_norm": 0.5921935896404549,
|
|
"learning_rate": 1.9832567845205735e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23088482022285461,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4285.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 3.878868258178603,
|
|
"grad_norm": 0.6575693180610859,
|
|
"learning_rate": 1.974444962758929e-05,
|
|
"loss": 0.4294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.220181405544281,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3333.4,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 3.887709991158267,
|
|
"grad_norm": 0.6941860616336191,
|
|
"learning_rate": 1.9656336371284406e-05,
|
|
"loss": 0.4552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22298529744148254,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2727.5,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.896551724137931,
|
|
"grad_norm": 0.7225249260002937,
|
|
"learning_rate": 1.956822978694137e-05,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18556898832321167,
|
|
"step": 2205,
|
|
"valid_targets_mean": 2341.8,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 3.905393457117595,
|
|
"grad_norm": 0.7406697584636266,
|
|
"learning_rate": 1.9480131585080946e-05,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2133459746837616,
|
|
"step": 2210,
|
|
"valid_targets_mean": 2642.8,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 3.914235190097259,
|
|
"grad_norm": 0.7039889962915561,
|
|
"learning_rate": 1.939204347606115e-05,
|
|
"loss": 0.4493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2137300670146942,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2678.2,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 3.9230769230769234,
|
|
"grad_norm": 0.634420740737628,
|
|
"learning_rate": 1.9303967170044043e-05,
|
|
"loss": 0.4358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17572371661663055,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3010.6,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 3.9319186560565873,
|
|
"grad_norm": 0.7155018581712755,
|
|
"learning_rate": 1.9215904376962555e-05,
|
|
"loss": 0.4263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21080003678798676,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2322.0,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 3.9407603890362513,
|
|
"grad_norm": 0.758819158514058,
|
|
"learning_rate": 1.9127856806487266e-05,
|
|
"loss": 0.4388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16472750902175903,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2019.2,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 3.949602122015915,
|
|
"grad_norm": 0.8777006692099467,
|
|
"learning_rate": 1.9039826167993235e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17932608723640442,
|
|
"step": 2235,
|
|
"valid_targets_mean": 1764.1,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.958443854995579,
|
|
"grad_norm": 0.6089818772904589,
|
|
"learning_rate": 1.895181417052677e-05,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24629013240337372,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4223.6,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 3.967285587975243,
|
|
"grad_norm": 0.639631153985411,
|
|
"learning_rate": 1.88638225227723e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21447336673736572,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3402.6,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.976127320954907,
|
|
"grad_norm": 0.8458138613793146,
|
|
"learning_rate": 1.877585293301918e-05,
|
|
"loss": 0.4163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22714388370513916,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2087.0,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 3.984969053934571,
|
|
"grad_norm": 0.8014506952688943,
|
|
"learning_rate": 1.868790710912853e-05,
|
|
"loss": 0.4301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27369236946105957,
|
|
"step": 2255,
|
|
"valid_targets_mean": 2659.0,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.993810786914235,
|
|
"grad_norm": 0.5931369878866818,
|
|
"learning_rate": 1.8599986758500047e-05,
|
|
"loss": 0.4667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2135334014892578,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3793.0,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 4.001768346595933,
|
|
"grad_norm": 0.6460139121405902,
|
|
"learning_rate": 1.8512093588038915e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704264223575592,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4983.9,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.010610079575597,
|
|
"grad_norm": 0.7103598023526009,
|
|
"learning_rate": 1.842422930412262e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22416162490844727,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3300.9,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 4.0194518125552605,
|
|
"grad_norm": 0.6926185487221391,
|
|
"learning_rate": 1.8336395612567857e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19507721066474915,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3702.0,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 4.0282935455349245,
|
|
"grad_norm": 0.6966627277554577,
|
|
"learning_rate": 1.824859421859736e-05,
|
|
"loss": 0.4291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2344420850276947,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3541.6,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 4.037135278514588,
|
|
"grad_norm": 0.8185873096759235,
|
|
"learning_rate": 1.8160826826806856e-05,
|
|
"loss": 0.4333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16348719596862793,
|
|
"step": 2285,
|
|
"valid_targets_mean": 1622.6,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 4.045977011494253,
|
|
"grad_norm": 0.6298356213417173,
|
|
"learning_rate": 1.8073095141131946e-05,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15070627629756927,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4094.2,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 4.054818744473917,
|
|
"grad_norm": 0.7055842030089096,
|
|
"learning_rate": 1.7985400864815027e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1813596785068512,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2406.0,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 4.063660477453581,
|
|
"grad_norm": 0.6605651518692166,
|
|
"learning_rate": 1.7897745700372194e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19027772545814514,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3733.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 4.072502210433245,
|
|
"grad_norm": 0.638116942250368,
|
|
"learning_rate": 1.781013134956025e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1789359450340271,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4052.6,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 4.081343943412909,
|
|
"grad_norm": 0.6312271133669283,
|
|
"learning_rate": 1.7722559513343616e-05,
|
|
"loss": 0.4298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18907594680786133,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4083.4,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 4.090185676392573,
|
|
"grad_norm": 0.6831522353731495,
|
|
"learning_rate": 1.7635031891861334e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2140839397907257,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3038.2,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 4.099027409372237,
|
|
"grad_norm": 0.7541794171611146,
|
|
"learning_rate": 1.7547550184394036e-05,
|
|
"loss": 0.3999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14385363459587097,
|
|
"step": 2320,
|
|
"valid_targets_mean": 1814.5,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 4.107869142351901,
|
|
"grad_norm": 0.850080748839246,
|
|
"learning_rate": 1.746011608933098e-05,
|
|
"loss": 0.4263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1753498613834381,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2009.9,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.116710875331565,
|
|
"grad_norm": 0.6727035464038962,
|
|
"learning_rate": 1.7372731304137072e-05,
|
|
"loss": 0.4234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.156248539686203,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2836.2,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 4.125552608311229,
|
|
"grad_norm": 0.7915258988052476,
|
|
"learning_rate": 1.7285397525319897e-05,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18911907076835632,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2722.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 4.134394341290893,
|
|
"grad_norm": 0.7225681941913896,
|
|
"learning_rate": 1.719811644839679e-05,
|
|
"loss": 0.4446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2196042686700821,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2874.2,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 4.143236074270557,
|
|
"grad_norm": 0.6793110061906088,
|
|
"learning_rate": 1.711088976786192e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22990606725215912,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3720.2,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 4.152077807250221,
|
|
"grad_norm": 0.6309626795534368,
|
|
"learning_rate": 1.7023719177153397e-05,
|
|
"loss": 0.4287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25914740562438965,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5181.9,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 4.160919540229885,
|
|
"grad_norm": 0.7534348626475472,
|
|
"learning_rate": 1.6936606368620393e-05,
|
|
"loss": 0.426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30650636553764343,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3284.2,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 4.1697612732095495,
|
|
"grad_norm": 0.7973625055634738,
|
|
"learning_rate": 1.684955303349027e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398676037788391,
|
|
"step": 2360,
|
|
"valid_targets_mean": 1583.0,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 4.178603006189213,
|
|
"grad_norm": 0.6773926752150286,
|
|
"learning_rate": 1.6762560861835778e-05,
|
|
"loss": 0.3691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13966532051563263,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2208.0,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 4.187444739168877,
|
|
"grad_norm": 0.7600418307606107,
|
|
"learning_rate": 1.6675631542542206e-05,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1887199878692627,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2575.6,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 4.196286472148541,
|
|
"grad_norm": 0.825716048698755,
|
|
"learning_rate": 1.658876676327464e-05,
|
|
"loss": 0.4325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17904534935951233,
|
|
"step": 2375,
|
|
"valid_targets_mean": 1817.2,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 4.205128205128205,
|
|
"grad_norm": 0.7357744240798385,
|
|
"learning_rate": 1.6501968210445144e-05,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28740018606185913,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3760.6,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 4.213969938107869,
|
|
"grad_norm": 0.6102035784766663,
|
|
"learning_rate": 1.641523756918006e-05,
|
|
"loss": 0.4051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19327875971794128,
|
|
"step": 2385,
|
|
"valid_targets_mean": 4250.9,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 4.222811671087533,
|
|
"grad_norm": 0.6953359184280162,
|
|
"learning_rate": 1.6328576523287284e-05,
|
|
"loss": 0.4268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.310172438621521,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4208.6,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 4.231653404067197,
|
|
"grad_norm": 0.7297311607993835,
|
|
"learning_rate": 1.6241986755223574e-05,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32732078433036804,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3946.0,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 4.240495137046861,
|
|
"grad_norm": 0.7130243864792679,
|
|
"learning_rate": 1.6155469946061863e-05,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1902158409357071,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3486.8,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 4.249336870026525,
|
|
"grad_norm": 0.7379540908447911,
|
|
"learning_rate": 1.6069027775458665e-05,
|
|
"loss": 0.4381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1608520746231079,
|
|
"step": 2405,
|
|
"valid_targets_mean": 2679.8,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 4.258178603006189,
|
|
"grad_norm": 0.8452056516818466,
|
|
"learning_rate": 1.5982661921621437e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23513582348823547,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2361.6,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 4.267020335985853,
|
|
"grad_norm": 0.730287777008674,
|
|
"learning_rate": 1.5896374061276015e-05,
|
|
"loss": 0.4286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784014344215393,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5109.2,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 4.275862068965517,
|
|
"grad_norm": 0.7103251487687208,
|
|
"learning_rate": 1.5810165869634027e-05,
|
|
"loss": 0.4031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2809576392173767,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5307.9,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 4.284703801945181,
|
|
"grad_norm": 0.6866454436464177,
|
|
"learning_rate": 1.572403902036042e-05,
|
|
"loss": 0.4139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1964649111032486,
|
|
"step": 2425,
|
|
"valid_targets_mean": 2979.6,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 4.293545534924846,
|
|
"grad_norm": 0.7150372488857892,
|
|
"learning_rate": 1.5637995185540932e-05,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1866159737110138,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3078.5,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 4.30238726790451,
|
|
"grad_norm": 0.8579495181131218,
|
|
"learning_rate": 1.5552036035649656e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.200179323554039,
|
|
"step": 2435,
|
|
"valid_targets_mean": 2049.5,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 4.311229000884174,
|
|
"grad_norm": 0.7013758701584984,
|
|
"learning_rate": 1.5466163239516553e-05,
|
|
"loss": 0.3927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30045637488365173,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3139.2,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 4.3200707338638376,
|
|
"grad_norm": 0.7497089575459133,
|
|
"learning_rate": 1.5380378464295133e-05,
|
|
"loss": 0.3999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16102385520935059,
|
|
"step": 2445,
|
|
"valid_targets_mean": 2777.8,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 4.3289124668435015,
|
|
"grad_norm": 0.7738772317856076,
|
|
"learning_rate": 1.5294683375430035e-05,
|
|
"loss": 0.45,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24177439510822296,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3024.8,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 4.3377541998231655,
|
|
"grad_norm": 0.8087896123657419,
|
|
"learning_rate": 1.520907963662471e-05,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17100030183792114,
|
|
"step": 2455,
|
|
"valid_targets_mean": 1706.0,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 4.346595932802829,
|
|
"grad_norm": 0.6521224471842675,
|
|
"learning_rate": 1.5123568909809103e-05,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21314716339111328,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4596.2,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 4.355437665782493,
|
|
"grad_norm": 0.6730460262609402,
|
|
"learning_rate": 1.5038152855107422e-05,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29492098093032837,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4156.1,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 4.364279398762157,
|
|
"grad_norm": 0.8227541669467124,
|
|
"learning_rate": 1.4952833130805884e-05,
|
|
"loss": 0.4228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20231404900550842,
|
|
"step": 2470,
|
|
"valid_targets_mean": 2325.4,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 4.373121131741821,
|
|
"grad_norm": 0.6978839330328747,
|
|
"learning_rate": 1.4867611393320533e-05,
|
|
"loss": 0.4432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23436486721038818,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3332.9,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 4.381962864721485,
|
|
"grad_norm": 0.7946215788236765,
|
|
"learning_rate": 1.478248929716505e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1627664864063263,
|
|
"step": 2480,
|
|
"valid_targets_mean": 1966.0,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 4.390804597701149,
|
|
"grad_norm": 0.7156697773599238,
|
|
"learning_rate": 1.4697468494918687e-05,
|
|
"loss": 0.4436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.186476469039917,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2992.4,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 4.399646330680813,
|
|
"grad_norm": 0.8727002600998265,
|
|
"learning_rate": 1.4612550637194141e-05,
|
|
"loss": 0.4475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19441717863082886,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2303.6,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 4.408488063660477,
|
|
"grad_norm": 0.7507265043943055,
|
|
"learning_rate": 1.4527737372605529e-05,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23913750052452087,
|
|
"step": 2495,
|
|
"valid_targets_mean": 3582.6,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 4.417329796640141,
|
|
"grad_norm": 0.7610940521640887,
|
|
"learning_rate": 1.4443030347736347e-05,
|
|
"loss": 0.4381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1793220490217209,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2593.2,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 4.426171529619806,
|
|
"grad_norm": 0.6342963620181833,
|
|
"learning_rate": 1.435843120710756e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20556139945983887,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4132.1,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 4.43501326259947,
|
|
"grad_norm": 0.7530536406998339,
|
|
"learning_rate": 1.4273941593145636e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38451525568962097,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4499.5,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 4.443854995579134,
|
|
"grad_norm": 0.8273793851318227,
|
|
"learning_rate": 1.4189563146150669e-05,
|
|
"loss": 0.4212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20944646000862122,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2535.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 4.452696728558798,
|
|
"grad_norm": 0.7909970336145224,
|
|
"learning_rate": 1.4105297504264516e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092140555381775,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3188.4,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 4.461538461538462,
|
|
"grad_norm": 0.9428226334726033,
|
|
"learning_rate": 1.4021146303439031e-05,
|
|
"loss": 0.4029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530262768268585,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2076.4,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 4.470380194518126,
|
|
"grad_norm": 0.7317845516311463,
|
|
"learning_rate": 1.3937111177404279e-05,
|
|
"loss": 0.4369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24385397136211395,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3438.1,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 4.47922192749779,
|
|
"grad_norm": 0.7770528449780305,
|
|
"learning_rate": 1.385319375763682e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17684635519981384,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2598.0,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 4.4880636604774535,
|
|
"grad_norm": 0.7420657248975656,
|
|
"learning_rate": 1.376939567332804e-05,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18782931566238403,
|
|
"step": 2540,
|
|
"valid_targets_mean": 2944.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 4.4969053934571175,
|
|
"grad_norm": 1.0340838110233663,
|
|
"learning_rate": 1.3685718551352518e-05,
|
|
"loss": 0.4428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.160398930311203,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3630.4,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 4.505747126436781,
|
|
"grad_norm": 0.6091047868776585,
|
|
"learning_rate": 1.3602164016236444e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1208481639623642,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2871.2,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 4.514588859416445,
|
|
"grad_norm": 0.7276768828189113,
|
|
"learning_rate": 1.3518733690126084e-05,
|
|
"loss": 0.4236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21554812788963318,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3198.2,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 4.523430592396109,
|
|
"grad_norm": 0.8138197550507267,
|
|
"learning_rate": 1.3435429192756275e-05,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15686780214309692,
|
|
"step": 2560,
|
|
"valid_targets_mean": 1802.8,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 4.532272325375773,
|
|
"grad_norm": 0.7698296993001761,
|
|
"learning_rate": 1.3352252141418998e-05,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21974670886993408,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2627.8,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 4.541114058355438,
|
|
"grad_norm": 0.6991515960307244,
|
|
"learning_rate": 1.3269204150931953e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1762574017047882,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2943.6,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 4.549955791335101,
|
|
"grad_norm": 0.5961797549163438,
|
|
"learning_rate": 1.3186286833607244e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24313318729400635,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4766.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 4.558797524314766,
|
|
"grad_norm": 0.7490840636312823,
|
|
"learning_rate": 1.3103501799220033e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23084694147109985,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4158.1,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 4.56763925729443,
|
|
"grad_norm": 0.7065075883727937,
|
|
"learning_rate": 1.3020850654977333e-05,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1791738122701645,
|
|
"step": 2585,
|
|
"valid_targets_mean": 2766.2,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 4.576480990274094,
|
|
"grad_norm": 0.6016645740317225,
|
|
"learning_rate": 1.293833500548678e-05,
|
|
"loss": 0.4446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17156481742858887,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3903.5,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 4.585322723253758,
|
|
"grad_norm": 0.6114926043432934,
|
|
"learning_rate": 1.2855956452725471e-05,
|
|
"loss": 0.3752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23596268892288208,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3700.5,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 4.594164456233422,
|
|
"grad_norm": 0.7917827130664251,
|
|
"learning_rate": 1.2773716596008889e-05,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21856589615345,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3063.4,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 4.603006189213086,
|
|
"grad_norm": 0.648273474654547,
|
|
"learning_rate": 1.2691617031959843e-05,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17004728317260742,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3463.0,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 4.61184792219275,
|
|
"grad_norm": 0.8189757224237537,
|
|
"learning_rate": 1.2609659354477458e-05,
|
|
"loss": 0.4349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3089355528354645,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3244.4,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 4.620689655172414,
|
|
"grad_norm": 0.7858403164318954,
|
|
"learning_rate": 1.2527845154706264e-05,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20955795049667358,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3477.9,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 4.629531388152078,
|
|
"grad_norm": 0.6689115673937959,
|
|
"learning_rate": 1.2446176021005245e-05,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19080306589603424,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3002.6,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 4.638373121131742,
|
|
"grad_norm": 0.6541092626716796,
|
|
"learning_rate": 1.2364653538917076e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25211408734321594,
|
|
"step": 2625,
|
|
"valid_targets_mean": 5283.1,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 4.647214854111406,
|
|
"grad_norm": 0.6591383126713584,
|
|
"learning_rate": 1.2283279291137291e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23830297589302063,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4273.6,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 4.6560565870910695,
|
|
"grad_norm": 0.6424277501690692,
|
|
"learning_rate": 1.2202054857483576e-05,
|
|
"loss": 0.4014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27352088689804077,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4946.4,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 4.6648983200707335,
|
|
"grad_norm": 0.7211760280647633,
|
|
"learning_rate": 1.2120981814865073e-05,
|
|
"loss": 0.4114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23283889889717102,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3097.9,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 4.673740053050398,
|
|
"grad_norm": 0.7571275814501419,
|
|
"learning_rate": 1.2040061737251807e-05,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3724607229232788,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3724.1,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 4.682581786030062,
|
|
"grad_norm": 0.6466326044624365,
|
|
"learning_rate": 1.19592961956441e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22775639593601227,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4530.5,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 4.691423519009726,
|
|
"grad_norm": 0.731960782868472,
|
|
"learning_rate": 1.1878686758042086e-05,
|
|
"loss": 0.4552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20677362382411957,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2433.1,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 4.70026525198939,
|
|
"grad_norm": 0.6658248910649158,
|
|
"learning_rate": 1.179823498941523e-05,
|
|
"loss": 0.4191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20261318981647491,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3311.1,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 4.709106984969054,
|
|
"grad_norm": 0.7076657293158575,
|
|
"learning_rate": 1.1717942451672008e-05,
|
|
"loss": 0.4428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2810763120651245,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4327.8,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 4.717948717948718,
|
|
"grad_norm": 0.7884816045404514,
|
|
"learning_rate": 1.1637810703629549e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21002733707427979,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2842.2,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.726790450928382,
|
|
"grad_norm": 0.7169004560389791,
|
|
"learning_rate": 1.1557841300983363e-05,
|
|
"loss": 0.4537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17877726256847382,
|
|
"step": 2675,
|
|
"valid_targets_mean": 2562.5,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 4.735632183908046,
|
|
"grad_norm": 0.5669741728591965,
|
|
"learning_rate": 1.1478035796277158e-05,
|
|
"loss": 0.4426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10677316039800644,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2404.2,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 4.74447391688771,
|
|
"grad_norm": 0.7657675790094381,
|
|
"learning_rate": 1.1398395738872683e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494864672422409,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2797.8,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 4.753315649867374,
|
|
"grad_norm": 0.6799388910970321,
|
|
"learning_rate": 1.1318922674919675e-05,
|
|
"loss": 0.4646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2254074662923813,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3502.0,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.762157382847038,
|
|
"grad_norm": 0.7628855637000989,
|
|
"learning_rate": 1.1239618147325802e-05,
|
|
"loss": 0.3928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2163882851600647,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2970.4,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 4.770999115826702,
|
|
"grad_norm": 0.7194697076051723,
|
|
"learning_rate": 1.116048369572674e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21982774138450623,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3383.4,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 4.779840848806366,
|
|
"grad_norm": 0.6669494326491152,
|
|
"learning_rate": 1.1081520856456267e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23762443661689758,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3892.1,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 4.78868258178603,
|
|
"grad_norm": 0.7108382848669145,
|
|
"learning_rate": 1.1002731162516453e-05,
|
|
"loss": 0.4359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1936943084001541,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2535.1,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 4.797524314765694,
|
|
"grad_norm": 0.6713475501297167,
|
|
"learning_rate": 1.0924116143547878e-05,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21732935309410095,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3802.6,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 4.8063660477453585,
|
|
"grad_norm": 0.7413050176699072,
|
|
"learning_rate": 1.0845677325799935e-05,
|
|
"loss": 0.4312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25657641887664795,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3387.0,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 4.815207780725022,
|
|
"grad_norm": 0.7577009577762279,
|
|
"learning_rate": 1.0767416232101215e-05,
|
|
"loss": 0.4391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28581398725509644,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3098.0,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 4.824049513704686,
|
|
"grad_norm": 0.7724716975951001,
|
|
"learning_rate": 1.0689334381829948e-05,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20168830454349518,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2979.0,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 4.83289124668435,
|
|
"grad_norm": 0.7089679302709749,
|
|
"learning_rate": 1.0611433290884472e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11011864989995956,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2865.8,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 4.841732979664014,
|
|
"grad_norm": 0.8144495361272787,
|
|
"learning_rate": 1.0533714471653832e-05,
|
|
"loss": 0.4312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19665873050689697,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2790.5,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 4.850574712643678,
|
|
"grad_norm": 0.77684643133323,
|
|
"learning_rate": 1.04561794329884e-05,
|
|
"loss": 0.4497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21007674932479858,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2555.6,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 4.859416445623342,
|
|
"grad_norm": 0.742546614835338,
|
|
"learning_rate": 1.0378829680170622e-05,
|
|
"loss": 0.4322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3378821611404419,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4008.6,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 4.868258178603006,
|
|
"grad_norm": 0.7479115324842988,
|
|
"learning_rate": 1.0301666714885725e-05,
|
|
"loss": 0.4325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2188922017812729,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3129.8,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 4.87709991158267,
|
|
"grad_norm": 0.7454709519506926,
|
|
"learning_rate": 1.0224692035192625e-05,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28481394052505493,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3897.9,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 4.885941644562334,
|
|
"grad_norm": 0.769621578421721,
|
|
"learning_rate": 1.0147907135494803e-05,
|
|
"loss": 0.4236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15574485063552856,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2061.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 4.894783377541998,
|
|
"grad_norm": 0.7396299843802391,
|
|
"learning_rate": 1.0071313506511334e-05,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16915792226791382,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2474.2,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 4.903625110521662,
|
|
"grad_norm": 0.6989394194624301,
|
|
"learning_rate": 9.9949126352479e-06,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22265878319740295,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3984.8,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 4.912466843501326,
|
|
"grad_norm": 0.6502932314793353,
|
|
"learning_rate": 9.918706004967948e-06,
|
|
"loss": 0.4288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32565969228744507,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4374.5,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 4.921308576480991,
|
|
"grad_norm": 0.7229307437239456,
|
|
"learning_rate": 9.842695095163878e-06,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24379673600196838,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3383.4,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.930150309460654,
|
|
"grad_norm": 0.7940649246854591,
|
|
"learning_rate": 9.766881381528357e-06,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25852537155151367,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 4.938992042440319,
|
|
"grad_norm": 0.6722076902519407,
|
|
"learning_rate": 9.691266335925615e-06,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1473044753074646,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2647.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.947833775419983,
|
|
"grad_norm": 0.819920472445427,
|
|
"learning_rate": 9.615851426362904e-06,
|
|
"loss": 0.4385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1795499622821808,
|
|
"step": 2800,
|
|
"valid_targets_mean": 2951.1,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 4.9566755083996465,
|
|
"grad_norm": 0.7025906740887181,
|
|
"learning_rate": 9.540638116961986e-06,
|
|
"loss": 0.4195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25529012084007263,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3555.2,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 4.9655172413793105,
|
|
"grad_norm": 0.7370327390638789,
|
|
"learning_rate": 9.465627867930734e-06,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20865119993686676,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2461.0,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 4.9743589743589745,
|
|
"grad_norm": 0.6324581481449822,
|
|
"learning_rate": 9.390822135534738e-06,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20276159048080444,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3668.0,
|
|
"valid_targets_min": 1824
|
|
},
|
|
{
|
|
"epoch": 4.983200707338638,
|
|
"grad_norm": 0.7834993482168726,
|
|
"learning_rate": 9.31622237206907e-06,
|
|
"loss": 0.4356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17606160044670105,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2788.4,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 4.992042440318302,
|
|
"grad_norm": 0.6837420383955042,
|
|
"learning_rate": 9.241830025830059e-06,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18292655050754547,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3192.5,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 1.1516433834283342,
|
|
"learning_rate": 9.167646541087216e-06,
|
|
"loss": 0.4185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4343792200088501,
|
|
"step": 2830,
|
|
"valid_targets_mean": 2228.5,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 5.008841732979664,
|
|
"grad_norm": 0.7806271076048815,
|
|
"learning_rate": 9.09367335805515e-06,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21847274899482727,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3220.6,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 5.017683465959328,
|
|
"grad_norm": 0.6706581564830443,
|
|
"learning_rate": 9.019911912865634e-06,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25733518600463867,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3993.9,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 5.026525198938992,
|
|
"grad_norm": 0.728929240628752,
|
|
"learning_rate": 8.946363637539699e-06,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16333594918251038,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2894.0,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 5.035366931918656,
|
|
"grad_norm": 0.728458524357944,
|
|
"learning_rate": 8.87302995995988e-06,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20494450628757477,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3790.2,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 5.04420866489832,
|
|
"grad_norm": 0.7084662868772482,
|
|
"learning_rate": 8.799912303842437e-06,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20348328351974487,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3220.6,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 5.053050397877984,
|
|
"grad_norm": 0.7088150993211034,
|
|
"learning_rate": 8.727012088709751e-06,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22844524681568146,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4072.2,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 5.061892130857648,
|
|
"grad_norm": 0.8455946822457715,
|
|
"learning_rate": 8.654330729862752e-06,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19593322277069092,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2430.2,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 5.0707338638373125,
|
|
"grad_norm": 0.5858642767010211,
|
|
"learning_rate": 8.58186963835346e-06,
|
|
"loss": 0.3783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21078699827194214,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4977.0,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 5.0795755968169765,
|
|
"grad_norm": 0.7994534314183666,
|
|
"learning_rate": 8.509630220957554e-06,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19775576889514923,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2545.8,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 5.08841732979664,
|
|
"grad_norm": 0.8314569923246584,
|
|
"learning_rate": 8.437613880147093e-06,
|
|
"loss": 0.3913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1837921142578125,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2678.4,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 5.097259062776304,
|
|
"grad_norm": 0.7860099404554846,
|
|
"learning_rate": 8.365822014063287e-06,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17934468388557434,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3174.5,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 5.106100795755968,
|
|
"grad_norm": 0.7604145315647566,
|
|
"learning_rate": 8.29425601648933e-06,
|
|
"loss": 0.3952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21388058364391327,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2896.4,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 5.114942528735632,
|
|
"grad_norm": 0.8107815627893672,
|
|
"learning_rate": 8.222917276823361e-06,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22443638741970062,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3731.9,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 5.123784261715296,
|
|
"grad_norm": 0.7079702745049611,
|
|
"learning_rate": 8.151807180051474e-06,
|
|
"loss": 0.4313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25032246112823486,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3908.5,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 5.13262599469496,
|
|
"grad_norm": 0.7899459874887411,
|
|
"learning_rate": 8.080927106720862e-06,
|
|
"loss": 0.4158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1818743646144867,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2431.4,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.141467727674624,
|
|
"grad_norm": 0.8962540094006461,
|
|
"learning_rate": 8.010278432912971e-06,
|
|
"loss": 0.4258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31215745210647583,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3037.2,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 5.150309460654288,
|
|
"grad_norm": 0.7521497447867146,
|
|
"learning_rate": 7.939862530216808e-06,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18189725279808044,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2674.8,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 5.159151193633952,
|
|
"grad_norm": 0.7250094122688368,
|
|
"learning_rate": 7.869680765702313e-06,
|
|
"loss": 0.4319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1579514592885971,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2929.2,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 5.167992926613616,
|
|
"grad_norm": 0.7675162922726106,
|
|
"learning_rate": 7.799734501893825e-06,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1362682580947876,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2386.4,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.17683465959328,
|
|
"grad_norm": 0.6709418238351449,
|
|
"learning_rate": 7.730025096743601e-06,
|
|
"loss": 0.4124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16993936896324158,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3081.4,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 5.185676392572944,
|
|
"grad_norm": 0.6432114501479861,
|
|
"learning_rate": 7.6605539036055e-06,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1883753538131714,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4789.6,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 5.194518125552609,
|
|
"grad_norm": 0.7001052560650384,
|
|
"learning_rate": 7.591322271208639e-06,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1550978720188141,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2667.6,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 5.203359858532273,
|
|
"grad_norm": 1.0203017538563217,
|
|
"learning_rate": 7.522331543631289e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27559781074523926,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2583.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 5.212201591511937,
|
|
"grad_norm": 0.63220733334086,
|
|
"learning_rate": 7.453583060274714e-06,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1653447449207306,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4349.5,
|
|
"valid_targets_min": 2007
|
|
},
|
|
{
|
|
"epoch": 5.221043324491601,
|
|
"grad_norm": 0.7658723022429813,
|
|
"learning_rate": 7.385078155837226e-06,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508220672607422,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3356.6,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 5.2298850574712645,
|
|
"grad_norm": 0.6379179057891206,
|
|
"learning_rate": 7.316818160288195e-06,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654326558113098,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4930.0,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 5.2387267904509285,
|
|
"grad_norm": 0.8198169061308748,
|
|
"learning_rate": 7.248804398842322e-06,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17658288776874542,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2807.2,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 5.247568523430592,
|
|
"grad_norm": 0.7161368425685732,
|
|
"learning_rate": 7.181038191933833e-06,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23815590143203735,
|
|
"step": 2970,
|
|
"valid_targets_mean": 3746.0,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 5.256410256410256,
|
|
"grad_norm": 0.7383520605352984,
|
|
"learning_rate": 7.113520855190907e-06,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17981159687042236,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2893.6,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 5.26525198938992,
|
|
"grad_norm": 0.7617118383380348,
|
|
"learning_rate": 7.046253699410058e-06,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24191364645957947,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3604.0,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.274093722369584,
|
|
"grad_norm": 0.761360266153581,
|
|
"learning_rate": 6.979238030530775e-06,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29760921001434326,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5094.0,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 5.282935455349248,
|
|
"grad_norm": 0.6957464522130561,
|
|
"learning_rate": 6.912475149610094e-06,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14544111490249634,
|
|
"step": 2990,
|
|
"valid_targets_mean": 2849.9,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 5.291777188328912,
|
|
"grad_norm": 0.8834015419681706,
|
|
"learning_rate": 6.8459663527973995e-06,
|
|
"loss": 0.4253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2348986715078354,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2487.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 5.300618921308576,
|
|
"grad_norm": 0.7175307826338068,
|
|
"learning_rate": 6.779712931309186e-06,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13426730036735535,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2886.1,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 5.30946065428824,
|
|
"grad_norm": 0.6263326470511927,
|
|
"learning_rate": 6.713716171404086e-06,
|
|
"loss": 0.4349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2130982130765915,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4677.0,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 5.318302387267904,
|
|
"grad_norm": 0.7281926036143173,
|
|
"learning_rate": 6.647977354357804e-06,
|
|
"loss": 0.4308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22627121210098267,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2928.8,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 5.327144120247569,
|
|
"grad_norm": 0.6832070210761304,
|
|
"learning_rate": 6.582497756438326e-06,
|
|
"loss": 0.386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27786123752593994,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4205.2,
|
|
"valid_targets_min": 1913
|
|
},
|
|
{
|
|
"epoch": 5.335985853227233,
|
|
"grad_norm": 0.7067931683994918,
|
|
"learning_rate": 6.517278648881054e-06,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2012796401977539,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3926.8,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 5.344827586206897,
|
|
"grad_norm": 1.0101144556395403,
|
|
"learning_rate": 6.452321297864212e-06,
|
|
"loss": 0.4082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1759074628353119,
|
|
"step": 3025,
|
|
"valid_targets_mean": 1739.6,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 5.353669319186561,
|
|
"grad_norm": 0.6504695714072357,
|
|
"learning_rate": 6.387626964484197e-06,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1815790832042694,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3034.5,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 5.362511052166225,
|
|
"grad_norm": 0.829852685254614,
|
|
"learning_rate": 6.323196904731155e-06,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1513374149799347,
|
|
"step": 3035,
|
|
"valid_targets_mean": 2116.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 5.371352785145889,
|
|
"grad_norm": 0.7446352358190379,
|
|
"learning_rate": 6.259032369464522e-06,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27236083149909973,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3079.9,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 5.380194518125553,
|
|
"grad_norm": 0.8228515281616915,
|
|
"learning_rate": 6.195134604388819e-06,
|
|
"loss": 0.4388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3066047430038452,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3165.6,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 5.389036251105217,
|
|
"grad_norm": 0.7798826897148097,
|
|
"learning_rate": 6.1315048500294125e-06,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19620686769485474,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3101.9,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 5.3978779840848805,
|
|
"grad_norm": 0.930463249760995,
|
|
"learning_rate": 6.068144341708464e-06,
|
|
"loss": 0.4117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18991626799106598,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2486.2,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 5.4067197170645445,
|
|
"grad_norm": 0.7704365886141176,
|
|
"learning_rate": 6.0050543095209215e-06,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1924263834953308,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3358.0,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 5.415561450044208,
|
|
"grad_norm": 0.8532428692792394,
|
|
"learning_rate": 5.942235978310649e-06,
|
|
"loss": 0.4086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22423534095287323,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3000.2,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 5.424403183023872,
|
|
"grad_norm": 0.669085550320398,
|
|
"learning_rate": 5.879690567646645e-06,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893330216407776,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3361.8,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 5.433244916003536,
|
|
"grad_norm": 0.7376176849413955,
|
|
"learning_rate": 5.817419291799388e-06,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16862516105175018,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3250.4,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 5.442086648983201,
|
|
"grad_norm": 0.6542529886450859,
|
|
"learning_rate": 5.755423359717222e-06,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13393324613571167,
|
|
"step": 3080,
|
|
"valid_targets_mean": 2877.9,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 5.450928381962865,
|
|
"grad_norm": 0.6392677271638753,
|
|
"learning_rate": 5.693703975002911e-06,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13595180213451385,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2198.1,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 5.459770114942529,
|
|
"grad_norm": 0.8729225649131562,
|
|
"learning_rate": 5.632262335890262e-06,
|
|
"loss": 0.3457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21021749079227448,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2377.5,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 5.468611847922193,
|
|
"grad_norm": 0.6919169604156997,
|
|
"learning_rate": 5.571099635220887e-06,
|
|
"loss": 0.3906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09621213376522064,
|
|
"step": 3095,
|
|
"valid_targets_mean": 1912.2,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 5.477453580901857,
|
|
"grad_norm": 0.9121306344310365,
|
|
"learning_rate": 5.510217060421006e-06,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281181275844574,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2873.5,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 5.486295313881521,
|
|
"grad_norm": 0.7192489768020969,
|
|
"learning_rate": 5.449615793478411e-06,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16287308931350708,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3211.4,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 5.495137046861185,
|
|
"grad_norm": 0.831717545619796,
|
|
"learning_rate": 5.389297010919545e-06,
|
|
"loss": 0.4097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19926907122135162,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2817.5,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 5.503978779840849,
|
|
"grad_norm": 0.8140212742785664,
|
|
"learning_rate": 5.329261883786611e-06,
|
|
"loss": 0.4226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15885740518569946,
|
|
"step": 3115,
|
|
"valid_targets_mean": 2335.2,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 5.512820512820513,
|
|
"grad_norm": 0.9603182918185311,
|
|
"learning_rate": 5.2695115776148765e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2298716902732849,
|
|
"step": 3120,
|
|
"valid_targets_mean": 1846.4,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 5.521662245800177,
|
|
"grad_norm": 0.8019735181021684,
|
|
"learning_rate": 5.21004725241002e-06,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22657175362110138,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2703.9,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 5.530503978779841,
|
|
"grad_norm": 0.6986863564233587,
|
|
"learning_rate": 5.150870062625639e-06,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12742537260055542,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2276.5,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 5.539345711759505,
|
|
"grad_norm": 0.6392753287000917,
|
|
"learning_rate": 5.091981157140808e-06,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16173365712165833,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3372.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 5.548187444739169,
|
|
"grad_norm": 0.6393994411262591,
|
|
"learning_rate": 5.03338167923779e-06,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24456486105918884,
|
|
"step": 3140,
|
|
"valid_targets_mean": 4942.8,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 5.5570291777188325,
|
|
"grad_norm": 0.6957281925877212,
|
|
"learning_rate": 4.975072766579829e-06,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13130195438861847,
|
|
"step": 3145,
|
|
"valid_targets_mean": 2417.5,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 5.5658709106984965,
|
|
"grad_norm": 0.9301033896027021,
|
|
"learning_rate": 4.917055551189089e-06,
|
|
"loss": 0.3766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24469897150993347,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2273.6,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 5.574712643678161,
|
|
"grad_norm": 0.6781134728348155,
|
|
"learning_rate": 4.859331159424645e-06,
|
|
"loss": 0.3818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16952422261238098,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3207.5,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.583554376657825,
|
|
"grad_norm": 0.6976758942726916,
|
|
"learning_rate": 4.801900711960628e-06,
|
|
"loss": 0.3838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16358278691768646,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3211.9,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 5.592396109637489,
|
|
"grad_norm": 0.7660887912670873,
|
|
"learning_rate": 4.744765323764469e-06,
|
|
"loss": 0.4141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2197507917881012,
|
|
"step": 3165,
|
|
"valid_targets_mean": 2894.8,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 5.601237842617153,
|
|
"grad_norm": 0.7314275252775809,
|
|
"learning_rate": 4.6879261040752685e-06,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14985954761505127,
|
|
"step": 3170,
|
|
"valid_targets_mean": 2667.5,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 5.610079575596817,
|
|
"grad_norm": 0.6999874428567335,
|
|
"learning_rate": 4.63138415638223e-06,
|
|
"loss": 0.4117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18852269649505615,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3877.0,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 5.618921308576481,
|
|
"grad_norm": 0.7215106059187592,
|
|
"learning_rate": 4.575140578403256e-06,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15598289668560028,
|
|
"step": 3180,
|
|
"valid_targets_mean": 2837.5,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 5.627763041556145,
|
|
"grad_norm": 0.6810403634872003,
|
|
"learning_rate": 4.519196462063628e-06,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24649617075920105,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4866.0,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 5.636604774535809,
|
|
"grad_norm": 0.8661083461110505,
|
|
"learning_rate": 4.4635528934748315e-06,
|
|
"loss": 0.4099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25795987248420715,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2327.2,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 5.645446507515473,
|
|
"grad_norm": 0.9984452335642174,
|
|
"learning_rate": 4.408210952913434e-06,
|
|
"loss": 0.4076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21475867927074432,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2365.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 5.654288240495137,
|
|
"grad_norm": 0.63284420296626,
|
|
"learning_rate": 4.353171714800135e-06,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21914245188236237,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4543.1,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 5.663129973474801,
|
|
"grad_norm": 0.7215660888800569,
|
|
"learning_rate": 4.298436247678892e-06,
|
|
"loss": 0.4097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18757544457912445,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3180.9,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 5.671971706454465,
|
|
"grad_norm": 0.8776106727330101,
|
|
"learning_rate": 4.2440056141962116e-06,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23493947088718414,
|
|
"step": 3210,
|
|
"valid_targets_mean": 2881.6,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 5.680813439434129,
|
|
"grad_norm": 0.8049825392272455,
|
|
"learning_rate": 4.189880871080465e-06,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13369911909103394,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3066.9,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 5.689655172413794,
|
|
"grad_norm": 0.8302974347090468,
|
|
"learning_rate": 4.136063069121412e-06,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1873476505279541,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2237.5,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 5.698496905393457,
|
|
"grad_norm": 0.6993234633000938,
|
|
"learning_rate": 4.082553253149784e-06,
|
|
"loss": 0.3919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21738377213478088,
|
|
"step": 3225,
|
|
"valid_targets_mean": 3679.9,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 5.7073386383731215,
|
|
"grad_norm": 0.9105087640110349,
|
|
"learning_rate": 4.029352462017016e-06,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23904664814472198,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2628.5,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 5.7161803713527854,
|
|
"grad_norm": 0.7887831667909359,
|
|
"learning_rate": 3.9764617285750516e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15723231434822083,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2306.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 5.725022104332449,
|
|
"grad_norm": 0.8628976638475314,
|
|
"learning_rate": 3.923882079656311e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1998981535434723,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2389.6,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 5.733863837312113,
|
|
"grad_norm": 0.8851868206445387,
|
|
"learning_rate": 3.8716145360537516e-06,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23257210850715637,
|
|
"step": 3245,
|
|
"valid_targets_mean": 2343.6,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 5.742705570291777,
|
|
"grad_norm": 0.7909943609866004,
|
|
"learning_rate": 3.819660112501053e-06,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.237594872713089,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3360.4,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 5.751547303271441,
|
|
"grad_norm": 0.9465551894292238,
|
|
"learning_rate": 3.768019817652906e-06,
|
|
"loss": 0.4158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17449277639389038,
|
|
"step": 3255,
|
|
"valid_targets_mean": 1845.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 5.760389036251105,
|
|
"grad_norm": 0.8378732351349432,
|
|
"learning_rate": 3.7166946540654403e-06,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22095929086208344,
|
|
"step": 3260,
|
|
"valid_targets_mean": 2153.5,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.7193963936499834,
|
|
"learning_rate": 3.6656856181767487e-06,
|
|
"loss": 0.427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17883676290512085,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3431.5,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 5.778072502210433,
|
|
"grad_norm": 0.8619394466221819,
|
|
"learning_rate": 3.6149937002875725e-06,
|
|
"loss": 0.4139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1710013896226883,
|
|
"step": 3270,
|
|
"valid_targets_mean": 1953.4,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 5.786914235190097,
|
|
"grad_norm": 0.7617624941176476,
|
|
"learning_rate": 3.564619884542033e-06,
|
|
"loss": 0.4073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18097054958343506,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2563.1,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 5.795755968169761,
|
|
"grad_norm": 0.7210899911422212,
|
|
"learning_rate": 3.514565148908553e-06,
|
|
"loss": 0.4115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21109169721603394,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3776.4,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 5.804597701149425,
|
|
"grad_norm": 0.8741955579576905,
|
|
"learning_rate": 3.4648304651608623e-06,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17497266829013824,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2469.2,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 5.813439434129089,
|
|
"grad_norm": 0.7414269383554729,
|
|
"learning_rate": 3.4154167988591367e-06,
|
|
"loss": 0.3837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15306875109672546,
|
|
"step": 3290,
|
|
"valid_targets_mean": 2028.0,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 5.822281167108754,
|
|
"grad_norm": 0.680343087267019,
|
|
"learning_rate": 3.3663251093312477e-06,
|
|
"loss": 0.4257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15991319715976715,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3004.1,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 5.831122900088418,
|
|
"grad_norm": 0.7192829761203814,
|
|
"learning_rate": 3.317556349654125e-06,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24054303765296936,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4578.8,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 5.839964633068082,
|
|
"grad_norm": 0.8129183520607333,
|
|
"learning_rate": 3.2691114666352887e-06,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17372924089431763,
|
|
"step": 3305,
|
|
"valid_targets_mean": 1957.9,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 5.848806366047746,
|
|
"grad_norm": 0.6912822577892667,
|
|
"learning_rate": 3.220991400794431e-06,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20085839927196503,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4113.1,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 5.85764809902741,
|
|
"grad_norm": 0.7568673476778853,
|
|
"learning_rate": 3.1731970863451746e-06,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16372664272785187,
|
|
"step": 3315,
|
|
"valid_targets_mean": 2882.0,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 5.8664898320070735,
|
|
"grad_norm": 0.8292901753145713,
|
|
"learning_rate": 3.1257294511769263e-06,
|
|
"loss": 0.3945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1919775903224945,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2993.4,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 5.8753315649867375,
|
|
"grad_norm": 0.66874118757708,
|
|
"learning_rate": 3.0785894168368857e-06,
|
|
"loss": 0.4101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3156202435493469,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4430.4,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 5.884173297966401,
|
|
"grad_norm": 0.7803502732071088,
|
|
"learning_rate": 3.0317778985121182e-06,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17863729596138,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2628.6,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.893015030946065,
|
|
"grad_norm": 0.8120192943631549,
|
|
"learning_rate": 2.9852958050118275e-06,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1689552515745163,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3196.8,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 5.901856763925729,
|
|
"grad_norm": 0.7503397178701974,
|
|
"learning_rate": 2.9391440387496617e-06,
|
|
"loss": 0.4242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1889551877975464,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3213.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 5.910698496905393,
|
|
"grad_norm": 0.8591198480547091,
|
|
"learning_rate": 2.89332349572625e-06,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662624180316925,
|
|
"step": 3345,
|
|
"valid_targets_mean": 1885.9,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.919540229885057,
|
|
"grad_norm": 0.9519020720938417,
|
|
"learning_rate": 2.847835065511766e-06,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17353492975234985,
|
|
"step": 3350,
|
|
"valid_targets_mean": 1727.4,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 5.928381962864721,
|
|
"grad_norm": 0.8747102894127499,
|
|
"learning_rate": 2.8026796312286843e-06,
|
|
"loss": 0.4373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23620933294296265,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3666.1,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 5.937223695844385,
|
|
"grad_norm": 0.6376827702661823,
|
|
"learning_rate": 2.757858069534598e-06,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1924460232257843,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4527.5,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 5.946065428824049,
|
|
"grad_norm": 0.6516051452822853,
|
|
"learning_rate": 2.71337125060525e-06,
|
|
"loss": 0.417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005494475364685,
|
|
"step": 3365,
|
|
"valid_targets_mean": 5757.8,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 5.954907161803714,
|
|
"grad_norm": 0.732478020417441,
|
|
"learning_rate": 2.669220038117597e-06,
|
|
"loss": 0.4419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27111363410949707,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3390.0,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 5.963748894783378,
|
|
"grad_norm": 0.8080636185217684,
|
|
"learning_rate": 2.6254052892330737e-06,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3034144341945648,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3722.1,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 5.972590627763042,
|
|
"grad_norm": 0.7169451114480241,
|
|
"learning_rate": 2.5819278545809103e-06,
|
|
"loss": 0.4149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27992749214172363,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3570.2,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 5.981432360742706,
|
|
"grad_norm": 0.844425388762182,
|
|
"learning_rate": 2.5387885782416712e-06,
|
|
"loss": 0.3631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17740076780319214,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2162.0,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 5.99027409372237,
|
|
"grad_norm": 0.6494113116563578,
|
|
"learning_rate": 2.495988297730818e-06,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20014826953411102,
|
|
"step": 3390,
|
|
"valid_targets_mean": 4345.4,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 5.999115826702034,
|
|
"grad_norm": 0.6847608100268655,
|
|
"learning_rate": 2.453527843982497e-06,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28000521659851074,
|
|
"step": 3395,
|
|
"valid_targets_mean": 5669.8,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 6.007073386383731,
|
|
"grad_norm": 0.7892539431347021,
|
|
"learning_rate": 2.4114080413333474e-06,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21648083627223969,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3293.4,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 6.015915119363395,
|
|
"grad_norm": 0.7327489748764502,
|
|
"learning_rate": 2.369629707506569e-06,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21525824069976807,
|
|
"step": 3405,
|
|
"valid_targets_mean": 2875.0,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 6.024756852343059,
|
|
"grad_norm": 0.7427914142271864,
|
|
"learning_rate": 2.328193653595985e-06,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29550355672836304,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3901.6,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 6.033598585322723,
|
|
"grad_norm": 0.7551934122627986,
|
|
"learning_rate": 2.2871006840503384e-06,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1656990498304367,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2678.4,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 6.042440318302387,
|
|
"grad_norm": 0.8014400239410849,
|
|
"learning_rate": 2.2463515966576342e-06,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19182810187339783,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2418.2,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 6.051282051282051,
|
|
"grad_norm": 0.897788988403115,
|
|
"learning_rate": 2.2059471825296973e-06,
|
|
"loss": 0.3931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26385846734046936,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2770.5,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 6.060123784261715,
|
|
"grad_norm": 0.8149264081278768,
|
|
"learning_rate": 2.1658882260867698e-06,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12121979147195816,
|
|
"step": 3430,
|
|
"valid_targets_mean": 1847.2,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 6.068965517241379,
|
|
"grad_norm": 0.9058022749241772,
|
|
"learning_rate": 2.1261755050423204e-06,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18681831657886505,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2502.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 6.077807250221043,
|
|
"grad_norm": 0.5875487670316494,
|
|
"learning_rate": 2.086809790387905e-06,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14535044133663177,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3132.5,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 6.086648983200707,
|
|
"grad_norm": 0.7952935546054049,
|
|
"learning_rate": 2.047791846378242e-06,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18173527717590332,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3148.6,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 6.095490716180372,
|
|
"grad_norm": 0.7607277089232913,
|
|
"learning_rate": 2.009122430516337e-06,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17606256902217865,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2331.9,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 6.104332449160036,
|
|
"grad_norm": 0.7496258419853713,
|
|
"learning_rate": 1.970802293538807e-06,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27735546231269836,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4273.4,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 6.1131741821397,
|
|
"grad_norm": 0.6985174849382113,
|
|
"learning_rate": 1.9328321794012762e-06,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2295549213886261,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3212.6,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 6.122015915119364,
|
|
"grad_norm": 0.6160591675123345,
|
|
"learning_rate": 1.895212825263959e-06,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262872576713562,
|
|
"step": 3465,
|
|
"valid_targets_mean": 6619.6,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 6.130857648099028,
|
|
"grad_norm": 0.8207914026042112,
|
|
"learning_rate": 1.8579449614773314e-06,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13713327050209045,
|
|
"step": 3470,
|
|
"valid_targets_mean": 1882.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 6.1396993810786915,
|
|
"grad_norm": 0.8071469248304369,
|
|
"learning_rate": 1.8210293115679677e-06,
|
|
"loss": 0.4061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2146981954574585,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3234.0,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 6.1485411140583555,
|
|
"grad_norm": 0.6897319556110078,
|
|
"learning_rate": 1.784466592224472e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13424663245677948,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2311.5,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 6.157382847038019,
|
|
"grad_norm": 0.729819719486692,
|
|
"learning_rate": 1.74825751328358e-06,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19196979701519012,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3340.2,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 6.166224580017683,
|
|
"grad_norm": 0.8426168349719839,
|
|
"learning_rate": 1.7124027777163732e-06,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14435023069381714,
|
|
"step": 3490,
|
|
"valid_targets_mean": 1912.6,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 6.175066312997347,
|
|
"grad_norm": 0.759757715201301,
|
|
"learning_rate": 1.6769030816146404e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22856047749519348,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3739.8,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 6.183908045977011,
|
|
"grad_norm": 0.8366056994371286,
|
|
"learning_rate": 1.6417591141773458e-06,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17554280161857605,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2551.6,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 6.192749778956675,
|
|
"grad_norm": 0.7980312027283708,
|
|
"learning_rate": 1.6069715576972634e-06,
|
|
"loss": 0.3904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27862659096717834,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3707.2,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 6.201591511936339,
|
|
"grad_norm": 0.7364325995709389,
|
|
"learning_rate": 1.5725410875477187e-06,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1674642115831375,
|
|
"step": 3510,
|
|
"valid_targets_mean": 2912.5,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 6.210433244916003,
|
|
"grad_norm": 0.8858927969586514,
|
|
"learning_rate": 1.5384683721694993e-06,
|
|
"loss": 0.3889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18463276326656342,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2316.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 6.219274977895668,
|
|
"grad_norm": 0.8919860695858821,
|
|
"learning_rate": 1.504754073057846e-06,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19577711820602417,
|
|
"step": 3520,
|
|
"valid_targets_mean": 1823.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 6.228116710875332,
|
|
"grad_norm": 0.8455766242568858,
|
|
"learning_rate": 1.4713988447496342e-06,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19854457676410675,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2662.8,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 6.236958443854996,
|
|
"grad_norm": 0.744453900421472,
|
|
"learning_rate": 1.438403334810663e-06,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15537121891975403,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2982.4,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 6.24580017683466,
|
|
"grad_norm": 0.8827206937052091,
|
|
"learning_rate": 1.405768183823073e-06,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19907264411449432,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2362.4,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 6.254641909814324,
|
|
"grad_norm": 0.8416510383985708,
|
|
"learning_rate": 1.3734940253729167e-06,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22170794010162354,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3408.8,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 6.263483642793988,
|
|
"grad_norm": 0.7873873101117113,
|
|
"learning_rate": 1.3415814860378573e-06,
|
|
"loss": 0.3766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14214427769184113,
|
|
"step": 3545,
|
|
"valid_targets_mean": 2900.8,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 6.272325375773652,
|
|
"grad_norm": 0.7347622266646417,
|
|
"learning_rate": 1.3100311853750114e-06,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152814120054245,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2783.9,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.281167108753316,
|
|
"grad_norm": 0.8638119930741502,
|
|
"learning_rate": 1.2788437359089056e-06,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2797715365886688,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3625.0,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 6.29000884173298,
|
|
"grad_norm": 0.7458458853206296,
|
|
"learning_rate": 1.248019743119595e-06,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2126968950033188,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3209.9,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 6.2988505747126435,
|
|
"grad_norm": 0.7013433868816651,
|
|
"learning_rate": 1.2175598054309058e-06,
|
|
"loss": 0.3637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24043208360671997,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4891.2,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 6.3076923076923075,
|
|
"grad_norm": 0.7155286710184046,
|
|
"learning_rate": 1.187464514198824e-06,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22147774696350098,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3511.0,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.316534040671971,
|
|
"grad_norm": 0.8942958090189175,
|
|
"learning_rate": 1.1577344537000012e-06,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24931302666664124,
|
|
"step": 3575,
|
|
"valid_targets_mean": 3223.0,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 6.325375773651635,
|
|
"grad_norm": 0.8019003945889316,
|
|
"learning_rate": 1.128370201120419e-06,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635502517223358,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3202.5,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 6.334217506631299,
|
|
"grad_norm": 0.6817340392127685,
|
|
"learning_rate": 1.0993723265441835e-06,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13677793741226196,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2905.6,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 6.343059239610964,
|
|
"grad_norm": 0.6095420535867488,
|
|
"learning_rate": 1.0707413929424626e-06,
|
|
"loss": 0.415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17292746901512146,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3958.5,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 6.351900972590628,
|
|
"grad_norm": 0.6319517497546866,
|
|
"learning_rate": 1.0424779561625465e-06,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15072670578956604,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3169.4,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 6.360742705570292,
|
|
"grad_norm": 0.806370402271801,
|
|
"learning_rate": 1.0145825649170593e-06,
|
|
"loss": 0.4086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2352861762046814,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2252.2,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 6.369584438549956,
|
|
"grad_norm": 0.6699724587109076,
|
|
"learning_rate": 9.870557607733056e-07,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16308927536010742,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3615.8,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 6.37842617152962,
|
|
"grad_norm": 0.827228282016501,
|
|
"learning_rate": 9.59898078142767e-07,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2147194743156433,
|
|
"step": 3610,
|
|
"valid_targets_mean": 2595.4,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 6.387267904509284,
|
|
"grad_norm": 0.6652223634579235,
|
|
"learning_rate": 9.331100442707108e-07,
|
|
"loss": 0.4052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17819935083389282,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3927.1,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 6.396109637488948,
|
|
"grad_norm": 0.7289789412288719,
|
|
"learning_rate": 9.066921792259654e-07,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18159447610378265,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3477.9,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 6.404951370468612,
|
|
"grad_norm": 0.6396828577240828,
|
|
"learning_rate": 8.806449958908115e-07,
|
|
"loss": 0.382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22703109681606293,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5019.6,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 6.413793103448276,
|
|
"grad_norm": 0.8721346570560746,
|
|
"learning_rate": 8.549689999510446e-07,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23551982641220093,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2339.8,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 6.42263483642794,
|
|
"grad_norm": 0.6947586156644703,
|
|
"learning_rate": 8.296646898861405e-07,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16428089141845703,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3315.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.431476569407604,
|
|
"grad_norm": 0.7795433915349836,
|
|
"learning_rate": 8.047325569595821e-07,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1826629936695099,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2893.4,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 6.440318302387268,
|
|
"grad_norm": 0.6984355115855847,
|
|
"learning_rate": 7.801730852093193e-07,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21642327308654785,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3958.0,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 6.449160035366932,
|
|
"grad_norm": 1.0772469112743712,
|
|
"learning_rate": 7.559867514383867e-07,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2273642122745514,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3464.5,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 6.458001768346596,
|
|
"grad_norm": 0.8907493802415359,
|
|
"learning_rate": 7.321740252056253e-07,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19292961061000824,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2551.0,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 6.4668435013262595,
|
|
"grad_norm": 0.8209923973422765,
|
|
"learning_rate": 7.087353688165776e-07,
|
|
"loss": 0.4453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1861528754234314,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2526.1,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 6.475685234305924,
|
|
"grad_norm": 0.7474743887332304,
|
|
"learning_rate": 6.856712373145158e-07,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22273015975952148,
|
|
"step": 3665,
|
|
"valid_targets_mean": 2730.5,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 6.484526967285588,
|
|
"grad_norm": 0.7947503636901528,
|
|
"learning_rate": 6.62982078471599e-07,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3024711310863495,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4017.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 6.493368700265252,
|
|
"grad_norm": 0.8974195325023576,
|
|
"learning_rate": 6.406683327801877e-07,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1381479799747467,
|
|
"step": 3675,
|
|
"valid_targets_mean": 1530.0,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 6.502210433244916,
|
|
"grad_norm": 0.7191615989293343,
|
|
"learning_rate": 6.187304334442856e-07,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1408117115497589,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3383.2,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 6.51105216622458,
|
|
"grad_norm": 0.9329960975796369,
|
|
"learning_rate": 5.97168806371129e-07,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2396770715713501,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2167.9,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 6.519893899204244,
|
|
"grad_norm": 0.6478034299508139,
|
|
"learning_rate": 5.759838701629284e-07,
|
|
"loss": 0.3876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18991714715957642,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3872.1,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 6.528735632183908,
|
|
"grad_norm": 0.665389821538992,
|
|
"learning_rate": 5.551760361087288e-07,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20422768592834473,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3974.6,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 6.537577365163572,
|
|
"grad_norm": 0.8481964209332128,
|
|
"learning_rate": 5.347457081764318e-07,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12965896725654602,
|
|
"step": 3700,
|
|
"valid_targets_mean": 2102.9,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 6.546419098143236,
|
|
"grad_norm": 0.7959235784114853,
|
|
"learning_rate": 5.146932830049545e-07,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13871905207633972,
|
|
"step": 3705,
|
|
"valid_targets_mean": 2381.4,
|
|
"valid_targets_min": 416
|
|
},
|
|
{
|
|
"epoch": 6.5552608311229,
|
|
"grad_norm": 0.9738963676820827,
|
|
"learning_rate": 4.950191498965207e-07,
|
|
"loss": 0.391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24106061458587646,
|
|
"step": 3710,
|
|
"valid_targets_mean": 2475.1,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 6.564102564102564,
|
|
"grad_norm": 0.6979946706582677,
|
|
"learning_rate": 4.7572369080910897e-07,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283669114112854,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3481.0,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 6.572944297082228,
|
|
"grad_norm": 0.8704935577886896,
|
|
"learning_rate": 4.568072803490409e-07,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18020617961883545,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2580.9,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 6.581786030061892,
|
|
"grad_norm": 0.7804746793990116,
|
|
"learning_rate": 4.382702857637022e-07,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17412936687469482,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3689.2,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 6.590627763041557,
|
|
"grad_norm": 0.8487934940824559,
|
|
"learning_rate": 4.2011306693441554e-07,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24455024302005768,
|
|
"step": 3730,
|
|
"valid_targets_mean": 2609.2,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 6.59946949602122,
|
|
"grad_norm": 0.6852406296840956,
|
|
"learning_rate": 4.0233597636944565e-07,
|
|
"loss": 0.3904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19759978353977203,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3594.5,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 6.6083112290008845,
|
|
"grad_norm": 0.7416508492161489,
|
|
"learning_rate": 3.84939359197174e-07,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138950914144516,
|
|
"step": 3740,
|
|
"valid_targets_mean": 2434.2,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 6.6171529619805485,
|
|
"grad_norm": 0.8357824054733599,
|
|
"learning_rate": 3.6792355315937947e-07,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19350282847881317,
|
|
"step": 3745,
|
|
"valid_targets_mean": 2571.5,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 6.625994694960212,
|
|
"grad_norm": 0.8587156622200902,
|
|
"learning_rate": 3.5128888860468836e-07,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22190819680690765,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3109.5,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 6.634836427939876,
|
|
"grad_norm": 0.6581213951003092,
|
|
"learning_rate": 3.350356884821748e-07,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21432948112487793,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3528.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.64367816091954,
|
|
"grad_norm": 0.8286218218972864,
|
|
"learning_rate": 3.191642683350571e-07,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32919830083847046,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4206.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.652519893899204,
|
|
"grad_norm": 0.7950121098953326,
|
|
"learning_rate": 3.036749362946112e-07,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18096879124641418,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2454.1,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 6.661361626878868,
|
|
"grad_norm": 0.7940166037642752,
|
|
"learning_rate": 2.8856799307415807e-07,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14865876734256744,
|
|
"step": 3770,
|
|
"valid_targets_mean": 2513.9,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 6.670203359858532,
|
|
"grad_norm": 0.7495945684460611,
|
|
"learning_rate": 2.7384373196324365e-07,
|
|
"loss": 0.3681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11169509589672089,
|
|
"step": 3775,
|
|
"valid_targets_mean": 2019.8,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 6.679045092838196,
|
|
"grad_norm": 0.711551778287024,
|
|
"learning_rate": 2.595024388219347e-07,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22360673546791077,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3386.1,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 6.68788682581786,
|
|
"grad_norm": 0.7177246883491567,
|
|
"learning_rate": 2.4554439207527424e-07,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19970688223838806,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4797.1,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 6.696728558797524,
|
|
"grad_norm": 0.7780793009957203,
|
|
"learning_rate": 2.3196986270787704e-07,
|
|
"loss": 0.4226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3514820635318756,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4338.2,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 6.705570291777188,
|
|
"grad_norm": 0.7174415720541817,
|
|
"learning_rate": 2.1877911425866705e-07,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15473148226737976,
|
|
"step": 3795,
|
|
"valid_targets_mean": 2806.8,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 6.714412024756852,
|
|
"grad_norm": 0.6743683915959348,
|
|
"learning_rate": 2.059724028157506e-07,
|
|
"loss": 0.3834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16321048140525818,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3285.9,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 6.723253757736517,
|
|
"grad_norm": 0.7217029334413514,
|
|
"learning_rate": 1.9354997701146904e-07,
|
|
"loss": 0.3951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24354010820388794,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3629.1,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 6.732095490716181,
|
|
"grad_norm": 0.7644466171909563,
|
|
"learning_rate": 1.8151207801754722e-07,
|
|
"loss": 0.4429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22783520817756653,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3151.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 6.740937223695845,
|
|
"grad_norm": 0.697996637149306,
|
|
"learning_rate": 1.6985893954042598e-07,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34702497720718384,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5219.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.749778956675509,
|
|
"grad_norm": 0.6524281262476507,
|
|
"learning_rate": 1.5859078781671477e-07,
|
|
"loss": 0.3758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16086144745349884,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3767.5,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 6.758620689655173,
|
|
"grad_norm": 0.8703374924931297,
|
|
"learning_rate": 1.477078416088107e-07,
|
|
"loss": 0.383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157963365316391,
|
|
"step": 3825,
|
|
"valid_targets_mean": 2099.6,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 6.7674624226348365,
|
|
"grad_norm": 0.9150276768989083,
|
|
"learning_rate": 1.3721031220064185e-07,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20869338512420654,
|
|
"step": 3830,
|
|
"valid_targets_mean": 2169.6,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 6.7763041556145005,
|
|
"grad_norm": 0.8582742028385791,
|
|
"learning_rate": 1.2709840339357515e-07,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1651896834373474,
|
|
"step": 3835,
|
|
"valid_targets_mean": 1981.6,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 6.7851458885941645,
|
|
"grad_norm": 0.8933135468827135,
|
|
"learning_rate": 1.1737231150244388e-07,
|
|
"loss": 0.383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16246512532234192,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2891.0,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.793987621573828,
|
|
"grad_norm": 0.8039369028796995,
|
|
"learning_rate": 1.0803222535175295e-07,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12738923728466034,
|
|
"step": 3845,
|
|
"valid_targets_mean": 2474.1,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 6.802829354553492,
|
|
"grad_norm": 0.800841966247947,
|
|
"learning_rate": 9.907832627200631e-08,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19415853917598724,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2828.2,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 6.811671087533156,
|
|
"grad_norm": 0.7602967171940971,
|
|
"learning_rate": 9.051078809618752e-08,
|
|
"loss": 0.3876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29658013582229614,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4289.4,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 6.82051282051282,
|
|
"grad_norm": 0.6866320695142892,
|
|
"learning_rate": 8.232977715637358e-08,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22931569814682007,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5008.4,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 6.829354553492484,
|
|
"grad_norm": 0.8047620480314491,
|
|
"learning_rate": 7.453545228053305e-08,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17397227883338928,
|
|
"step": 3865,
|
|
"valid_targets_mean": 2955.0,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 6.838196286472148,
|
|
"grad_norm": 0.7586490896107623,
|
|
"learning_rate": 6.712796478940853e-08,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16233235597610474,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2807.6,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 6.847038019451812,
|
|
"grad_norm": 0.6581619537458464,
|
|
"learning_rate": 6.010745849361011e-08,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.133625328540802,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4207.5,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 6.855879752431477,
|
|
"grad_norm": 0.7436096405016829,
|
|
"learning_rate": 5.347406969079982e-08,
|
|
"loss": 0.417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1265230029821396,
|
|
"step": 3880,
|
|
"valid_targets_mean": 2987.5,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 6.864721485411141,
|
|
"grad_norm": 0.7763730599939348,
|
|
"learning_rate": 4.722792716306046e-08,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20675580203533173,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3732.2,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 6.873563218390805,
|
|
"grad_norm": 0.7715820038607789,
|
|
"learning_rate": 4.1369152174381975e-08,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21535265445709229,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3796.9,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 6.882404951370469,
|
|
"grad_norm": 0.9019028296801088,
|
|
"learning_rate": 3.589785846832783e-08,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20554758608341217,
|
|
"step": 3895,
|
|
"valid_targets_mean": 2699.5,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 6.891246684350133,
|
|
"grad_norm": 0.7944162594852624,
|
|
"learning_rate": 3.081415226579898e-08,
|
|
"loss": 0.3451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15075156092643738,
|
|
"step": 3900,
|
|
"valid_targets_mean": 2368.2,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.900088417329797,
|
|
"grad_norm": 0.7617097376400151,
|
|
"learning_rate": 2.6118132263002193e-08,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630379855632782,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3169.9,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 6.908930150309461,
|
|
"grad_norm": 0.7539821586393874,
|
|
"learning_rate": 2.18098896295027e-08,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14324209094047546,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2474.1,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 6.917771883289125,
|
|
"grad_norm": 0.797486908920383,
|
|
"learning_rate": 1.788950800648337e-08,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1615052968263626,
|
|
"step": 3915,
|
|
"valid_targets_mean": 2934.4,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 6.926613616268789,
|
|
"grad_norm": 0.7651379607020341,
|
|
"learning_rate": 1.4357063505101576e-08,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14266857504844666,
|
|
"step": 3920,
|
|
"valid_targets_mean": 2230.8,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 6.9354553492484525,
|
|
"grad_norm": 0.7502594071352197,
|
|
"learning_rate": 1.1212624705014829e-08,
|
|
"loss": 0.4242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.321631520986557,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3950.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 6.9442970822281165,
|
|
"grad_norm": 0.6647778859131394,
|
|
"learning_rate": 8.456252653055164e-09,
|
|
"loss": 0.4284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23882895708084106,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3747.4,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 6.95313881520778,
|
|
"grad_norm": 0.6155232006040645,
|
|
"learning_rate": 6.08800086204342e-09,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1027369350194931,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3219.8,
|
|
"valid_targets_min": 1858
|
|
},
|
|
{
|
|
"epoch": 6.961980548187444,
|
|
"grad_norm": 0.9711608387054405,
|
|
"learning_rate": 4.107915309743416e-09,
|
|
"loss": 0.4059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.190896674990654,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2195.4,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 6.970822281167109,
|
|
"grad_norm": 0.7122188828888222,
|
|
"learning_rate": 2.516034437973769e-09,
|
|
"loss": 0.4263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21069204807281494,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3195.0,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 6.979664014146772,
|
|
"grad_norm": 0.7376023644733862,
|
|
"learning_rate": 1.312389151861826e-09,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25383979082107544,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3817.6,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 6.988505747126437,
|
|
"grad_norm": 0.627582373951346,
|
|
"learning_rate": 4.970028192485821e-10,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1778397411108017,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3882.2,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 6.997347480106101,
|
|
"grad_norm": 0.7920130024563453,
|
|
"learning_rate": 6.989127021794773e-11,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20227769017219543,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2815.9,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48488524556159973,
|
|
"step": 3962,
|
|
"total_flos": 9.02500960062931e+17,
|
|
"train_loss": 0.468742822619895,
|
|
"train_runtime": 33154.24,
|
|
"train_samples_per_second": 1.91,
|
|
"train_steps_per_second": 0.12,
|
|
"valid_targets_mean": 3460.0,
|
|
"valid_targets_min": 1632
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3962,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9.02500960062931e+17,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|