Model: laion/nemosci-tasrep-a1mfc-gfistaqc-dev1-scaff-maxeps-swes-r2eg__Qwen3-8B Source: Original Platform
10245 lines
285 KiB
JSON
10245 lines
285 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4635,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00539568345323741,
|
|
"grad_norm": 13.661851014144142,
|
|
"learning_rate": 3.4482758620689656e-07,
|
|
"loss": 0.9671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33074837923049927,
|
|
"step": 5,
|
|
"valid_targets_mean": 8170.6,
|
|
"valid_targets_min": 3077
|
|
},
|
|
{
|
|
"epoch": 0.01079136690647482,
|
|
"grad_norm": 13.17705289916657,
|
|
"learning_rate": 7.758620689655173e-07,
|
|
"loss": 0.9704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31075888872146606,
|
|
"step": 10,
|
|
"valid_targets_mean": 6980.7,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 0.01618705035971223,
|
|
"grad_norm": 9.577086560756,
|
|
"learning_rate": 1.2068965517241381e-06,
|
|
"loss": 0.9376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770102024078369,
|
|
"step": 15,
|
|
"valid_targets_mean": 6148.2,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 0.02158273381294964,
|
|
"grad_norm": 5.7752121097326645,
|
|
"learning_rate": 1.6379310344827587e-06,
|
|
"loss": 0.8844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996809482574463,
|
|
"step": 20,
|
|
"valid_targets_mean": 7165.8,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 0.02697841726618705,
|
|
"grad_norm": 2.5751991958369604,
|
|
"learning_rate": 2.0689655172413796e-06,
|
|
"loss": 0.8112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267288476228714,
|
|
"step": 25,
|
|
"valid_targets_mean": 7202.3,
|
|
"valid_targets_min": 2907
|
|
},
|
|
{
|
|
"epoch": 0.03237410071942446,
|
|
"grad_norm": 1.7536742757275627,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.7803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608868479728699,
|
|
"step": 30,
|
|
"valid_targets_mean": 7477.5,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 0.03776978417266187,
|
|
"grad_norm": 1.5301402289588988,
|
|
"learning_rate": 2.931034482758621e-06,
|
|
"loss": 0.7499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2523272633552551,
|
|
"step": 35,
|
|
"valid_targets_mean": 7314.0,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 0.04316546762589928,
|
|
"grad_norm": 0.9977408060534682,
|
|
"learning_rate": 3.362068965517242e-06,
|
|
"loss": 0.7065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.226681649684906,
|
|
"step": 40,
|
|
"valid_targets_mean": 7126.6,
|
|
"valid_targets_min": 2597
|
|
},
|
|
{
|
|
"epoch": 0.048561151079136694,
|
|
"grad_norm": 0.8152335838388542,
|
|
"learning_rate": 3.793103448275862e-06,
|
|
"loss": 0.681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.215467631816864,
|
|
"step": 45,
|
|
"valid_targets_mean": 7369.8,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 0.0539568345323741,
|
|
"grad_norm": 0.569283935129924,
|
|
"learning_rate": 4.224137931034483e-06,
|
|
"loss": 0.6484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20571649074554443,
|
|
"step": 50,
|
|
"valid_targets_mean": 7356.4,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 0.05935251798561151,
|
|
"grad_norm": 0.5035757847242521,
|
|
"learning_rate": 4.655172413793104e-06,
|
|
"loss": 0.5951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18595808744430542,
|
|
"step": 55,
|
|
"valid_targets_mean": 8719.0,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.06474820143884892,
|
|
"grad_norm": 0.4066678699275454,
|
|
"learning_rate": 5.086206896551724e-06,
|
|
"loss": 0.555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19412162899971008,
|
|
"step": 60,
|
|
"valid_targets_mean": 9029.5,
|
|
"valid_targets_min": 4655
|
|
},
|
|
{
|
|
"epoch": 0.07014388489208633,
|
|
"grad_norm": 0.33507199352137684,
|
|
"learning_rate": 5.517241379310345e-06,
|
|
"loss": 0.5279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17783935368061066,
|
|
"step": 65,
|
|
"valid_targets_mean": 10019.8,
|
|
"valid_targets_min": 4996
|
|
},
|
|
{
|
|
"epoch": 0.07553956834532374,
|
|
"grad_norm": 0.26846397352262785,
|
|
"learning_rate": 5.9482758620689665e-06,
|
|
"loss": 0.5082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16786092519760132,
|
|
"step": 70,
|
|
"valid_targets_mean": 9413.2,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 0.08093525179856115,
|
|
"grad_norm": 0.24316373192587276,
|
|
"learning_rate": 6.379310344827587e-06,
|
|
"loss": 0.5006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17443504929542542,
|
|
"step": 75,
|
|
"valid_targets_mean": 10127.1,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 0.08633093525179857,
|
|
"grad_norm": 0.2296433153544275,
|
|
"learning_rate": 6.810344827586207e-06,
|
|
"loss": 0.4791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16901013255119324,
|
|
"step": 80,
|
|
"valid_targets_mean": 9319.1,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.09172661870503597,
|
|
"grad_norm": 0.22732950372409771,
|
|
"learning_rate": 7.241379310344828e-06,
|
|
"loss": 0.4803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1583300232887268,
|
|
"step": 85,
|
|
"valid_targets_mean": 9109.9,
|
|
"valid_targets_min": 4379
|
|
},
|
|
{
|
|
"epoch": 0.09712230215827339,
|
|
"grad_norm": 0.2243457192250265,
|
|
"learning_rate": 7.672413793103449e-06,
|
|
"loss": 0.4635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1495744287967682,
|
|
"step": 90,
|
|
"valid_targets_mean": 9313.2,
|
|
"valid_targets_min": 3452
|
|
},
|
|
{
|
|
"epoch": 0.10251798561151079,
|
|
"grad_norm": 0.23356274439914965,
|
|
"learning_rate": 8.103448275862069e-06,
|
|
"loss": 0.4554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14848843216896057,
|
|
"step": 95,
|
|
"valid_targets_mean": 8640.9,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.1079136690647482,
|
|
"grad_norm": 0.23213600903193946,
|
|
"learning_rate": 8.53448275862069e-06,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14884325861930847,
|
|
"step": 100,
|
|
"valid_targets_mean": 9029.9,
|
|
"valid_targets_min": 4552
|
|
},
|
|
{
|
|
"epoch": 0.11330935251798561,
|
|
"grad_norm": 0.2371364669155294,
|
|
"learning_rate": 8.965517241379312e-06,
|
|
"loss": 0.4436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14973995089530945,
|
|
"step": 105,
|
|
"valid_targets_mean": 10390.2,
|
|
"valid_targets_min": 4833
|
|
},
|
|
{
|
|
"epoch": 0.11870503597122302,
|
|
"grad_norm": 0.23281673272232828,
|
|
"learning_rate": 9.396551724137931e-06,
|
|
"loss": 0.4334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1490229368209839,
|
|
"step": 110,
|
|
"valid_targets_mean": 9771.6,
|
|
"valid_targets_min": 3641
|
|
},
|
|
{
|
|
"epoch": 0.12410071942446044,
|
|
"grad_norm": 0.33126875309843196,
|
|
"learning_rate": 9.827586206896553e-06,
|
|
"loss": 0.4307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13347497582435608,
|
|
"step": 115,
|
|
"valid_targets_mean": 9316.0,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 0.12949640287769784,
|
|
"grad_norm": 0.22485799981595347,
|
|
"learning_rate": 1.0258620689655174e-05,
|
|
"loss": 0.4304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14381901919841766,
|
|
"step": 120,
|
|
"valid_targets_mean": 9838.7,
|
|
"valid_targets_min": 4324
|
|
},
|
|
{
|
|
"epoch": 0.13489208633093525,
|
|
"grad_norm": 0.2535950289720206,
|
|
"learning_rate": 1.0689655172413792e-05,
|
|
"loss": 0.4356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14818724989891052,
|
|
"step": 125,
|
|
"valid_targets_mean": 9693.1,
|
|
"valid_targets_min": 3811
|
|
},
|
|
{
|
|
"epoch": 0.14028776978417265,
|
|
"grad_norm": 0.2017237671900251,
|
|
"learning_rate": 1.1120689655172414e-05,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12693054974079132,
|
|
"step": 130,
|
|
"valid_targets_mean": 9686.9,
|
|
"valid_targets_min": 3338
|
|
},
|
|
{
|
|
"epoch": 0.14568345323741008,
|
|
"grad_norm": 0.21819461779066096,
|
|
"learning_rate": 1.1551724137931035e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12800461053848267,
|
|
"step": 135,
|
|
"valid_targets_mean": 9590.6,
|
|
"valid_targets_min": 4785
|
|
},
|
|
{
|
|
"epoch": 0.1510791366906475,
|
|
"grad_norm": 0.21365790004820776,
|
|
"learning_rate": 1.1982758620689657e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14556610584259033,
|
|
"step": 140,
|
|
"valid_targets_mean": 10718.9,
|
|
"valid_targets_min": 4892
|
|
},
|
|
{
|
|
"epoch": 0.1564748201438849,
|
|
"grad_norm": 0.23391386296257358,
|
|
"learning_rate": 1.2413793103448277e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569352149963379,
|
|
"step": 145,
|
|
"valid_targets_mean": 10753.2,
|
|
"valid_targets_min": 3836
|
|
},
|
|
{
|
|
"epoch": 0.1618705035971223,
|
|
"grad_norm": 0.22344874993803135,
|
|
"learning_rate": 1.2844827586206898e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12048947811126709,
|
|
"step": 150,
|
|
"valid_targets_mean": 8877.9,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.1672661870503597,
|
|
"grad_norm": 0.23194262576110505,
|
|
"learning_rate": 1.327586206896552e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14570260047912598,
|
|
"step": 155,
|
|
"valid_targets_mean": 11518.0,
|
|
"valid_targets_min": 4504
|
|
},
|
|
{
|
|
"epoch": 0.17266187050359713,
|
|
"grad_norm": 0.22759334217157462,
|
|
"learning_rate": 1.3706896551724138e-05,
|
|
"loss": 0.4056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14676150679588318,
|
|
"step": 160,
|
|
"valid_targets_mean": 10316.1,
|
|
"valid_targets_min": 4842
|
|
},
|
|
{
|
|
"epoch": 0.17805755395683454,
|
|
"grad_norm": 0.26749252109284444,
|
|
"learning_rate": 1.4137931034482759e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13555777072906494,
|
|
"step": 165,
|
|
"valid_targets_mean": 10630.5,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 0.18345323741007194,
|
|
"grad_norm": 0.22958292736205713,
|
|
"learning_rate": 1.456896551724138e-05,
|
|
"loss": 0.3999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14400115609169006,
|
|
"step": 170,
|
|
"valid_targets_mean": 10322.6,
|
|
"valid_targets_min": 4928
|
|
},
|
|
{
|
|
"epoch": 0.18884892086330934,
|
|
"grad_norm": 0.25130114600482717,
|
|
"learning_rate": 1.5000000000000002e-05,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385354995727539,
|
|
"step": 175,
|
|
"valid_targets_mean": 10297.2,
|
|
"valid_targets_min": 3888
|
|
},
|
|
{
|
|
"epoch": 0.19424460431654678,
|
|
"grad_norm": 0.2499541563134387,
|
|
"learning_rate": 1.5431034482758624e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12814520299434662,
|
|
"step": 180,
|
|
"valid_targets_mean": 8726.7,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 0.19964028776978418,
|
|
"grad_norm": 0.2567391954737445,
|
|
"learning_rate": 1.586206896551724e-05,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11609770357608795,
|
|
"step": 185,
|
|
"valid_targets_mean": 8473.2,
|
|
"valid_targets_min": 3703
|
|
},
|
|
{
|
|
"epoch": 0.20503597122302158,
|
|
"grad_norm": 0.23077999551847594,
|
|
"learning_rate": 1.6293103448275863e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313605159521103,
|
|
"step": 190,
|
|
"valid_targets_mean": 10562.8,
|
|
"valid_targets_min": 4059
|
|
},
|
|
{
|
|
"epoch": 0.210431654676259,
|
|
"grad_norm": 0.3663398714547867,
|
|
"learning_rate": 1.6724137931034485e-05,
|
|
"loss": 0.4119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13502202928066254,
|
|
"step": 195,
|
|
"valid_targets_mean": 5732.4,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.2158273381294964,
|
|
"grad_norm": 0.3343196788709341,
|
|
"learning_rate": 1.7155172413793103e-05,
|
|
"loss": 0.4277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1395188868045807,
|
|
"step": 200,
|
|
"valid_targets_mean": 5909.7,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.22122302158273383,
|
|
"grad_norm": 4.019402470065197,
|
|
"learning_rate": 1.7586206896551724e-05,
|
|
"loss": 0.6739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3533898591995239,
|
|
"step": 205,
|
|
"valid_targets_mean": 3622.3,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 0.22661870503597123,
|
|
"grad_norm": 1.4615635340558357,
|
|
"learning_rate": 1.8017241379310346e-05,
|
|
"loss": 0.9117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31081002950668335,
|
|
"step": 210,
|
|
"valid_targets_mean": 4234.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 0.23201438848920863,
|
|
"grad_norm": 0.5243887172611595,
|
|
"learning_rate": 1.8448275862068967e-05,
|
|
"loss": 0.7959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18653781712055206,
|
|
"step": 215,
|
|
"valid_targets_mean": 3496.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 0.23741007194244604,
|
|
"grad_norm": 0.3846512936861592,
|
|
"learning_rate": 1.887931034482759e-05,
|
|
"loss": 0.8472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773069143295288,
|
|
"step": 220,
|
|
"valid_targets_mean": 4790.7,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 0.24280575539568344,
|
|
"grad_norm": 0.3814272863956771,
|
|
"learning_rate": 1.931034482758621e-05,
|
|
"loss": 0.7693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947912812232971,
|
|
"step": 225,
|
|
"valid_targets_mean": 4529.3,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 0.24820143884892087,
|
|
"grad_norm": 0.36159551553626756,
|
|
"learning_rate": 1.9741379310344828e-05,
|
|
"loss": 0.7262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22180311381816864,
|
|
"step": 230,
|
|
"valid_targets_mean": 3307.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 0.25359712230215825,
|
|
"grad_norm": 0.33310481712581036,
|
|
"learning_rate": 2.017241379310345e-05,
|
|
"loss": 0.7331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23589260876178741,
|
|
"step": 235,
|
|
"valid_targets_mean": 4017.8,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 0.2589928057553957,
|
|
"grad_norm": 0.32708176975512904,
|
|
"learning_rate": 2.060344827586207e-05,
|
|
"loss": 0.7003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14246545732021332,
|
|
"step": 240,
|
|
"valid_targets_mean": 1564.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.2643884892086331,
|
|
"grad_norm": 0.29966045584802664,
|
|
"learning_rate": 2.1034482758620692e-05,
|
|
"loss": 0.718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2444619983434677,
|
|
"step": 245,
|
|
"valid_targets_mean": 4694.7,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 0.2697841726618705,
|
|
"grad_norm": 0.3119057835075692,
|
|
"learning_rate": 2.146551724137931e-05,
|
|
"loss": 0.6919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760992646217346,
|
|
"step": 250,
|
|
"valid_targets_mean": 4543.5,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.2751798561151079,
|
|
"grad_norm": 0.35694118206524017,
|
|
"learning_rate": 2.1896551724137932e-05,
|
|
"loss": 0.6743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24106468260288239,
|
|
"step": 255,
|
|
"valid_targets_mean": 3509.4,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 0.2805755395683453,
|
|
"grad_norm": 0.29514234672889933,
|
|
"learning_rate": 2.2327586206896554e-05,
|
|
"loss": 0.684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16689273715019226,
|
|
"step": 260,
|
|
"valid_targets_mean": 3599.4,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 0.28597122302158273,
|
|
"grad_norm": 0.3355385446874665,
|
|
"learning_rate": 2.2758620689655175e-05,
|
|
"loss": 0.6459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1117447018623352,
|
|
"step": 265,
|
|
"valid_targets_mean": 980.5,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.29136690647482016,
|
|
"grad_norm": 0.3102092819729468,
|
|
"learning_rate": 2.3189655172413796e-05,
|
|
"loss": 0.6821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24593019485473633,
|
|
"step": 270,
|
|
"valid_targets_mean": 4849.9,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 0.29676258992805754,
|
|
"grad_norm": 0.3015456543388383,
|
|
"learning_rate": 2.3620689655172418e-05,
|
|
"loss": 0.6589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22312313318252563,
|
|
"step": 275,
|
|
"valid_targets_mean": 4380.0,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 0.302158273381295,
|
|
"grad_norm": 0.33270616011155174,
|
|
"learning_rate": 2.405172413793104e-05,
|
|
"loss": 0.6944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2572234272956848,
|
|
"step": 280,
|
|
"valid_targets_mean": 3904.7,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 0.30755395683453235,
|
|
"grad_norm": 0.29737468240759607,
|
|
"learning_rate": 2.4482758620689654e-05,
|
|
"loss": 0.6678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2014371007680893,
|
|
"step": 285,
|
|
"valid_targets_mean": 3962.2,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.3129496402877698,
|
|
"grad_norm": 0.35973631840191267,
|
|
"learning_rate": 2.4913793103448276e-05,
|
|
"loss": 0.6424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13129623234272003,
|
|
"step": 290,
|
|
"valid_targets_mean": 1099.3,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 0.3183453237410072,
|
|
"grad_norm": 0.3176800607170934,
|
|
"learning_rate": 2.5344827586206897e-05,
|
|
"loss": 0.6662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500911056995392,
|
|
"step": 295,
|
|
"valid_targets_mean": 5050.1,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 0.3237410071942446,
|
|
"grad_norm": 0.2785271720425388,
|
|
"learning_rate": 2.577586206896552e-05,
|
|
"loss": 0.6532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23385725915431976,
|
|
"step": 300,
|
|
"valid_targets_mean": 5151.1,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 0.329136690647482,
|
|
"grad_norm": 0.3276555310980307,
|
|
"learning_rate": 2.620689655172414e-05,
|
|
"loss": 0.4567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09173206239938736,
|
|
"step": 305,
|
|
"valid_targets_mean": 5515.5,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 0.3345323741007194,
|
|
"grad_norm": 0.3094421703609023,
|
|
"learning_rate": 2.663793103448276e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07555842399597168,
|
|
"step": 310,
|
|
"valid_targets_mean": 5127.6,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 0.33992805755395683,
|
|
"grad_norm": 0.28314164294489497,
|
|
"learning_rate": 2.7068965517241383e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07078821957111359,
|
|
"step": 315,
|
|
"valid_targets_mean": 5021.8,
|
|
"valid_targets_min": 2808
|
|
},
|
|
{
|
|
"epoch": 0.34532374100719426,
|
|
"grad_norm": 0.23563486885998303,
|
|
"learning_rate": 2.75e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07426190376281738,
|
|
"step": 320,
|
|
"valid_targets_mean": 5327.4,
|
|
"valid_targets_min": 2771
|
|
},
|
|
{
|
|
"epoch": 0.35071942446043164,
|
|
"grad_norm": 0.2722129278613121,
|
|
"learning_rate": 2.7931034482758622e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08157891035079956,
|
|
"step": 325,
|
|
"valid_targets_mean": 5568.0,
|
|
"valid_targets_min": 3219
|
|
},
|
|
{
|
|
"epoch": 0.35611510791366907,
|
|
"grad_norm": 0.21517555907188482,
|
|
"learning_rate": 2.8362068965517244e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07121473550796509,
|
|
"step": 330,
|
|
"valid_targets_mean": 5279.6,
|
|
"valid_targets_min": 3339
|
|
},
|
|
{
|
|
"epoch": 0.36151079136690645,
|
|
"grad_norm": 0.2516120209313091,
|
|
"learning_rate": 2.8793103448275865e-05,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07057154923677444,
|
|
"step": 335,
|
|
"valid_targets_mean": 5462.8,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 0.3669064748201439,
|
|
"grad_norm": 0.23929165059468704,
|
|
"learning_rate": 2.9224137931034487e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06793409585952759,
|
|
"step": 340,
|
|
"valid_targets_mean": 5447.9,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 0.3723021582733813,
|
|
"grad_norm": 0.26505288380425496,
|
|
"learning_rate": 2.965517241379311e-05,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07121676951646805,
|
|
"step": 345,
|
|
"valid_targets_mean": 5365.3,
|
|
"valid_targets_min": 2902
|
|
},
|
|
{
|
|
"epoch": 0.3776978417266187,
|
|
"grad_norm": 0.2450330079473962,
|
|
"learning_rate": 3.008620689655173e-05,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06382409483194351,
|
|
"step": 350,
|
|
"valid_targets_mean": 4987.0,
|
|
"valid_targets_min": 3287
|
|
},
|
|
{
|
|
"epoch": 0.3830935251798561,
|
|
"grad_norm": 0.2247339341529373,
|
|
"learning_rate": 3.0517241379310348e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06054084375500679,
|
|
"step": 355,
|
|
"valid_targets_mean": 5047.8,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 0.38848920863309355,
|
|
"grad_norm": 0.21670287104800137,
|
|
"learning_rate": 3.094827586206897e-05,
|
|
"loss": 0.2013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07276194542646408,
|
|
"step": 360,
|
|
"valid_targets_mean": 5323.9,
|
|
"valid_targets_min": 2401
|
|
},
|
|
{
|
|
"epoch": 0.39388489208633093,
|
|
"grad_norm": 0.24856480257780397,
|
|
"learning_rate": 3.137931034482759e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06408809125423431,
|
|
"step": 365,
|
|
"valid_targets_mean": 5421.0,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.39928057553956836,
|
|
"grad_norm": 0.2177706797161666,
|
|
"learning_rate": 3.1810344827586206e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05981561541557312,
|
|
"step": 370,
|
|
"valid_targets_mean": 5157.3,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 0.40467625899280574,
|
|
"grad_norm": 0.27044154713435054,
|
|
"learning_rate": 3.224137931034483e-05,
|
|
"loss": 0.1965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06720144301652908,
|
|
"step": 375,
|
|
"valid_targets_mean": 5285.9,
|
|
"valid_targets_min": 2725
|
|
},
|
|
{
|
|
"epoch": 0.41007194244604317,
|
|
"grad_norm": 0.22887302691807412,
|
|
"learning_rate": 3.267241379310345e-05,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06442099064588547,
|
|
"step": 380,
|
|
"valid_targets_mean": 4471.1,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 0.4154676258992806,
|
|
"grad_norm": 0.2427633902489572,
|
|
"learning_rate": 3.310344827586207e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06555483490228653,
|
|
"step": 385,
|
|
"valid_targets_mean": 5323.2,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 0.420863309352518,
|
|
"grad_norm": 0.2361929770258699,
|
|
"learning_rate": 3.353448275862069e-05,
|
|
"loss": 0.1969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06662483513355255,
|
|
"step": 390,
|
|
"valid_targets_mean": 5507.8,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 0.4262589928057554,
|
|
"grad_norm": 0.2581864124902917,
|
|
"learning_rate": 3.3965517241379316e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06160246208310127,
|
|
"step": 395,
|
|
"valid_targets_mean": 5065.1,
|
|
"valid_targets_min": 2739
|
|
},
|
|
{
|
|
"epoch": 0.4316546762589928,
|
|
"grad_norm": 0.26174452084446487,
|
|
"learning_rate": 3.4396551724137934e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06583835184574127,
|
|
"step": 400,
|
|
"valid_targets_mean": 5267.3,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 0.4370503597122302,
|
|
"grad_norm": 0.23090174646339584,
|
|
"learning_rate": 3.482758620689655e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06701557338237762,
|
|
"step": 405,
|
|
"valid_targets_mean": 5350.7,
|
|
"valid_targets_min": 2809
|
|
},
|
|
{
|
|
"epoch": 0.44244604316546765,
|
|
"grad_norm": 0.4578162737997756,
|
|
"learning_rate": 3.525862068965518e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12810498476028442,
|
|
"step": 410,
|
|
"valid_targets_mean": 2500.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.447841726618705,
|
|
"grad_norm": 0.3439955592816175,
|
|
"learning_rate": 3.5689655172413795e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13320687413215637,
|
|
"step": 415,
|
|
"valid_targets_mean": 2896.2,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 0.45323741007194246,
|
|
"grad_norm": 0.34711046121189076,
|
|
"learning_rate": 3.612068965517242e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15754249691963196,
|
|
"step": 420,
|
|
"valid_targets_mean": 3304.2,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 0.45863309352517984,
|
|
"grad_norm": 0.3471324464098901,
|
|
"learning_rate": 3.655172413793104e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13491228222846985,
|
|
"step": 425,
|
|
"valid_targets_mean": 2892.6,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 0.46402877697841727,
|
|
"grad_norm": 0.34097643088398083,
|
|
"learning_rate": 3.6982758620689656e-05,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342666894197464,
|
|
"step": 430,
|
|
"valid_targets_mean": 2760.3,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 0.4694244604316547,
|
|
"grad_norm": 0.31997241152059186,
|
|
"learning_rate": 3.7413793103448274e-05,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11509554088115692,
|
|
"step": 435,
|
|
"valid_targets_mean": 2537.0,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.4748201438848921,
|
|
"grad_norm": 0.2961944187218414,
|
|
"learning_rate": 3.78448275862069e-05,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10283783078193665,
|
|
"step": 440,
|
|
"valid_targets_mean": 2513.7,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 0.4802158273381295,
|
|
"grad_norm": 0.3063285182212741,
|
|
"learning_rate": 3.827586206896552e-05,
|
|
"loss": 0.3906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1199796050786972,
|
|
"step": 445,
|
|
"valid_targets_mean": 3243.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.4856115107913669,
|
|
"grad_norm": 0.33712394460650424,
|
|
"learning_rate": 3.870689655172414e-05,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13619664311408997,
|
|
"step": 450,
|
|
"valid_targets_mean": 3058.7,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 0.4910071942446043,
|
|
"grad_norm": 0.3557399469315878,
|
|
"learning_rate": 3.913793103448276e-05,
|
|
"loss": 0.3868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13313749432563782,
|
|
"step": 455,
|
|
"valid_targets_mean": 2666.9,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 0.49640287769784175,
|
|
"grad_norm": 0.3074872698544308,
|
|
"learning_rate": 3.9568965517241385e-05,
|
|
"loss": 0.3878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12399311363697052,
|
|
"step": 460,
|
|
"valid_targets_mean": 2825.4,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.5017985611510791,
|
|
"grad_norm": 0.32025482161271973,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12885364890098572,
|
|
"step": 465,
|
|
"valid_targets_mean": 2946.9,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 0.5071942446043165,
|
|
"grad_norm": 0.3689192017181773,
|
|
"learning_rate": 3.999985817301812e-05,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12410745769739151,
|
|
"step": 470,
|
|
"valid_targets_mean": 2378.3,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.512589928057554,
|
|
"grad_norm": 0.3225532828628995,
|
|
"learning_rate": 3.9999432694083974e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12422003597021103,
|
|
"step": 475,
|
|
"valid_targets_mean": 3020.7,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.5179856115107914,
|
|
"grad_norm": 0.327197912630227,
|
|
"learning_rate": 3.999872356923199e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11747937649488449,
|
|
"step": 480,
|
|
"valid_targets_mean": 2624.6,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 0.5233812949640287,
|
|
"grad_norm": 0.3277544036137107,
|
|
"learning_rate": 3.999773080851948e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13312754034996033,
|
|
"step": 485,
|
|
"valid_targets_mean": 2637.3,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 0.5287769784172662,
|
|
"grad_norm": 0.31074692154468214,
|
|
"learning_rate": 3.9996454426026466e-05,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1295740008354187,
|
|
"step": 490,
|
|
"valid_targets_mean": 3129.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 0.5341726618705036,
|
|
"grad_norm": 0.2997683845813791,
|
|
"learning_rate": 3.99948944398555e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14925053715705872,
|
|
"step": 495,
|
|
"valid_targets_mean": 3229.0,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 0.539568345323741,
|
|
"grad_norm": 0.3223251663917693,
|
|
"learning_rate": 3.999305087213139e-05,
|
|
"loss": 0.3814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10910022258758545,
|
|
"step": 500,
|
|
"valid_targets_mean": 3065.2,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.5449640287769785,
|
|
"grad_norm": 0.3529670115609576,
|
|
"learning_rate": 3.999092374900091e-05,
|
|
"loss": 0.5024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18016985058784485,
|
|
"step": 505,
|
|
"valid_targets_mean": 3194.9,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 0.5503597122302158,
|
|
"grad_norm": 0.33481319326024866,
|
|
"learning_rate": 3.9988513100632394e-05,
|
|
"loss": 0.5529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22997578978538513,
|
|
"step": 510,
|
|
"valid_targets_mean": 4403.2,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 0.5557553956834532,
|
|
"grad_norm": 0.47833238459003274,
|
|
"learning_rate": 3.998581896121534e-05,
|
|
"loss": 0.5389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2788754105567932,
|
|
"step": 515,
|
|
"valid_targets_mean": 2703.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 0.5611510791366906,
|
|
"grad_norm": 0.27419312909248184,
|
|
"learning_rate": 3.998284136895993e-05,
|
|
"loss": 0.5449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15211211144924164,
|
|
"step": 520,
|
|
"valid_targets_mean": 3833.4,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 0.5665467625899281,
|
|
"grad_norm": 0.25858896667739123,
|
|
"learning_rate": 3.997958036609643e-05,
|
|
"loss": 0.5345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16302376985549927,
|
|
"step": 525,
|
|
"valid_targets_mean": 4071.8,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 0.5719424460431655,
|
|
"grad_norm": 0.3267427896890124,
|
|
"learning_rate": 3.997603599887469e-05,
|
|
"loss": 0.5603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.160415917634964,
|
|
"step": 530,
|
|
"valid_targets_mean": 3823.3,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 0.5773381294964028,
|
|
"grad_norm": 0.30238528714484314,
|
|
"learning_rate": 3.997220831756338e-05,
|
|
"loss": 0.5295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1796768754720688,
|
|
"step": 535,
|
|
"valid_targets_mean": 3716.2,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 0.5827338129496403,
|
|
"grad_norm": 0.2727304118979801,
|
|
"learning_rate": 3.9968097376449346e-05,
|
|
"loss": 0.4987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13594326376914978,
|
|
"step": 540,
|
|
"valid_targets_mean": 4653.4,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 0.5881294964028777,
|
|
"grad_norm": 0.3648444989183689,
|
|
"learning_rate": 3.996370323383684e-05,
|
|
"loss": 0.5619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2214108109474182,
|
|
"step": 545,
|
|
"valid_targets_mean": 4251.1,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 0.5935251798561151,
|
|
"grad_norm": 0.4492677817046559,
|
|
"learning_rate": 3.995902595204665e-05,
|
|
"loss": 0.5164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1641436070203781,
|
|
"step": 550,
|
|
"valid_targets_mean": 3837.9,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 0.5989208633093526,
|
|
"grad_norm": 0.429242399125553,
|
|
"learning_rate": 3.995406559741526e-05,
|
|
"loss": 0.5245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27558740973472595,
|
|
"step": 555,
|
|
"valid_targets_mean": 4099.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 0.60431654676259,
|
|
"grad_norm": 0.2787260117174852,
|
|
"learning_rate": 3.994882224029388e-05,
|
|
"loss": 0.515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11318367719650269,
|
|
"step": 560,
|
|
"valid_targets_mean": 2985.5,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 0.6097122302158273,
|
|
"grad_norm": 0.2714461248423055,
|
|
"learning_rate": 3.994329595504745e-05,
|
|
"loss": 0.5088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13463138043880463,
|
|
"step": 565,
|
|
"valid_targets_mean": 3816.5,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.6151079136690647,
|
|
"grad_norm": 0.3327420667504721,
|
|
"learning_rate": 3.993748682005363e-05,
|
|
"loss": 0.5347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805243194103241,
|
|
"step": 570,
|
|
"valid_targets_mean": 3534.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 0.6205035971223022,
|
|
"grad_norm": 0.32305391491606295,
|
|
"learning_rate": 3.99313949177016e-05,
|
|
"loss": 0.5327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18147042393684387,
|
|
"step": 575,
|
|
"valid_targets_mean": 4482.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 0.6258992805755396,
|
|
"grad_norm": 0.29930099140604266,
|
|
"learning_rate": 3.992502033439099e-05,
|
|
"loss": 0.5019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162867933511734,
|
|
"step": 580,
|
|
"valid_targets_mean": 5174.0,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 0.6312949640287769,
|
|
"grad_norm": 0.3389986448034872,
|
|
"learning_rate": 3.991836316053059e-05,
|
|
"loss": 0.5424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16579881310462952,
|
|
"step": 585,
|
|
"valid_targets_mean": 3754.9,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 0.6366906474820144,
|
|
"grad_norm": 0.28953569409871305,
|
|
"learning_rate": 3.991142349053709e-05,
|
|
"loss": 0.5159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1808883249759674,
|
|
"step": 590,
|
|
"valid_targets_mean": 4046.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.6420863309352518,
|
|
"grad_norm": 0.29525595518451264,
|
|
"learning_rate": 3.9904201422833724e-05,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2112310230731964,
|
|
"step": 595,
|
|
"valid_targets_mean": 4540.8,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 0.6474820143884892,
|
|
"grad_norm": 0.27830222128862653,
|
|
"learning_rate": 3.989669705984891e-05,
|
|
"loss": 0.5195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15632130205631256,
|
|
"step": 600,
|
|
"valid_targets_mean": 4031.6,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 0.6528776978417267,
|
|
"grad_norm": 0.2877440132433666,
|
|
"learning_rate": 3.988891050801474e-05,
|
|
"loss": 0.5197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1549455225467682,
|
|
"step": 605,
|
|
"valid_targets_mean": 4447.7,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.658273381294964,
|
|
"grad_norm": 0.2671133836570766,
|
|
"learning_rate": 3.988084187776557e-05,
|
|
"loss": 0.4628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11571325361728668,
|
|
"step": 610,
|
|
"valid_targets_mean": 6965.9,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 0.6636690647482014,
|
|
"grad_norm": 0.2554413331005041,
|
|
"learning_rate": 3.9872491283536314e-05,
|
|
"loss": 0.3585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11104419827461243,
|
|
"step": 615,
|
|
"valid_targets_mean": 6287.7,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 0.6690647482014388,
|
|
"grad_norm": 0.259894302349862,
|
|
"learning_rate": 3.986385884376094e-05,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438366025686264,
|
|
"step": 620,
|
|
"valid_targets_mean": 6181.2,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 0.6744604316546763,
|
|
"grad_norm": 0.2889823680846773,
|
|
"learning_rate": 3.9854944680870735e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10108718276023865,
|
|
"step": 625,
|
|
"valid_targets_mean": 5839.1,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 0.6798561151079137,
|
|
"grad_norm": 0.23017869482339526,
|
|
"learning_rate": 3.9845748921292585e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09219245612621307,
|
|
"step": 630,
|
|
"valid_targets_mean": 5992.5,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 0.685251798561151,
|
|
"grad_norm": 0.2170413606118262,
|
|
"learning_rate": 3.983627169544717e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289173662662506,
|
|
"step": 635,
|
|
"valid_targets_mean": 7167.2,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.6906474820143885,
|
|
"grad_norm": 0.2291998071770793,
|
|
"learning_rate": 3.982651313774711e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09046798944473267,
|
|
"step": 640,
|
|
"valid_targets_mean": 6423.7,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 0.6960431654676259,
|
|
"grad_norm": 0.25040896050029476,
|
|
"learning_rate": 3.9816473386595114e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09347311407327652,
|
|
"step": 645,
|
|
"valid_targets_mean": 6112.5,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 0.7014388489208633,
|
|
"grad_norm": 0.23410777875850672,
|
|
"learning_rate": 3.9806152584381915e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10945681482553482,
|
|
"step": 650,
|
|
"valid_targets_mean": 7196.7,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.7068345323741008,
|
|
"grad_norm": 0.2841553198766409,
|
|
"learning_rate": 3.979555087748435e-05,
|
|
"loss": 0.335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07978956401348114,
|
|
"step": 655,
|
|
"valid_targets_mean": 1509.7,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 0.7122302158273381,
|
|
"grad_norm": 0.2098020439594527,
|
|
"learning_rate": 3.978466841626323e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08036281168460846,
|
|
"step": 660,
|
|
"valid_targets_mean": 5713.6,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.7176258992805755,
|
|
"grad_norm": 0.2122221289426077,
|
|
"learning_rate": 3.9773505355061204e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11824588477611542,
|
|
"step": 665,
|
|
"valid_targets_mean": 7103.9,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 0.7230215827338129,
|
|
"grad_norm": 0.2168895172339188,
|
|
"learning_rate": 3.9762061852200604e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08899690210819244,
|
|
"step": 670,
|
|
"valid_targets_mean": 5966.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 0.7284172661870504,
|
|
"grad_norm": 0.20097865500255213,
|
|
"learning_rate": 3.975033806998119e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11559335142374039,
|
|
"step": 675,
|
|
"valid_targets_mean": 7561.1,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 0.7338129496402878,
|
|
"grad_norm": 0.23076986375307884,
|
|
"learning_rate": 3.9738334174677816e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18072107434272766,
|
|
"step": 680,
|
|
"valid_targets_mean": 7743.8,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 0.7392086330935251,
|
|
"grad_norm": 0.19493637706745665,
|
|
"learning_rate": 3.972605033653811e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11012397706508636,
|
|
"step": 685,
|
|
"valid_targets_mean": 7503.8,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 0.7446043165467626,
|
|
"grad_norm": 0.22091829841696203,
|
|
"learning_rate": 3.971348672978004e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10159297287464142,
|
|
"step": 690,
|
|
"valid_targets_mean": 5711.1,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 0.21431831058681372,
|
|
"learning_rate": 3.970064353258944e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09455393254756927,
|
|
"step": 695,
|
|
"valid_targets_mean": 5931.0,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 0.7553956834532374,
|
|
"grad_norm": 0.20872085901437715,
|
|
"learning_rate": 3.968752092711752e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444239318370819,
|
|
"step": 700,
|
|
"valid_targets_mean": 7564.3,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.7607913669064749,
|
|
"grad_norm": 0.311970996425203,
|
|
"learning_rate": 3.967411909947822e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08187437057495117,
|
|
"step": 705,
|
|
"valid_targets_mean": 4701.6,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 0.7661870503597122,
|
|
"grad_norm": 0.29176702772233304,
|
|
"learning_rate": 3.966043823974561e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07707139849662781,
|
|
"step": 710,
|
|
"valid_targets_mean": 5168.5,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 0.7715827338129496,
|
|
"grad_norm": 0.24805942884052917,
|
|
"learning_rate": 3.96464785419512e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08308923244476318,
|
|
"step": 715,
|
|
"valid_targets_mean": 5513.5,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 0.7769784172661871,
|
|
"grad_norm": 0.2417639313723342,
|
|
"learning_rate": 3.9632240204081177e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07682941854000092,
|
|
"step": 720,
|
|
"valid_targets_mean": 5370.2,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 0.7823741007194245,
|
|
"grad_norm": 0.250232144456227,
|
|
"learning_rate": 3.961772342807359e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07301056385040283,
|
|
"step": 725,
|
|
"valid_targets_mean": 4751.9,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 0.7877697841726619,
|
|
"grad_norm": 0.23785648155282393,
|
|
"learning_rate": 3.960292841981548e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08084704726934433,
|
|
"step": 730,
|
|
"valid_targets_mean": 5578.0,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.7931654676258992,
|
|
"grad_norm": 0.22590945068663362,
|
|
"learning_rate": 3.958785538913999e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08218394219875336,
|
|
"step": 735,
|
|
"valid_targets_mean": 5186.3,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 0.7985611510791367,
|
|
"grad_norm": 0.23543103825027317,
|
|
"learning_rate": 3.957250454982337e-05,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0713026374578476,
|
|
"step": 740,
|
|
"valid_targets_mean": 4736.5,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.8039568345323741,
|
|
"grad_norm": 0.22464266111001063,
|
|
"learning_rate": 3.955687611958193e-05,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06828778237104416,
|
|
"step": 745,
|
|
"valid_targets_mean": 4341.2,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 0.8093525179856115,
|
|
"grad_norm": 0.2251031116977346,
|
|
"learning_rate": 3.9540970320068995e-05,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08588295429944992,
|
|
"step": 750,
|
|
"valid_targets_mean": 5612.3,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 0.814748201438849,
|
|
"grad_norm": 0.23896940028803026,
|
|
"learning_rate": 3.952478737687171e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07905903458595276,
|
|
"step": 755,
|
|
"valid_targets_mean": 5527.5,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 0.8201438848920863,
|
|
"grad_norm": 0.2258274778048245,
|
|
"learning_rate": 3.950832751950786e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06484881043434143,
|
|
"step": 760,
|
|
"valid_targets_mean": 4836.5,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 0.8255395683453237,
|
|
"grad_norm": 0.25003948890057504,
|
|
"learning_rate": 3.949159098142266e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07148534059524536,
|
|
"step": 765,
|
|
"valid_targets_mean": 5341.6,
|
|
"valid_targets_min": 211
|
|
},
|
|
{
|
|
"epoch": 0.8309352517985612,
|
|
"grad_norm": 0.24414852646943802,
|
|
"learning_rate": 3.947457799998536e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07453455775976181,
|
|
"step": 770,
|
|
"valid_targets_mean": 5475.3,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.8363309352517986,
|
|
"grad_norm": 0.21532494275479494,
|
|
"learning_rate": 3.945728881648595e-05,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07090412080287933,
|
|
"step": 775,
|
|
"valid_targets_mean": 4993.7,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 0.841726618705036,
|
|
"grad_norm": 0.24122670364797474,
|
|
"learning_rate": 3.94397236761317e-05,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07416989654302597,
|
|
"step": 780,
|
|
"valid_targets_mean": 5846.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.8471223021582733,
|
|
"grad_norm": 0.22462552846041287,
|
|
"learning_rate": 3.9421882828043695e-05,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07025903463363647,
|
|
"step": 785,
|
|
"valid_targets_mean": 5926.5,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 0.8525179856115108,
|
|
"grad_norm": 0.2529640931750284,
|
|
"learning_rate": 3.94037665252533e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0704699382185936,
|
|
"step": 790,
|
|
"valid_targets_mean": 5794.1,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 0.8579136690647482,
|
|
"grad_norm": 0.2846415365928212,
|
|
"learning_rate": 3.9385375024698555e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0865827426314354,
|
|
"step": 795,
|
|
"valid_targets_mean": 3965.2,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 0.8633093525179856,
|
|
"grad_norm": 0.2082960156342036,
|
|
"learning_rate": 3.936670858722058e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0755947008728981,
|
|
"step": 800,
|
|
"valid_targets_mean": 5186.0,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.8687050359712231,
|
|
"grad_norm": 0.19857389877391915,
|
|
"learning_rate": 3.9347767477559824e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07174943387508392,
|
|
"step": 805,
|
|
"valid_targets_mean": 5449.4,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.8741007194244604,
|
|
"grad_norm": 0.3486607930197446,
|
|
"learning_rate": 3.932855196435231e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09697943925857544,
|
|
"step": 810,
|
|
"valid_targets_mean": 3763.9,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 0.8794964028776978,
|
|
"grad_norm": 0.30199111599387324,
|
|
"learning_rate": 3.9309062320125886e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08938825130462646,
|
|
"step": 815,
|
|
"valid_targets_mean": 3637.1,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 0.8848920863309353,
|
|
"grad_norm": 0.27252687331522574,
|
|
"learning_rate": 3.928929882129627e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0939856618642807,
|
|
"step": 820,
|
|
"valid_targets_mean": 3596.2,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.8902877697841727,
|
|
"grad_norm": 0.2841768614781188,
|
|
"learning_rate": 3.926926174816322e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08685845136642456,
|
|
"step": 825,
|
|
"valid_targets_mean": 3691.9,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 0.89568345323741,
|
|
"grad_norm": 0.2596126648828701,
|
|
"learning_rate": 3.924895138490649e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09059734642505646,
|
|
"step": 830,
|
|
"valid_targets_mean": 3853.4,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.9010791366906474,
|
|
"grad_norm": 0.23934102210969094,
|
|
"learning_rate": 3.922836801958183e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07588137686252594,
|
|
"step": 835,
|
|
"valid_targets_mean": 3486.2,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 0.9064748201438849,
|
|
"grad_norm": 0.26234204657994675,
|
|
"learning_rate": 3.9207511944116906e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08523990213871002,
|
|
"step": 840,
|
|
"valid_targets_mean": 3726.7,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.9118705035971223,
|
|
"grad_norm": 0.27843629482327714,
|
|
"learning_rate": 3.918638345430713e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08137428760528564,
|
|
"step": 845,
|
|
"valid_targets_mean": 3357.6,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 0.9172661870503597,
|
|
"grad_norm": 0.2550121226327044,
|
|
"learning_rate": 3.9164982849811506e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.084633469581604,
|
|
"step": 850,
|
|
"valid_targets_mean": 3763.7,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 0.9226618705035972,
|
|
"grad_norm": 0.24196394253971001,
|
|
"learning_rate": 3.9143310434148346e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08093880116939545,
|
|
"step": 855,
|
|
"valid_targets_mean": 3556.0,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 0.9280575539568345,
|
|
"grad_norm": 0.24105838811856217,
|
|
"learning_rate": 3.912136651469097e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08387899398803711,
|
|
"step": 860,
|
|
"valid_targets_mean": 4024.1,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 0.9334532374100719,
|
|
"grad_norm": 0.2667255236539097,
|
|
"learning_rate": 3.909915140266339e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08499912172555923,
|
|
"step": 865,
|
|
"valid_targets_mean": 3715.5,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 0.9388489208633094,
|
|
"grad_norm": 0.2582737102252545,
|
|
"learning_rate": 3.907666541313581e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07677219808101654,
|
|
"step": 870,
|
|
"valid_targets_mean": 3545.7,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 0.9442446043165468,
|
|
"grad_norm": 0.27093190308625975,
|
|
"learning_rate": 3.905390886502024e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08147233724594116,
|
|
"step": 875,
|
|
"valid_targets_mean": 3937.0,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 0.9496402877697842,
|
|
"grad_norm": 0.24370676795201554,
|
|
"learning_rate": 3.9030882081065935e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08083240687847137,
|
|
"step": 880,
|
|
"valid_targets_mean": 3809.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.9550359712230215,
|
|
"grad_norm": 0.4376377414972107,
|
|
"learning_rate": 3.900758538785482e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1016848236322403,
|
|
"step": 885,
|
|
"valid_targets_mean": 6235.2,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 0.960431654676259,
|
|
"grad_norm": 0.2883570982005158,
|
|
"learning_rate": 3.8984019115796875e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10974374413490295,
|
|
"step": 890,
|
|
"valid_targets_mean": 6916.2,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 0.9658273381294964,
|
|
"grad_norm": 0.23280187556180232,
|
|
"learning_rate": 3.896018359912541e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09505230188369751,
|
|
"step": 895,
|
|
"valid_targets_mean": 7471.8,
|
|
"valid_targets_min": 2163
|
|
},
|
|
{
|
|
"epoch": 0.9712230215827338,
|
|
"grad_norm": 0.26687569633155583,
|
|
"learning_rate": 3.893607917589237e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1029740646481514,
|
|
"step": 900,
|
|
"valid_targets_mean": 7115.8,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.9766187050359713,
|
|
"grad_norm": 0.24125719752296282,
|
|
"learning_rate": 3.891170618796352e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10338474810123444,
|
|
"step": 905,
|
|
"valid_targets_mean": 6409.3,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 0.9820143884892086,
|
|
"grad_norm": 0.2336165968436042,
|
|
"learning_rate": 3.888706498101358e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0861898735165596,
|
|
"step": 910,
|
|
"valid_targets_mean": 6192.6,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 0.987410071942446,
|
|
"grad_norm": 0.28210000107231953,
|
|
"learning_rate": 3.886215590452136e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10195198655128479,
|
|
"step": 915,
|
|
"valid_targets_mean": 6844.6,
|
|
"valid_targets_min": 2949
|
|
},
|
|
{
|
|
"epoch": 0.9928057553956835,
|
|
"grad_norm": 0.23361851112185045,
|
|
"learning_rate": 3.883697931176477e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08389988541603088,
|
|
"step": 920,
|
|
"valid_targets_mean": 6486.1,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 0.9982014388489209,
|
|
"grad_norm": 0.22768468793836413,
|
|
"learning_rate": 3.881153555981582e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0892738550901413,
|
|
"step": 925,
|
|
"valid_targets_mean": 6323.1,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 1.0032374100719423,
|
|
"grad_norm": 1.2089800519237202,
|
|
"learning_rate": 3.8785825009535584e-05,
|
|
"loss": 0.5648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23496457934379578,
|
|
"step": 930,
|
|
"valid_targets_mean": 7854.0,
|
|
"valid_targets_min": 2676
|
|
},
|
|
{
|
|
"epoch": 1.00863309352518,
|
|
"grad_norm": 0.7037184401927531,
|
|
"learning_rate": 3.8759848025569024e-05,
|
|
"loss": 0.5734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16314341127872467,
|
|
"step": 935,
|
|
"valid_targets_mean": 6740.2,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 1.0140287769784173,
|
|
"grad_norm": 0.4792672837684372,
|
|
"learning_rate": 3.873360497633986e-05,
|
|
"loss": 0.5097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15072952210903168,
|
|
"step": 940,
|
|
"valid_targets_mean": 7294.2,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 1.0194244604316547,
|
|
"grad_norm": 0.33907933863154,
|
|
"learning_rate": 3.8707096234045344e-05,
|
|
"loss": 0.4758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16921278834342957,
|
|
"step": 945,
|
|
"valid_targets_mean": 7212.2,
|
|
"valid_targets_min": 2554
|
|
},
|
|
{
|
|
"epoch": 1.024820143884892,
|
|
"grad_norm": 0.3096731657611035,
|
|
"learning_rate": 3.868032217465097e-05,
|
|
"loss": 0.452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15272179245948792,
|
|
"step": 950,
|
|
"valid_targets_mean": 7016.0,
|
|
"valid_targets_min": 2429
|
|
},
|
|
{
|
|
"epoch": 1.0302158273381294,
|
|
"grad_norm": 0.2664598250424121,
|
|
"learning_rate": 3.865328317788513e-05,
|
|
"loss": 0.4435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13758009672164917,
|
|
"step": 955,
|
|
"valid_targets_mean": 7897.7,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 1.0356115107913668,
|
|
"grad_norm": 0.27621408450756535,
|
|
"learning_rate": 3.862597962723377e-05,
|
|
"loss": 0.4362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15285232663154602,
|
|
"step": 960,
|
|
"valid_targets_mean": 7362.4,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 1.0410071942446044,
|
|
"grad_norm": 0.27171521373007623,
|
|
"learning_rate": 3.859841190993489e-05,
|
|
"loss": 0.421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13829010725021362,
|
|
"step": 965,
|
|
"valid_targets_mean": 6896.6,
|
|
"valid_targets_min": 2522
|
|
},
|
|
{
|
|
"epoch": 1.0464028776978418,
|
|
"grad_norm": 0.22702656706384494,
|
|
"learning_rate": 3.857058041697313e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13083615899085999,
|
|
"step": 970,
|
|
"valid_targets_mean": 7470.7,
|
|
"valid_targets_min": 2790
|
|
},
|
|
{
|
|
"epoch": 1.0517985611510792,
|
|
"grad_norm": 0.26383121989406144,
|
|
"learning_rate": 3.854248554307413e-05,
|
|
"loss": 0.4155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14103451371192932,
|
|
"step": 975,
|
|
"valid_targets_mean": 8266.8,
|
|
"valid_targets_min": 3307
|
|
},
|
|
{
|
|
"epoch": 1.0571942446043165,
|
|
"grad_norm": 0.23371832103449125,
|
|
"learning_rate": 3.851412768669902e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11628811806440353,
|
|
"step": 980,
|
|
"valid_targets_mean": 8286.0,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 1.062589928057554,
|
|
"grad_norm": 0.2205629835065627,
|
|
"learning_rate": 3.848550725003872e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14082926511764526,
|
|
"step": 985,
|
|
"valid_targets_mean": 9267.8,
|
|
"valid_targets_min": 5215
|
|
},
|
|
{
|
|
"epoch": 1.0679856115107913,
|
|
"grad_norm": 0.21301484178676328,
|
|
"learning_rate": 3.8456624639008235e-05,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294064223766327,
|
|
"step": 990,
|
|
"valid_targets_mean": 9206.4,
|
|
"valid_targets_min": 3552
|
|
},
|
|
{
|
|
"epoch": 1.0733812949640287,
|
|
"grad_norm": 0.21421913941944684,
|
|
"learning_rate": 3.842748026324093e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13461777567863464,
|
|
"step": 995,
|
|
"valid_targets_mean": 9805.0,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 1.0787769784172663,
|
|
"grad_norm": 0.24256706073766107,
|
|
"learning_rate": 3.839807453608268e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338627189397812,
|
|
"step": 1000,
|
|
"valid_targets_mean": 9971.8,
|
|
"valid_targets_min": 3896
|
|
},
|
|
{
|
|
"epoch": 1.0841726618705037,
|
|
"grad_norm": 0.24045352794456387,
|
|
"learning_rate": 3.836840787458605e-05,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12530121207237244,
|
|
"step": 1005,
|
|
"valid_targets_mean": 9100.1,
|
|
"valid_targets_min": 3598
|
|
},
|
|
{
|
|
"epoch": 1.089568345323741,
|
|
"grad_norm": 0.23623762383531074,
|
|
"learning_rate": 3.833848069950434e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12466295808553696,
|
|
"step": 1010,
|
|
"valid_targets_mean": 9095.8,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 1.0949640287769784,
|
|
"grad_norm": 0.22837971839655904,
|
|
"learning_rate": 3.830829343528564e-05,
|
|
"loss": 0.373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11991312354803085,
|
|
"step": 1015,
|
|
"valid_targets_mean": 8719.1,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 1.1003597122302158,
|
|
"grad_norm": 0.21743430785651363,
|
|
"learning_rate": 3.827784651006681e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12810218334197998,
|
|
"step": 1020,
|
|
"valid_targets_mean": 9952.8,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 1.1057553956834532,
|
|
"grad_norm": 0.21062644481241244,
|
|
"learning_rate": 3.82471403556674e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1231626570224762,
|
|
"step": 1025,
|
|
"valid_targets_mean": 9790.2,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 1.1111510791366905,
|
|
"grad_norm": 0.22440701611868305,
|
|
"learning_rate": 3.8216175407583526e-05,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12076719105243683,
|
|
"step": 1030,
|
|
"valid_targets_mean": 9145.0,
|
|
"valid_targets_min": 3036
|
|
},
|
|
{
|
|
"epoch": 1.1165467625899281,
|
|
"grad_norm": 0.22106031585254207,
|
|
"learning_rate": 3.8184952104981714e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11013393104076385,
|
|
"step": 1035,
|
|
"valid_targets_mean": 9393.3,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.1219424460431655,
|
|
"grad_norm": 0.21629144263044187,
|
|
"learning_rate": 3.815347089069264e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10919845849275589,
|
|
"step": 1040,
|
|
"valid_targets_mean": 8753.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.1273381294964029,
|
|
"grad_norm": 0.2367898982695017,
|
|
"learning_rate": 3.812173221120485e-05,
|
|
"loss": 0.3601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12045092135667801,
|
|
"step": 1045,
|
|
"valid_targets_mean": 9578.5,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 1.1327338129496403,
|
|
"grad_norm": 0.26404599888978547,
|
|
"learning_rate": 3.808973651665847e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12104611843824387,
|
|
"step": 1050,
|
|
"valid_targets_mean": 9520.8,
|
|
"valid_targets_min": 4270
|
|
},
|
|
{
|
|
"epoch": 1.1381294964028776,
|
|
"grad_norm": 0.27974463655141213,
|
|
"learning_rate": 3.8057484260838764e-05,
|
|
"loss": 0.3649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271187961101532,
|
|
"step": 1055,
|
|
"valid_targets_mean": 10044.1,
|
|
"valid_targets_min": 5057
|
|
},
|
|
{
|
|
"epoch": 1.143525179856115,
|
|
"grad_norm": 0.23445518780296368,
|
|
"learning_rate": 3.802497590116976e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11394988745450974,
|
|
"step": 1060,
|
|
"valid_targets_mean": 9430.1,
|
|
"valid_targets_min": 4478
|
|
},
|
|
{
|
|
"epoch": 1.1489208633093526,
|
|
"grad_norm": 0.22377981116628626,
|
|
"learning_rate": 3.7992211898707705e-05,
|
|
"loss": 0.3529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11737430095672607,
|
|
"step": 1065,
|
|
"valid_targets_mean": 9906.8,
|
|
"valid_targets_min": 4602
|
|
},
|
|
{
|
|
"epoch": 1.15431654676259,
|
|
"grad_norm": 0.30416076838627254,
|
|
"learning_rate": 3.7959192718134555e-05,
|
|
"loss": 0.3531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12114359438419342,
|
|
"step": 1070,
|
|
"valid_targets_mean": 9816.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.1597122302158274,
|
|
"grad_norm": 0.27407423235882683,
|
|
"learning_rate": 3.792591882775138e-05,
|
|
"loss": 0.3589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11804314702749252,
|
|
"step": 1075,
|
|
"valid_targets_mean": 9615.7,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 1.1651079136690647,
|
|
"grad_norm": 0.22766574557874883,
|
|
"learning_rate": 3.7892390699471726e-05,
|
|
"loss": 0.3475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11104542016983032,
|
|
"step": 1080,
|
|
"valid_targets_mean": 9669.4,
|
|
"valid_targets_min": 4373
|
|
},
|
|
{
|
|
"epoch": 1.1705035971223021,
|
|
"grad_norm": 0.1994295373093577,
|
|
"learning_rate": 3.785860880881492e-05,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12419213354587555,
|
|
"step": 1085,
|
|
"valid_targets_mean": 10030.3,
|
|
"valid_targets_min": 4760
|
|
},
|
|
{
|
|
"epoch": 1.1758992805755395,
|
|
"grad_norm": 0.21571417735141596,
|
|
"learning_rate": 3.782457363489933e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11777124553918839,
|
|
"step": 1090,
|
|
"valid_targets_mean": 9881.2,
|
|
"valid_targets_min": 4785
|
|
},
|
|
{
|
|
"epoch": 1.181294964028777,
|
|
"grad_norm": 0.24316740284233035,
|
|
"learning_rate": 3.779028566043554e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11457142233848572,
|
|
"step": 1095,
|
|
"valid_targets_mean": 9669.7,
|
|
"valid_targets_min": 3932
|
|
},
|
|
{
|
|
"epoch": 1.1866906474820145,
|
|
"grad_norm": 0.22600466865786545,
|
|
"learning_rate": 3.7755745371719546e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1161317452788353,
|
|
"step": 1100,
|
|
"valid_targets_mean": 9425.6,
|
|
"valid_targets_min": 3063
|
|
},
|
|
{
|
|
"epoch": 1.1920863309352518,
|
|
"grad_norm": 0.21664851143649888,
|
|
"learning_rate": 3.772095325862584e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12644286453723907,
|
|
"step": 1105,
|
|
"valid_targets_mean": 9942.2,
|
|
"valid_targets_min": 3468
|
|
},
|
|
{
|
|
"epoch": 1.1974820143884892,
|
|
"grad_norm": 0.19737454991743844,
|
|
"learning_rate": 3.768590981460047e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1118108332157135,
|
|
"step": 1110,
|
|
"valid_targets_mean": 9693.8,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 1.2028776978417266,
|
|
"grad_norm": 0.2156165981554578,
|
|
"learning_rate": 3.765061553665402e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1220989003777504,
|
|
"step": 1115,
|
|
"valid_targets_mean": 10011.0,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 1.208273381294964,
|
|
"grad_norm": 0.20881772368406623,
|
|
"learning_rate": 3.7615070925354584e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08132517337799072,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5209.4,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 1.2136690647482014,
|
|
"grad_norm": 0.281975310042935,
|
|
"learning_rate": 3.757927648482065e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12493978440761566,
|
|
"step": 1125,
|
|
"valid_targets_mean": 6386.3,
|
|
"valid_targets_min": 3796
|
|
},
|
|
{
|
|
"epoch": 1.219064748201439,
|
|
"grad_norm": 0.29714858589880105,
|
|
"learning_rate": 3.754323272271396e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14196348190307617,
|
|
"step": 1130,
|
|
"valid_targets_mean": 6407.3,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 1.2244604316546763,
|
|
"grad_norm": 0.6576214767217704,
|
|
"learning_rate": 3.750694015023233e-05,
|
|
"loss": 0.7828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22823312878608704,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4497.1,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 1.2298561151079137,
|
|
"grad_norm": 0.4222888164220241,
|
|
"learning_rate": 3.7470399282102355e-05,
|
|
"loss": 0.6654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1854623258113861,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4428.4,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 1.235251798561151,
|
|
"grad_norm": 0.39800787914876284,
|
|
"learning_rate": 3.7433610636572135e-05,
|
|
"loss": 0.6789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24138781428337097,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3730.1,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 1.2406474820143885,
|
|
"grad_norm": 0.30001807058119,
|
|
"learning_rate": 3.7396574735403935e-05,
|
|
"loss": 0.6318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23919786512851715,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4497.8,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 1.2460431654676258,
|
|
"grad_norm": 0.3175896546911328,
|
|
"learning_rate": 3.735929210386675e-05,
|
|
"loss": 0.6135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17985211312770844,
|
|
"step": 1155,
|
|
"valid_targets_mean": 2116.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 1.2514388489208632,
|
|
"grad_norm": 0.33018971793599083,
|
|
"learning_rate": 3.73217632707289e-05,
|
|
"loss": 0.6088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18721823394298553,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4756.1,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 1.2568345323741008,
|
|
"grad_norm": 0.25137862242079223,
|
|
"learning_rate": 3.72839887682505e-05,
|
|
"loss": 0.5997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1901225596666336,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4760.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.2622302158273382,
|
|
"grad_norm": 0.27209036617011956,
|
|
"learning_rate": 3.724596913217591e-05,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21063515543937683,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3936.6,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 1.2676258992805756,
|
|
"grad_norm": 0.26431478412247195,
|
|
"learning_rate": 3.720770490172617e-05,
|
|
"loss": 0.5984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21316054463386536,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4049.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 1.273021582733813,
|
|
"grad_norm": 0.314560087163647,
|
|
"learning_rate": 3.7169196619591284e-05,
|
|
"loss": 0.5815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2237289845943451,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3266.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 1.2784172661870503,
|
|
"grad_norm": 0.24795014056801568,
|
|
"learning_rate": 3.713044483192262e-05,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19811943173408508,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4822.6,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 1.2838129496402877,
|
|
"grad_norm": 0.26877135143462183,
|
|
"learning_rate": 3.709145008832508e-05,
|
|
"loss": 0.5912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17508143186569214,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3932.0,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 1.289208633093525,
|
|
"grad_norm": 0.2937237304285928,
|
|
"learning_rate": 3.7052212941849345e-05,
|
|
"loss": 0.577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19970647990703583,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3117.2,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 1.2946043165467627,
|
|
"grad_norm": 0.2729365716162603,
|
|
"learning_rate": 3.701273394898401e-05,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16959702968597412,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3402.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"grad_norm": 0.3707017043736891,
|
|
"learning_rate": 3.697301366964773e-05,
|
|
"loss": 0.6002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770856022834778,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3427.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 1.3053956834532374,
|
|
"grad_norm": 0.27051631330542053,
|
|
"learning_rate": 3.693305266718123e-05,
|
|
"loss": 0.5882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18100076913833618,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4640.0,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 1.3107913669064748,
|
|
"grad_norm": 0.2656336992006882,
|
|
"learning_rate": 3.689285150833935e-05,
|
|
"loss": 0.5956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1869252324104309,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3996.0,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 1.3161870503597122,
|
|
"grad_norm": 0.2949623248958983,
|
|
"learning_rate": 3.685241076328299e-05,
|
|
"loss": 0.5836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18050090968608856,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3636.5,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 1.3215827338129498,
|
|
"grad_norm": 0.2909977281200949,
|
|
"learning_rate": 3.6811731005571034e-05,
|
|
"loss": 0.6055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20079699158668518,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3391.8,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 1.326978417266187,
|
|
"grad_norm": 0.4337282156853864,
|
|
"learning_rate": 3.6770812812152215e-05,
|
|
"loss": 0.5287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13361048698425293,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5537.1,
|
|
"valid_targets_min": 2564
|
|
},
|
|
{
|
|
"epoch": 1.3323741007194245,
|
|
"grad_norm": 0.2723093499959398,
|
|
"learning_rate": 3.672965676335691e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06738299131393433,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5478.3,
|
|
"valid_targets_min": 2226
|
|
},
|
|
{
|
|
"epoch": 1.337769784172662,
|
|
"grad_norm": 0.21458192066429968,
|
|
"learning_rate": 3.668826344288894e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05311910808086395,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4935.5,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 1.3431654676258993,
|
|
"grad_norm": 0.19896775554984736,
|
|
"learning_rate": 3.6646633437817276e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06248020753264427,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5529.9,
|
|
"valid_targets_min": 3140
|
|
},
|
|
{
|
|
"epoch": 1.3485611510791367,
|
|
"grad_norm": 0.19261744943786063,
|
|
"learning_rate": 3.6604767338567724e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05509330332279205,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4935.2,
|
|
"valid_targets_min": 2984
|
|
},
|
|
{
|
|
"epoch": 1.353956834532374,
|
|
"grad_norm": 0.21651773832693447,
|
|
"learning_rate": 3.656266573891452e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05393898859620094,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4934.8,
|
|
"valid_targets_min": 2648
|
|
},
|
|
{
|
|
"epoch": 1.3593525179856116,
|
|
"grad_norm": 0.20477971636757444,
|
|
"learning_rate": 3.652032923597196e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05723518878221512,
|
|
"step": 1260,
|
|
"valid_targets_mean": 5136.8,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 1.3647482014388488,
|
|
"grad_norm": 0.20544748166104182,
|
|
"learning_rate": 3.647775843018587e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05382589250802994,
|
|
"step": 1265,
|
|
"valid_targets_mean": 5029.4,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 1.3701438848920864,
|
|
"grad_norm": 0.19370327874118423,
|
|
"learning_rate": 3.643495392532515e-05,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05765447020530701,
|
|
"step": 1270,
|
|
"valid_targets_mean": 5930.7,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 1.3755395683453238,
|
|
"grad_norm": 0.19135004175600454,
|
|
"learning_rate": 3.639191632847318e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053187206387519836,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5439.3,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 1.3809352517985611,
|
|
"grad_norm": 0.1873735163501062,
|
|
"learning_rate": 3.6348646250019195e-05,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053390875458717346,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5153.3,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 1.3863309352517985,
|
|
"grad_norm": 0.1994166234255336,
|
|
"learning_rate": 3.630514430364967e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06378839910030365,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5870.0,
|
|
"valid_targets_min": 3065
|
|
},
|
|
{
|
|
"epoch": 1.391726618705036,
|
|
"grad_norm": 0.19309021505978724,
|
|
"learning_rate": 3.626141110633957e-05,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05984627455472946,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5658.6,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 1.3971223021582735,
|
|
"grad_norm": 0.22660169596596427,
|
|
"learning_rate": 3.621744727834364e-05,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053223881870508194,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5224.7,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 1.4025179856115109,
|
|
"grad_norm": 0.2047155408200805,
|
|
"learning_rate": 3.617325344318758e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05734802782535553,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5735.1,
|
|
"valid_targets_min": 3533
|
|
},
|
|
{
|
|
"epoch": 1.4079136690647482,
|
|
"grad_norm": 0.20725775856698078,
|
|
"learning_rate": 3.6128830227659214e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05309347063302994,
|
|
"step": 1305,
|
|
"valid_targets_mean": 5265.8,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 1.4133093525179856,
|
|
"grad_norm": 0.1946955666032623,
|
|
"learning_rate": 3.6084178261799604e-05,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06062513589859009,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5609.0,
|
|
"valid_targets_min": 3609
|
|
},
|
|
{
|
|
"epoch": 1.418705035971223,
|
|
"grad_norm": 0.21494503590176667,
|
|
"learning_rate": 3.6039298178894104e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054977159947156906,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5324.1,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 1.4241007194244604,
|
|
"grad_norm": 0.24498777196394217,
|
|
"learning_rate": 3.59941906154634e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056830182671546936,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4990.3,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.4294964028776977,
|
|
"grad_norm": 0.228220637112905,
|
|
"learning_rate": 3.594885621125442e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05826651677489281,
|
|
"step": 1325,
|
|
"valid_targets_mean": 5599.9,
|
|
"valid_targets_min": 2657
|
|
},
|
|
{
|
|
"epoch": 1.4348920863309353,
|
|
"grad_norm": 0.18717221020722888,
|
|
"learning_rate": 3.590329560923135e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05736761912703514,
|
|
"step": 1330,
|
|
"valid_targets_mean": 5342.1,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 1.4402877697841727,
|
|
"grad_norm": 0.36380591404035517,
|
|
"learning_rate": 3.585750945556647e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11138807237148285,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2753.4,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 1.44568345323741,
|
|
"grad_norm": 0.34600448505841774,
|
|
"learning_rate": 3.5811498399630967e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1121455729007721,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2784.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 1.4510791366906475,
|
|
"grad_norm": 0.36302330159462143,
|
|
"learning_rate": 3.576526309398576e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11689145863056183,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2908.0,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 1.4564748201438849,
|
|
"grad_norm": 0.3188754024373517,
|
|
"learning_rate": 3.5718804194372234e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10116714239120483,
|
|
"step": 1350,
|
|
"valid_targets_mean": 2257.8,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 1.4618705035971222,
|
|
"grad_norm": 0.3164246800595274,
|
|
"learning_rate": 3.567212235970294e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09090153872966766,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2673.1,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 1.4672661870503596,
|
|
"grad_norm": 0.2563262082207949,
|
|
"learning_rate": 3.562521825205226e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09674327075481415,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3598.9,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 1.4726618705035972,
|
|
"grad_norm": 0.324824278954391,
|
|
"learning_rate": 3.557809253664699e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12109720706939697,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2927.8,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 1.4780575539568346,
|
|
"grad_norm": 0.3039859105643819,
|
|
"learning_rate": 3.553074588185692e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13715346157550812,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3389.4,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 1.483453237410072,
|
|
"grad_norm": 0.27926795443502295,
|
|
"learning_rate": 3.548317895918538e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.097773477435112,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2652.3,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 1.4888489208633093,
|
|
"grad_norm": 0.2979618678579323,
|
|
"learning_rate": 3.543539244325967e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11990603059530258,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3397.5,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 1.4942446043165467,
|
|
"grad_norm": 0.31324738242568734,
|
|
"learning_rate": 3.538738701182152e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11759290099143982,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3299.4,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 1.4996402877697843,
|
|
"grad_norm": 0.3405088250107745,
|
|
"learning_rate": 3.533916334571748e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09863434731960297,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2646.8,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 1.5050359712230215,
|
|
"grad_norm": 0.3088225176632682,
|
|
"learning_rate": 3.5290722128889254e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10546815395355225,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2510.5,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 1.510431654676259,
|
|
"grad_norm": 0.3026311899379941,
|
|
"learning_rate": 3.5242064048363996e-05,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11105895042419434,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3139.3,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.5158273381294964,
|
|
"grad_norm": 0.28646147415166573,
|
|
"learning_rate": 3.519318979424458e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10199194401502609,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 1.5212230215827338,
|
|
"grad_norm": 0.291123431290571,
|
|
"learning_rate": 3.514410005969979e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1161123514175415,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3203.1,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 1.5266187050359712,
|
|
"grad_norm": 0.3112143536952761,
|
|
"learning_rate": 3.509479554095453e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09655815362930298,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2343.8,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.5320143884892086,
|
|
"grad_norm": 0.3154107027715446,
|
|
"learning_rate": 3.504527693727991e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10534416139125824,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2488.7,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 1.5374100719424462,
|
|
"grad_norm": 0.2924036816259049,
|
|
"learning_rate": 3.499554495098333e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11656372249126434,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3664.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 1.5428057553956833,
|
|
"grad_norm": 0.5535948432993845,
|
|
"learning_rate": 3.494560028739854e-05,
|
|
"loss": 0.3734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17744848132133484,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3916.4,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 1.548201438848921,
|
|
"grad_norm": 0.33840442309151586,
|
|
"learning_rate": 3.489544365487564e-05,
|
|
"loss": 0.4799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1448664665222168,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3287.2,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 1.5535971223021583,
|
|
"grad_norm": 0.28039290419942675,
|
|
"learning_rate": 3.4845075764771006e-05,
|
|
"loss": 0.4754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137318417429924,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4194.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 1.5589928057553957,
|
|
"grad_norm": 0.3125834061595516,
|
|
"learning_rate": 3.479449733143722e-05,
|
|
"loss": 0.4962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15208642184734344,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3712.2,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 1.564388489208633,
|
|
"grad_norm": 0.3044041836326498,
|
|
"learning_rate": 3.474370907221294e-05,
|
|
"loss": 0.4845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14053666591644287,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3538.5,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 1.5697841726618704,
|
|
"grad_norm": 0.5576304682332786,
|
|
"learning_rate": 3.469271170741272e-05,
|
|
"loss": 0.4931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2267555296421051,
|
|
"step": 1455,
|
|
"valid_targets_mean": 1189.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 1.575179856115108,
|
|
"grad_norm": 0.281716231865097,
|
|
"learning_rate": 3.4641505960316784e-05,
|
|
"loss": 0.4794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15423545241355896,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4119.4,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 1.5805755395683452,
|
|
"grad_norm": 0.24315713359428015,
|
|
"learning_rate": 3.45900925571608e-05,
|
|
"loss": 0.4525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14715474843978882,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4418.8,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 1.5859712230215828,
|
|
"grad_norm": 0.2941326113298694,
|
|
"learning_rate": 3.453847222712554e-05,
|
|
"loss": 0.4825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13437074422836304,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3462.2,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 1.5913669064748202,
|
|
"grad_norm": 0.30488011917844526,
|
|
"learning_rate": 3.4486645702326576e-05,
|
|
"loss": 0.4722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18727123737335205,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3550.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.5967625899280575,
|
|
"grad_norm": 0.24919332116842013,
|
|
"learning_rate": 3.443461371780385e-05,
|
|
"loss": 0.4618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14549365639686584,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4635.3,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 1.6021582733812951,
|
|
"grad_norm": 0.2872649526091497,
|
|
"learning_rate": 3.438237701151131e-05,
|
|
"loss": 0.4774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16222310066223145,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3635.9,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 1.6075539568345323,
|
|
"grad_norm": 0.31293047124246504,
|
|
"learning_rate": 3.432993632430639e-05,
|
|
"loss": 0.4547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11748822778463364,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2792.4,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 1.6129496402877699,
|
|
"grad_norm": 0.37389311045964846,
|
|
"learning_rate": 3.427729239993952e-05,
|
|
"loss": 0.4719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913475036621094,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4429.5,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 1.618345323741007,
|
|
"grad_norm": 0.2784374542939828,
|
|
"learning_rate": 3.4224445985043603e-05,
|
|
"loss": 0.4782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15074509382247925,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4782.1,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 1.6237410071942446,
|
|
"grad_norm": 0.2945371629528074,
|
|
"learning_rate": 3.417139782912339e-05,
|
|
"loss": 0.4661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14756985008716583,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4143.1,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 1.629136690647482,
|
|
"grad_norm": 0.295968878676988,
|
|
"learning_rate": 3.411814868454487e-05,
|
|
"loss": 0.4823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15383435785770416,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4065.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.6345323741007194,
|
|
"grad_norm": 0.28952491641425576,
|
|
"learning_rate": 3.406469930652458e-05,
|
|
"loss": 0.4682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15049080550670624,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4074.2,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 1.639928057553957,
|
|
"grad_norm": 0.2791927950066981,
|
|
"learning_rate": 3.401105045311891e-05,
|
|
"loss": 0.4526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13657569885253906,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4832.0,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 1.6453237410071941,
|
|
"grad_norm": 0.3188780901998309,
|
|
"learning_rate": 3.3957202885213384e-05,
|
|
"loss": 0.4792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18847545981407166,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3554.3,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 1.6507194244604317,
|
|
"grad_norm": 0.28948792238836474,
|
|
"learning_rate": 3.390315736651178e-05,
|
|
"loss": 0.4765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16858667135238647,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4176.4,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 1.6561151079136691,
|
|
"grad_norm": 0.4376287512915144,
|
|
"learning_rate": 3.3848914663525386e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13602569699287415,
|
|
"step": 1535,
|
|
"valid_targets_mean": 5585.3,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 1.6615107913669065,
|
|
"grad_norm": 0.278608147458592,
|
|
"learning_rate": 3.379447554556209e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08939287066459656,
|
|
"step": 1540,
|
|
"valid_targets_mean": 6182.6,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 1.6669064748201439,
|
|
"grad_norm": 0.22679890729496202,
|
|
"learning_rate": 3.373984078471547e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11956652998924255,
|
|
"step": 1545,
|
|
"valid_targets_mean": 7839.3,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 1.6723021582733812,
|
|
"grad_norm": 0.2237936516120892,
|
|
"learning_rate": 3.368501115585384e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.103504478931427,
|
|
"step": 1550,
|
|
"valid_targets_mean": 6174.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 1.6776978417266188,
|
|
"grad_norm": 0.19593330680949675,
|
|
"learning_rate": 3.36299874366093e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11450635641813278,
|
|
"step": 1555,
|
|
"valid_targets_mean": 7711.5,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 1.683093525179856,
|
|
"grad_norm": 0.2336090872706087,
|
|
"learning_rate": 3.357477040736663e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07635249197483063,
|
|
"step": 1560,
|
|
"valid_targets_mean": 5962.4,
|
|
"valid_targets_min": 158
|
|
},
|
|
{
|
|
"epoch": 1.6884892086330936,
|
|
"grad_norm": 0.1821528161771427,
|
|
"learning_rate": 3.35193608512523e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07920248061418533,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5733.2,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 1.693884892086331,
|
|
"grad_norm": 0.5557242577189666,
|
|
"learning_rate": 3.3463759554123334e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059206750243902206,
|
|
"step": 1570,
|
|
"valid_targets_mean": 1183.0,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 1.6992805755395683,
|
|
"grad_norm": 0.18900350298934246,
|
|
"learning_rate": 3.340796730455613e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06798979640007019,
|
|
"step": 1575,
|
|
"valid_targets_mean": 5966.8,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.7046762589928057,
|
|
"grad_norm": 0.18574128383265723,
|
|
"learning_rate": 3.335198489383533e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06734254956245422,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4898.7,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 1.710071942446043,
|
|
"grad_norm": 0.1877750548645594,
|
|
"learning_rate": 3.3295813115942564e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08317604660987854,
|
|
"step": 1585,
|
|
"valid_targets_mean": 6202.8,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 1.7154676258992807,
|
|
"grad_norm": 0.18050959733755068,
|
|
"learning_rate": 3.323945276754522e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09186476469039917,
|
|
"step": 1590,
|
|
"valid_targets_mean": 7420.6,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 1.7208633093525179,
|
|
"grad_norm": 0.236884288755946,
|
|
"learning_rate": 3.318290464798509e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13468478620052338,
|
|
"step": 1595,
|
|
"valid_targets_mean": 7645.6,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 1.7262589928057555,
|
|
"grad_norm": 0.183682347468143,
|
|
"learning_rate": 3.312616955926711e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10095088928937912,
|
|
"step": 1600,
|
|
"valid_targets_mean": 6437.9,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 1.7316546762589928,
|
|
"grad_norm": 0.1902107144052719,
|
|
"learning_rate": 3.30692483060479e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09403005987405777,
|
|
"step": 1605,
|
|
"valid_targets_mean": 6452.5,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.7370503597122302,
|
|
"grad_norm": 0.18226191486720658,
|
|
"learning_rate": 3.301214169562441e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08484260737895966,
|
|
"step": 1610,
|
|
"valid_targets_mean": 6317.1,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 1.7424460431654676,
|
|
"grad_norm": 0.16528403540579456,
|
|
"learning_rate": 3.295485053792249e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09109783172607422,
|
|
"step": 1615,
|
|
"valid_targets_mean": 6657.9,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 1.747841726618705,
|
|
"grad_norm": 0.405497086054236,
|
|
"learning_rate": 3.289737564548531e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08013225346803665,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5992.4,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 1.7532374100719426,
|
|
"grad_norm": 0.15915583940693953,
|
|
"learning_rate": 3.283971783346194e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06978192925453186,
|
|
"step": 1625,
|
|
"valid_targets_mean": 6126.7,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 1.7586330935251797,
|
|
"grad_norm": 0.22057311318151201,
|
|
"learning_rate": 3.278187791959571e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06387412548065186,
|
|
"step": 1630,
|
|
"valid_targets_mean": 1509.7,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 1.7640287769784173,
|
|
"grad_norm": 0.22698232131536936,
|
|
"learning_rate": 3.2723856724212675e-05,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0739467516541481,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5922.6,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 1.7694244604316547,
|
|
"grad_norm": 0.21794608652638248,
|
|
"learning_rate": 3.266565507020992e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0649498701095581,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5310.9,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.774820143884892,
|
|
"grad_norm": 0.2169782298333363,
|
|
"learning_rate": 3.2607273783043974e-05,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06289531290531158,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4497.7,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 1.7802158273381294,
|
|
"grad_norm": 0.1945364967410362,
|
|
"learning_rate": 3.254871369071897e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06429216265678406,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5293.4,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.7856115107913668,
|
|
"grad_norm": 0.19928882757795213,
|
|
"learning_rate": 3.248997562377506e-05,
|
|
"loss": 0.1938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06664995849132538,
|
|
"step": 1655,
|
|
"valid_targets_mean": 5189.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 1.7910071942446044,
|
|
"grad_norm": 0.1988039732334096,
|
|
"learning_rate": 3.243106041527649e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06908870488405228,
|
|
"step": 1660,
|
|
"valid_targets_mean": 5132.1,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 1.7964028776978416,
|
|
"grad_norm": 0.20948665784604284,
|
|
"learning_rate": 3.2371968900799895e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05636874586343765,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4858.0,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 1.8017985611510792,
|
|
"grad_norm": 0.19838124396280366,
|
|
"learning_rate": 3.2312701918422384e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06812453269958496,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5312.7,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.8071942446043165,
|
|
"grad_norm": 0.18390969826184042,
|
|
"learning_rate": 3.22532603087097e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05537774786353111,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4486.5,
|
|
"valid_targets_min": 216
|
|
},
|
|
{
|
|
"epoch": 1.812589928057554,
|
|
"grad_norm": 0.1863083911039743,
|
|
"learning_rate": 3.219364491470422e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0528709813952446,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4421.3,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 1.8179856115107915,
|
|
"grad_norm": 0.1948090135430311,
|
|
"learning_rate": 3.213385658191312e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06668928265571594,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5382.3,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 1.8233812949640287,
|
|
"grad_norm": 0.21632368047085526,
|
|
"learning_rate": 3.2073896158296255e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05577654391527176,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4716.2,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 1.8287769784172663,
|
|
"grad_norm": 0.21254568933093587,
|
|
"learning_rate": 3.2013764494254224e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06339570134878159,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5423.3,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 1.8341726618705037,
|
|
"grad_norm": 0.1941401638164744,
|
|
"learning_rate": 3.195346244261627e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0688885748386383,
|
|
"step": 1700,
|
|
"valid_targets_mean": 5819.8,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.839568345323741,
|
|
"grad_norm": 0.22943253549745418,
|
|
"learning_rate": 3.1892990858628185e-05,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06391825526952744,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4621.3,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.8449640287769784,
|
|
"grad_norm": 0.18772669043466453,
|
|
"learning_rate": 3.18323505999402e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07347133755683899,
|
|
"step": 1710,
|
|
"valid_targets_mean": 6490.0,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.8503597122302158,
|
|
"grad_norm": 0.1852741707541549,
|
|
"learning_rate": 3.177154252659481e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05065922066569328,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4670.9,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 1.8557553956834534,
|
|
"grad_norm": 0.1746588311631276,
|
|
"learning_rate": 3.171056750101455e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06347249448299408,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5857.8,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 1.8611510791366905,
|
|
"grad_norm": 0.2650385608669735,
|
|
"learning_rate": 3.164942638798982e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058201201260089874,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4251.2,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 1.8665467625899281,
|
|
"grad_norm": 0.2132337798674345,
|
|
"learning_rate": 3.158812005466656e-05,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05050525441765785,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3827.2,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 1.8719424460431655,
|
|
"grad_norm": 0.2477645288492953,
|
|
"learning_rate": 3.1526649370534e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06799399107694626,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3315.8,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 1.8773381294964029,
|
|
"grad_norm": 0.23871362596946374,
|
|
"learning_rate": 3.14650152074123e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07939553260803223,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3913.5,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 1.8827338129496403,
|
|
"grad_norm": 0.2414981667318744,
|
|
"learning_rate": 3.140321843944019e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06878504157066345,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3445.0,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 1.8881294964028776,
|
|
"grad_norm": 0.25088130268075454,
|
|
"learning_rate": 3.134125994306259e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08156734704971313,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4029.2,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 1.8935251798561152,
|
|
"grad_norm": 0.24593727023556047,
|
|
"learning_rate": 3.1279140597018135e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07628435641527176,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3621.6,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 1.8989208633093524,
|
|
"grad_norm": 0.23327217771956207,
|
|
"learning_rate": 3.121686128232678e-05,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07808522880077362,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3852.1,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 1.90431654676259,
|
|
"grad_norm": 0.23347259639996987,
|
|
"learning_rate": 3.115442288227723e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07228676974773407,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3520.1,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.9097122302158274,
|
|
"grad_norm": 0.24107156066280988,
|
|
"learning_rate": 3.109182628241449e-05,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07859019935131073,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4023.8,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 1.9151079136690647,
|
|
"grad_norm": 0.21941437857643178,
|
|
"learning_rate": 3.1029072370527225e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07826776057481766,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3957.6,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 1.9205035971223021,
|
|
"grad_norm": 0.25276007090490005,
|
|
"learning_rate": 3.096616203663524e-05,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07042327523231506,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3712.1,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.9258992805755395,
|
|
"grad_norm": 0.2350922198849918,
|
|
"learning_rate": 3.0903096172976814e-05,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07218310236930847,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4089.0,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 1.931294964028777,
|
|
"grad_norm": 0.2318314095199113,
|
|
"learning_rate": 3.083987567399604e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06838338077068329,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3802.9,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.9366906474820142,
|
|
"grad_norm": 0.26754994846235386,
|
|
"learning_rate": 3.07765014363302e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07452942430973053,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3961.2,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 1.9420863309352518,
|
|
"grad_norm": 0.22789200465437448,
|
|
"learning_rate": 3.071297435879695e-05,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06451773643493652,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3813.9,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 1.9474820143884892,
|
|
"grad_norm": 0.24392442003872125,
|
|
"learning_rate": 3.064929534238168e-05,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0703209936618805,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3918.7,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 1.9528776978417266,
|
|
"grad_norm": 0.4362202521792265,
|
|
"learning_rate": 3.0585465290224657e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10010881721973419,
|
|
"step": 1810,
|
|
"valid_targets_mean": 7150.2,
|
|
"valid_targets_min": 3704
|
|
},
|
|
{
|
|
"epoch": 1.958273381294964,
|
|
"grad_norm": 0.28630519449208164,
|
|
"learning_rate": 3.052148510760824e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09028595685958862,
|
|
"step": 1815,
|
|
"valid_targets_mean": 7169.0,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 1.9636690647482014,
|
|
"grad_norm": 0.2705361595827519,
|
|
"learning_rate": 3.0457355701944058e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09457117319107056,
|
|
"step": 1820,
|
|
"valid_targets_mean": 6982.2,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 1.969064748201439,
|
|
"grad_norm": 0.25618021790920936,
|
|
"learning_rate": 3.0393077982760103e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0801270455121994,
|
|
"step": 1825,
|
|
"valid_targets_mean": 6963.0,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 1.974460431654676,
|
|
"grad_norm": 0.22969814881358513,
|
|
"learning_rate": 3.0328652861687876e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08513946831226349,
|
|
"step": 1830,
|
|
"valid_targets_mean": 6669.7,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.9798561151079137,
|
|
"grad_norm": 0.2325797726039593,
|
|
"learning_rate": 3.0264081252449427e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08494792878627777,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6615.3,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.985251798561151,
|
|
"grad_norm": 0.20149873023550352,
|
|
"learning_rate": 3.0199364070844398e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07481887936592102,
|
|
"step": 1840,
|
|
"valid_targets_mean": 6520.3,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 1.9906474820143885,
|
|
"grad_norm": 0.2162804787671238,
|
|
"learning_rate": 3.0134502234737034e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07859468460083008,
|
|
"step": 1845,
|
|
"valid_targets_mean": 6486.0,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 1.996043165467626,
|
|
"grad_norm": 0.22763995137030757,
|
|
"learning_rate": 3.00694966640432e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08021897822618484,
|
|
"step": 1850,
|
|
"valid_targets_mean": 6009.3,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 2.0010791366906475,
|
|
"grad_norm": 2.5832198467844836,
|
|
"learning_rate": 3.0004348280717267e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24019908905029297,
|
|
"step": 1855,
|
|
"valid_targets_mean": 7264.4,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 2.0064748201438847,
|
|
"grad_norm": 0.9220739241171466,
|
|
"learning_rate": 2.9939058008739104e-05,
|
|
"loss": 0.5562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20399007201194763,
|
|
"step": 1860,
|
|
"valid_targets_mean": 9100.2,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 2.0118705035971223,
|
|
"grad_norm": 0.8449988918171326,
|
|
"learning_rate": 2.9873626774100926e-05,
|
|
"loss": 0.4663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14136357605457306,
|
|
"step": 1865,
|
|
"valid_targets_mean": 6959.6,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 2.01726618705036,
|
|
"grad_norm": 0.39277349847362497,
|
|
"learning_rate": 2.9808055504794198e-05,
|
|
"loss": 0.4301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14355406165122986,
|
|
"step": 1870,
|
|
"valid_targets_mean": 7188.0,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 2.022661870503597,
|
|
"grad_norm": 0.2839666827176854,
|
|
"learning_rate": 2.9742345130796427e-05,
|
|
"loss": 0.3998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481347382068634,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6959.9,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 2.0280575539568346,
|
|
"grad_norm": 0.25971612345946227,
|
|
"learning_rate": 2.967649658405803e-05,
|
|
"loss": 0.3935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14542320370674133,
|
|
"step": 1880,
|
|
"valid_targets_mean": 7818.4,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 2.033453237410072,
|
|
"grad_norm": 0.25216306446189896,
|
|
"learning_rate": 2.9610510798489053e-05,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13150623440742493,
|
|
"step": 1885,
|
|
"valid_targets_mean": 7926.4,
|
|
"valid_targets_min": 3527
|
|
},
|
|
{
|
|
"epoch": 2.0388489208633094,
|
|
"grad_norm": 0.23581013976766343,
|
|
"learning_rate": 2.9544388709945996e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11412892490625381,
|
|
"step": 1890,
|
|
"valid_targets_mean": 6605.4,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 2.044244604316547,
|
|
"grad_norm": 0.23956586913030958,
|
|
"learning_rate": 2.9478131256218474e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12359167635440826,
|
|
"step": 1895,
|
|
"valid_targets_mean": 6947.1,
|
|
"valid_targets_min": 2244
|
|
},
|
|
{
|
|
"epoch": 2.049640287769784,
|
|
"grad_norm": 0.26162826174273085,
|
|
"learning_rate": 2.9411739377015962e-05,
|
|
"loss": 0.3754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12847085297107697,
|
|
"step": 1900,
|
|
"valid_targets_mean": 7027.9,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 2.0550359712230217,
|
|
"grad_norm": 0.22668189025684513,
|
|
"learning_rate": 2.9345214013954434e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14553312957286835,
|
|
"step": 1905,
|
|
"valid_targets_mean": 8959.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 2.060431654676259,
|
|
"grad_norm": 0.20126780665114438,
|
|
"learning_rate": 2.9278556110543052e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1123625636100769,
|
|
"step": 1910,
|
|
"valid_targets_mean": 9482.4,
|
|
"valid_targets_min": 5095
|
|
},
|
|
{
|
|
"epoch": 2.0658273381294965,
|
|
"grad_norm": 0.20513256779091957,
|
|
"learning_rate": 2.9211766612170732e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10829517990350723,
|
|
"step": 1915,
|
|
"valid_targets_mean": 8913.7,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 2.0712230215827336,
|
|
"grad_norm": 0.20947342629164867,
|
|
"learning_rate": 2.9144846466092773e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11553197354078293,
|
|
"step": 1920,
|
|
"valid_targets_mean": 8780.3,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 2.0766187050359712,
|
|
"grad_norm": 0.1971856374125698,
|
|
"learning_rate": 2.9077796621417414e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11218050122261047,
|
|
"step": 1925,
|
|
"valid_targets_mean": 9303.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.082014388489209,
|
|
"grad_norm": 0.2056159831107492,
|
|
"learning_rate": 2.9010618029092352e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10995827615261078,
|
|
"step": 1930,
|
|
"valid_targets_mean": 9819.0,
|
|
"valid_targets_min": 5584
|
|
},
|
|
{
|
|
"epoch": 2.087410071942446,
|
|
"grad_norm": 0.2073054689906605,
|
|
"learning_rate": 2.8943311641891303e-05,
|
|
"loss": 0.331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11439375579357147,
|
|
"step": 1935,
|
|
"valid_targets_mean": 9100.7,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 2.0928057553956836,
|
|
"grad_norm": 0.21929281944097276,
|
|
"learning_rate": 2.8875878414400426e-05,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11166255176067352,
|
|
"step": 1940,
|
|
"valid_targets_mean": 9212.3,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 2.0982014388489207,
|
|
"grad_norm": 0.20306187802787823,
|
|
"learning_rate": 2.880831930300486e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09923559427261353,
|
|
"step": 1945,
|
|
"valid_targets_mean": 8648.9,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 2.1035971223021583,
|
|
"grad_norm": 0.20307171567838236,
|
|
"learning_rate": 2.8740635265875066e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12336774170398712,
|
|
"step": 1950,
|
|
"valid_targets_mean": 10183.4,
|
|
"valid_targets_min": 3886
|
|
},
|
|
{
|
|
"epoch": 2.1089928057553955,
|
|
"grad_norm": 0.2155619258959029,
|
|
"learning_rate": 2.8672827262953323e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10578882694244385,
|
|
"step": 1955,
|
|
"valid_targets_mean": 9594.6,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 2.114388489208633,
|
|
"grad_norm": 0.20447851208937154,
|
|
"learning_rate": 2.860489625594008e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10244246572256088,
|
|
"step": 1960,
|
|
"valid_targets_mean": 8928.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 2.1197841726618707,
|
|
"grad_norm": 0.20684919918944195,
|
|
"learning_rate": 2.8536843208280288e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1118353083729744,
|
|
"step": 1965,
|
|
"valid_targets_mean": 9772.7,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 2.125179856115108,
|
|
"grad_norm": 0.1899425988612872,
|
|
"learning_rate": 2.8468669085149812e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11148250102996826,
|
|
"step": 1970,
|
|
"valid_targets_mean": 10406.6,
|
|
"valid_targets_min": 5877
|
|
},
|
|
{
|
|
"epoch": 2.1305755395683454,
|
|
"grad_norm": 0.18752210493686924,
|
|
"learning_rate": 2.840037485344163e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11186987906694412,
|
|
"step": 1975,
|
|
"valid_targets_mean": 9535.4,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 2.1359712230215826,
|
|
"grad_norm": 0.19068810104822473,
|
|
"learning_rate": 2.8331961481752244e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10549414157867432,
|
|
"step": 1980,
|
|
"valid_targets_mean": 9136.9,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 2.14136690647482,
|
|
"grad_norm": 0.18880657653803673,
|
|
"learning_rate": 2.826342994036785e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09433126449584961,
|
|
"step": 1985,
|
|
"valid_targets_mean": 9388.7,
|
|
"valid_targets_min": 3282
|
|
},
|
|
{
|
|
"epoch": 2.1467625899280574,
|
|
"grad_norm": 0.22395125714492645,
|
|
"learning_rate": 2.8194781201250608e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10528635233640671,
|
|
"step": 1990,
|
|
"valid_targets_mean": 9729.2,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 2.152158273381295,
|
|
"grad_norm": 0.21093648258399933,
|
|
"learning_rate": 2.8126016238024876e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12127436697483063,
|
|
"step": 1995,
|
|
"valid_targets_mean": 10332.3,
|
|
"valid_targets_min": 4344
|
|
},
|
|
{
|
|
"epoch": 2.1575539568345325,
|
|
"grad_norm": 0.20323488140358548,
|
|
"learning_rate": 2.805713602596337e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11895902454853058,
|
|
"step": 2000,
|
|
"valid_targets_mean": 11408.0,
|
|
"valid_targets_min": 5726
|
|
},
|
|
{
|
|
"epoch": 2.1629496402877697,
|
|
"grad_norm": 0.20682938855325428,
|
|
"learning_rate": 2.798814154197335e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11659000813961029,
|
|
"step": 2005,
|
|
"valid_targets_mean": 10250.5,
|
|
"valid_targets_min": 2607
|
|
},
|
|
{
|
|
"epoch": 2.1683453237410073,
|
|
"grad_norm": 0.1851406428891927,
|
|
"learning_rate": 2.7919033764582752e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10548830777406693,
|
|
"step": 2010,
|
|
"valid_targets_mean": 9131.8,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 2.1737410071942445,
|
|
"grad_norm": 0.1934945794401406,
|
|
"learning_rate": 2.7849813673926333e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11139778792858124,
|
|
"step": 2015,
|
|
"valid_targets_mean": 10329.2,
|
|
"valid_targets_min": 5291
|
|
},
|
|
{
|
|
"epoch": 2.179136690647482,
|
|
"grad_norm": 0.22391497348069153,
|
|
"learning_rate": 2.778048225173174e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10432121902704239,
|
|
"step": 2020,
|
|
"valid_targets_mean": 8999.2,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 2.184532374100719,
|
|
"grad_norm": 0.19302072579726334,
|
|
"learning_rate": 2.7711040481305617e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11431066691875458,
|
|
"step": 2025,
|
|
"valid_targets_mean": 9227.1,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 2.189928057553957,
|
|
"grad_norm": 0.21083278639110703,
|
|
"learning_rate": 2.7641489347519637e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1014428436756134,
|
|
"step": 2030,
|
|
"valid_targets_mean": 9084.0,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 2.1953237410071944,
|
|
"grad_norm": 0.22040052262288112,
|
|
"learning_rate": 2.7571829836796525e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.092393659055233,
|
|
"step": 2035,
|
|
"valid_targets_mean": 9274.7,
|
|
"valid_targets_min": 4259
|
|
},
|
|
{
|
|
"epoch": 2.2007194244604316,
|
|
"grad_norm": 0.2080871280899813,
|
|
"learning_rate": 2.750206293709611e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09870747476816177,
|
|
"step": 2040,
|
|
"valid_targets_mean": 8931.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.206115107913669,
|
|
"grad_norm": 0.21275851765527395,
|
|
"learning_rate": 2.7432189637901276e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324768364429474,
|
|
"step": 2045,
|
|
"valid_targets_mean": 11522.8,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 2.2115107913669063,
|
|
"grad_norm": 0.2705249144128022,
|
|
"learning_rate": 2.736221093020393e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11588154733181,
|
|
"step": 2050,
|
|
"valid_targets_mean": 6154.2,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 2.216906474820144,
|
|
"grad_norm": 0.27737502598323743,
|
|
"learning_rate": 2.7292127806490962e-05,
|
|
"loss": 0.3402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1138405054807663,
|
|
"step": 2055,
|
|
"valid_targets_mean": 6070.9,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 2.222302158273381,
|
|
"grad_norm": 1.6380396895332892,
|
|
"learning_rate": 2.722194126073016e-05,
|
|
"loss": 0.6046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20924179255962372,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3383.0,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 2.2276978417266187,
|
|
"grad_norm": 0.6296178497991545,
|
|
"learning_rate": 2.7151652288356136e-05,
|
|
"loss": 0.6713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16544781625270844,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2937.2,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 2.2330935251798563,
|
|
"grad_norm": 0.47105730893168246,
|
|
"learning_rate": 2.7081261886256157e-05,
|
|
"loss": 0.6071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2907264828681946,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4071.6,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 2.2384892086330934,
|
|
"grad_norm": 0.34443941973827225,
|
|
"learning_rate": 2.701077105275606e-05,
|
|
"loss": 0.5905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19621212780475616,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5239.0,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 2.243884892086331,
|
|
"grad_norm": 0.29144551800868296,
|
|
"learning_rate": 2.6940180787606053e-05,
|
|
"loss": 0.578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17461201548576355,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4039.2,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 2.249280575539568,
|
|
"grad_norm": 0.2933466216082999,
|
|
"learning_rate": 2.6869492091966557e-05,
|
|
"loss": 0.5556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15896697342395782,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3473.9,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 2.2546762589928058,
|
|
"grad_norm": 0.2955876343389498,
|
|
"learning_rate": 2.6798705968394024e-05,
|
|
"loss": 0.5655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16958436369895935,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3420.7,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 2.260071942446043,
|
|
"grad_norm": 0.30910503270498263,
|
|
"learning_rate": 2.672782342082667e-05,
|
|
"loss": 0.5498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24809867143630981,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3772.6,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 2.2654676258992805,
|
|
"grad_norm": 0.2640844728650452,
|
|
"learning_rate": 2.665684545457028e-05,
|
|
"loss": 0.5449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17317599058151245,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4168.5,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 2.270863309352518,
|
|
"grad_norm": 0.2446165891371924,
|
|
"learning_rate": 2.6585773076283918e-05,
|
|
"loss": 0.5471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12914419174194336,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3252.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.2762589928057553,
|
|
"grad_norm": 0.25250419378721883,
|
|
"learning_rate": 2.6514607293965686e-05,
|
|
"loss": 0.5383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21184009313583374,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4318.8,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 2.281654676258993,
|
|
"grad_norm": 0.2542801407036577,
|
|
"learning_rate": 2.6443349116938386e-05,
|
|
"loss": 0.5495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16121625900268555,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3735.0,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 2.28705035971223,
|
|
"grad_norm": 0.25977492557209775,
|
|
"learning_rate": 2.637199955583524e-05,
|
|
"loss": 0.5277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260357290506363,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4098.8,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.2924460431654676,
|
|
"grad_norm": 0.25286940423227167,
|
|
"learning_rate": 2.6300559622585545e-05,
|
|
"loss": 0.5355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16456982493400574,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4343.0,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 2.2978417266187052,
|
|
"grad_norm": 0.2597076346571475,
|
|
"learning_rate": 2.6229030330400296e-05,
|
|
"loss": 0.5484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16237759590148926,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3758.9,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 2.3032374100719424,
|
|
"grad_norm": 0.2677358203814037,
|
|
"learning_rate": 2.6157412693757874e-05,
|
|
"loss": 0.56,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1570054292678833,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3194.6,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 2.30863309352518,
|
|
"grad_norm": 0.2659525781925733,
|
|
"learning_rate": 2.6085707728389597e-05,
|
|
"loss": 0.5568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16920790076255798,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3790.3,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 2.314028776978417,
|
|
"grad_norm": 0.2656147434728084,
|
|
"learning_rate": 2.601391645126535e-05,
|
|
"loss": 0.5401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22559234499931335,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4685.3,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 2.3194244604316547,
|
|
"grad_norm": 0.2519158907858138,
|
|
"learning_rate": 2.5942039880579152e-05,
|
|
"loss": 0.5405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14448854327201843,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3750.0,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 2.3248201438848923,
|
|
"grad_norm": 0.26784033799361795,
|
|
"learning_rate": 2.5870079035734695e-05,
|
|
"loss": 0.5472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17420896887779236,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4435.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.3302158273381295,
|
|
"grad_norm": 0.25101777510772394,
|
|
"learning_rate": 2.579803493733094e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055257659405469894,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5312.0,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 2.335611510791367,
|
|
"grad_norm": 0.24838761661655753,
|
|
"learning_rate": 2.572590860714758e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0584455206990242,
|
|
"step": 2165,
|
|
"valid_targets_mean": 5549.4,
|
|
"valid_targets_min": 3000
|
|
},
|
|
{
|
|
"epoch": 2.3410071942446042,
|
|
"grad_norm": 0.20477363082512134,
|
|
"learning_rate": 2.56537010681306e-05,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04914896935224533,
|
|
"step": 2170,
|
|
"valid_targets_mean": 5357.1,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 2.346402877697842,
|
|
"grad_norm": 0.19909453580897385,
|
|
"learning_rate": 2.558141334437773e-05,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0604475699365139,
|
|
"step": 2175,
|
|
"valid_targets_mean": 5853.2,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 2.351798561151079,
|
|
"grad_norm": 0.1836141644719459,
|
|
"learning_rate": 2.550904646112392e-05,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04882563650608063,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5323.5,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 2.3571942446043166,
|
|
"grad_norm": 0.1856485200354345,
|
|
"learning_rate": 2.5436601444726862e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05029682442545891,
|
|
"step": 2185,
|
|
"valid_targets_mean": 5320.9,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 2.362589928057554,
|
|
"grad_norm": 0.18476152457216133,
|
|
"learning_rate": 2.536407932265234e-05,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04619450122117996,
|
|
"step": 2190,
|
|
"valid_targets_mean": 5294.3,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 2.3679856115107913,
|
|
"grad_norm": 0.19532639297594884,
|
|
"learning_rate": 2.529148112345972e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051936328411102295,
|
|
"step": 2195,
|
|
"valid_targets_mean": 5738.7,
|
|
"valid_targets_min": 3508
|
|
},
|
|
{
|
|
"epoch": 2.373381294964029,
|
|
"grad_norm": 0.19643171941058918,
|
|
"learning_rate": 2.521880787678737e-05,
|
|
"loss": 0.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048414599150419235,
|
|
"step": 2200,
|
|
"valid_targets_mean": 5052.5,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 2.378776978417266,
|
|
"grad_norm": 0.1776689037217625,
|
|
"learning_rate": 2.5146060613337997e-05,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048307500779628754,
|
|
"step": 2205,
|
|
"valid_targets_mean": 5108.5,
|
|
"valid_targets_min": 3236
|
|
},
|
|
{
|
|
"epoch": 2.3841726618705037,
|
|
"grad_norm": 0.17026897403636018,
|
|
"learning_rate": 2.5073240364864086e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04738203063607216,
|
|
"step": 2210,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 2.389568345323741,
|
|
"grad_norm": 0.1921781080480417,
|
|
"learning_rate": 2.5000348164153247e-05,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04668160155415535,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5513.7,
|
|
"valid_targets_min": 2867
|
|
},
|
|
{
|
|
"epoch": 2.3949640287769784,
|
|
"grad_norm": 0.19437634803440088,
|
|
"learning_rate": 2.4927385045013567e-05,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04980098828673363,
|
|
"step": 2220,
|
|
"valid_targets_mean": 5217.7,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 2.400359712230216,
|
|
"grad_norm": 0.18770063138069082,
|
|
"learning_rate": 2.485435204225893e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045249972492456436,
|
|
"step": 2225,
|
|
"valid_targets_mean": 5169.1,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 2.405755395683453,
|
|
"grad_norm": 0.17884190958152046,
|
|
"learning_rate": 2.478125019169438e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049188874661922455,
|
|
"step": 2230,
|
|
"valid_targets_mean": 5809.1,
|
|
"valid_targets_min": 2841
|
|
},
|
|
{
|
|
"epoch": 2.411151079136691,
|
|
"grad_norm": 0.16915665190595466,
|
|
"learning_rate": 2.47080805301014e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05089600011706352,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5502.1,
|
|
"valid_targets_min": 2911
|
|
},
|
|
{
|
|
"epoch": 2.416546762589928,
|
|
"grad_norm": 0.20370488788725408,
|
|
"learning_rate": 2.4634844095223217e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04950815439224243,
|
|
"step": 2240,
|
|
"valid_targets_mean": 5274.9,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 2.4219424460431656,
|
|
"grad_norm": 0.18414832274791612,
|
|
"learning_rate": 2.4561541925750085e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04702651873230934,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4966.8,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 2.4273381294964027,
|
|
"grad_norm": 0.18851110270652716,
|
|
"learning_rate": 2.4488175061304547e-05,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05456995964050293,
|
|
"step": 2250,
|
|
"valid_targets_mean": 5898.0,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 2.4327338129496403,
|
|
"grad_norm": 0.1866594412423347,
|
|
"learning_rate": 2.44147445424267e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0492844358086586,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 2.438129496402878,
|
|
"grad_norm": 0.21509382440220742,
|
|
"learning_rate": 2.434125141055943e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06625856459140778,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3787.2,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 2.443525179856115,
|
|
"grad_norm": 0.3346216949840149,
|
|
"learning_rate": 2.4267696708033645e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09866371750831604,
|
|
"step": 2265,
|
|
"valid_targets_mean": 2707.7,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 2.4489208633093527,
|
|
"grad_norm": 0.3209924289805423,
|
|
"learning_rate": 2.4194081478053487e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10660812258720398,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3315.3,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 2.45431654676259,
|
|
"grad_norm": 0.36543678467188445,
|
|
"learning_rate": 2.4120406764681565e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09318758547306061,
|
|
"step": 2275,
|
|
"valid_targets_mean": 2977.7,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 2.4597122302158274,
|
|
"grad_norm": 0.3447594759513527,
|
|
"learning_rate": 2.4046673612824076e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08789114654064178,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2580.0,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 2.4651079136690646,
|
|
"grad_norm": 0.3345741553220532,
|
|
"learning_rate": 2.3972883068216072e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07869979739189148,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2145.7,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 2.470503597122302,
|
|
"grad_norm": 0.33405721485477824,
|
|
"learning_rate": 2.389903617740658e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09479786455631256,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3349.0,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 2.4758992805755398,
|
|
"grad_norm": 0.34287805788217235,
|
|
"learning_rate": 2.3825133987743748e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0783834457397461,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2337.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.481294964028777,
|
|
"grad_norm": 0.3151214309066818,
|
|
"learning_rate": 2.3751177547360057e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08666113764047623,
|
|
"step": 2300,
|
|
"valid_targets_mean": 2693.7,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 2.4866906474820145,
|
|
"grad_norm": 0.3225187279410181,
|
|
"learning_rate": 2.367716790515735e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09313815087080002,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3172.9,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 2.4920863309352517,
|
|
"grad_norm": 0.3062610084816234,
|
|
"learning_rate": 2.3603106110792063e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07530894130468369,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2427.7,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 2.4974820143884893,
|
|
"grad_norm": 0.3007538106630295,
|
|
"learning_rate": 2.3528993214660262e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08663170039653778,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3079.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.5028776978417264,
|
|
"grad_norm": 0.30388439748890966,
|
|
"learning_rate": 2.3454830267882792e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08898089826107025,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2843.0,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 2.508273381294964,
|
|
"grad_norm": 0.29160528511334227,
|
|
"learning_rate": 2.3380618322290345e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06712646037340164,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2625.9,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.5136690647482016,
|
|
"grad_norm": 0.3084600876323316,
|
|
"learning_rate": 2.3306358430408536e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07984612137079239,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2795.7,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 2.5190647482014388,
|
|
"grad_norm": 0.3599556711116708,
|
|
"learning_rate": 2.3232051645443016e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1011911928653717,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2658.8,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 2.5244604316546764,
|
|
"grad_norm": 0.30299427674161883,
|
|
"learning_rate": 2.3157699021264475e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10779663920402527,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3810.7,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 2.5298561151079135,
|
|
"grad_norm": 0.3405896311872269,
|
|
"learning_rate": 2.3083301612393748e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11688866466283798,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3041.2,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 2.535251798561151,
|
|
"grad_norm": 0.30526223931244434,
|
|
"learning_rate": 2.300886047398683e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08250236511230469,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3060.2,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 2.5406474820143883,
|
|
"grad_norm": 0.33912286191908014,
|
|
"learning_rate": 2.2934376661819923e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08803585171699524,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2596.9,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 2.546043165467626,
|
|
"grad_norm": 0.29324617016403226,
|
|
"learning_rate": 2.285985123227445e-05,
|
|
"loss": 0.4422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15671974420547485,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4241.0,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 2.5514388489208635,
|
|
"grad_norm": 0.302974981448077,
|
|
"learning_rate": 2.2785285242322085e-05,
|
|
"loss": 0.4509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14567139744758606,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4306.0,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 2.5568345323741006,
|
|
"grad_norm": 0.2827631924577153,
|
|
"learning_rate": 2.271067974950976e-05,
|
|
"loss": 0.4474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13706371188163757,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3592.4,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 2.5622302158273382,
|
|
"grad_norm": 0.29086096116713167,
|
|
"learning_rate": 2.263603581194467e-05,
|
|
"loss": 0.4384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11793051660060883,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2727.4,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 2.5676258992805754,
|
|
"grad_norm": 0.2502709680578223,
|
|
"learning_rate": 2.256135448827924e-05,
|
|
"loss": 0.4383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16860954463481903,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4547.5,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 2.573021582733813,
|
|
"grad_norm": 0.2696792173382063,
|
|
"learning_rate": 2.2486636837696155e-05,
|
|
"loss": 0.4568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12222659587860107,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3017.2,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 2.57841726618705,
|
|
"grad_norm": 0.31484162042789854,
|
|
"learning_rate": 2.241188391989329e-05,
|
|
"loss": 0.4312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14461642503738403,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2964.7,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 2.5838129496402877,
|
|
"grad_norm": 0.2730625486872717,
|
|
"learning_rate": 2.233709679506873e-05,
|
|
"loss": 0.4077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08030517399311066,
|
|
"step": 2395,
|
|
"valid_targets_mean": 940.8,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 2.5892086330935253,
|
|
"grad_norm": 0.22725333399815317,
|
|
"learning_rate": 2.226227652390569e-05,
|
|
"loss": 0.4569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18271669745445251,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5461.8,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 2.5946043165467625,
|
|
"grad_norm": 0.24560658488764234,
|
|
"learning_rate": 2.2187424167557496e-05,
|
|
"loss": 0.4266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1908819079399109,
|
|
"step": 2405,
|
|
"valid_targets_mean": 5619.4,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"grad_norm": 0.2655840527630924,
|
|
"learning_rate": 2.2112540787632514e-05,
|
|
"loss": 0.4381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12869419157505035,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3569.9,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 2.6053956834532377,
|
|
"grad_norm": 0.26855040746360276,
|
|
"learning_rate": 2.203762744617914e-05,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17156773805618286,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3463.0,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 2.610791366906475,
|
|
"grad_norm": 0.25409219673893313,
|
|
"learning_rate": 2.1962685205670673e-05,
|
|
"loss": 0.4187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1594664305448532,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4937.0,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 2.616187050359712,
|
|
"grad_norm": 0.2755050622444617,
|
|
"learning_rate": 2.1887715128990302e-05,
|
|
"loss": 0.4455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1451021134853363,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3598.3,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 2.6215827338129496,
|
|
"grad_norm": 0.2540931945700002,
|
|
"learning_rate": 2.1812718279415988e-05,
|
|
"loss": 0.4396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12404369562864304,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3548.3,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.626978417266187,
|
|
"grad_norm": 0.37738013084123756,
|
|
"learning_rate": 2.1737695720605413e-05,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1567433923482895,
|
|
"step": 2435,
|
|
"valid_targets_mean": 1681.6,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 2.6323741007194243,
|
|
"grad_norm": 0.24819586144346067,
|
|
"learning_rate": 2.1662648516580894e-05,
|
|
"loss": 0.4276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10583257675170898,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3609.7,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 2.637769784172662,
|
|
"grad_norm": 0.2596481328688287,
|
|
"learning_rate": 2.1587577731714267e-05,
|
|
"loss": 0.4355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16093915700912476,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4945.1,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 2.6431654676258995,
|
|
"grad_norm": 0.29187243310741845,
|
|
"learning_rate": 2.1512484430711833e-05,
|
|
"loss": 0.42,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14649483561515808,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3720.4,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 2.6485611510791367,
|
|
"grad_norm": 0.26067773321809035,
|
|
"learning_rate": 2.1437369678599196e-05,
|
|
"loss": 0.4445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1730954945087433,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4374.7,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 2.653956834532374,
|
|
"grad_norm": 0.24436359497859442,
|
|
"learning_rate": 2.136223454070622e-05,
|
|
"loss": 0.4257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12462647259235382,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 2.6593525179856115,
|
|
"grad_norm": 0.19511981977381054,
|
|
"learning_rate": 2.1287080082651896e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07759175449609756,
|
|
"step": 2465,
|
|
"valid_targets_mean": 6295.9,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.664748201438849,
|
|
"grad_norm": 0.2047735932154193,
|
|
"learning_rate": 2.1211907370329213e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08802257478237152,
|
|
"step": 2470,
|
|
"valid_targets_mean": 6297.3,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 2.670143884892086,
|
|
"grad_norm": 0.21235941752325105,
|
|
"learning_rate": 2.1136717469890067e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07533913850784302,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5215.5,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 2.675539568345324,
|
|
"grad_norm": 0.19378717290144606,
|
|
"learning_rate": 2.1061511447730125e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09396026283502579,
|
|
"step": 2480,
|
|
"valid_targets_mean": 6649.1,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 2.6809352517985614,
|
|
"grad_norm": 0.2237767602052978,
|
|
"learning_rate": 2.0986290370473694e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04603438079357147,
|
|
"step": 2485,
|
|
"valid_targets_mean": 1256.4,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 2.6863309352517986,
|
|
"grad_norm": 0.19641636758899317,
|
|
"learning_rate": 2.091105530495861e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08663879334926605,
|
|
"step": 2490,
|
|
"valid_targets_mean": 6641.7,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 2.6917266187050357,
|
|
"grad_norm": 0.17914583808472495,
|
|
"learning_rate": 2.0835807318221097e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08184070885181427,
|
|
"step": 2495,
|
|
"valid_targets_mean": 6290.7,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 2.6971223021582733,
|
|
"grad_norm": 0.17340336160504974,
|
|
"learning_rate": 2.076054747748066e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09275300800800323,
|
|
"step": 2500,
|
|
"valid_targets_mean": 7629.2,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.702517985611511,
|
|
"grad_norm": 0.17713137894836306,
|
|
"learning_rate": 2.0685276850124883e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09552741050720215,
|
|
"step": 2505,
|
|
"valid_targets_mean": 7520.3,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 2.707913669064748,
|
|
"grad_norm": 0.19467476587217702,
|
|
"learning_rate": 2.060999650369437e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11022981256246567,
|
|
"step": 2510,
|
|
"valid_targets_mean": 7877.2,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 2.7133093525179857,
|
|
"grad_norm": 0.17435694923368494,
|
|
"learning_rate": 2.0534707505867552e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07876952737569809,
|
|
"step": 2515,
|
|
"valid_targets_mean": 6407.7,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 2.7187050359712233,
|
|
"grad_norm": 0.17198201782215813,
|
|
"learning_rate": 2.0459410924445548e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12855146825313568,
|
|
"step": 2520,
|
|
"valid_targets_mean": 8744.6,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 2.7241007194244604,
|
|
"grad_norm": 0.1763122993748313,
|
|
"learning_rate": 2.038410782733707e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07334549725055695,
|
|
"step": 2525,
|
|
"valid_targets_mean": 5903.2,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 2.7294964028776976,
|
|
"grad_norm": 0.17096824640193334,
|
|
"learning_rate": 2.03087992825432e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07596874237060547,
|
|
"step": 2530,
|
|
"valid_targets_mean": 5620.8,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 2.734892086330935,
|
|
"grad_norm": 0.18687867633338442,
|
|
"learning_rate": 2.023348635814231e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07282720506191254,
|
|
"step": 2535,
|
|
"valid_targets_mean": 6053.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.7402877697841728,
|
|
"grad_norm": 0.17921535199134472,
|
|
"learning_rate": 2.015817012227486e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07806003093719482,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5808.6,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 2.74568345323741,
|
|
"grad_norm": 0.2805053460313604,
|
|
"learning_rate": 2.0082851643128308e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06770668923854828,
|
|
"step": 2545,
|
|
"valid_targets_mean": 1165.8,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.7510791366906475,
|
|
"grad_norm": 0.18479653481858851,
|
|
"learning_rate": 2.0007531988921912e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07146448642015457,
|
|
"step": 2550,
|
|
"valid_targets_mean": 6545.5,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 2.756474820143885,
|
|
"grad_norm": 0.18206808069809832,
|
|
"learning_rate": 1.9932212227891594e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0848810076713562,
|
|
"step": 2555,
|
|
"valid_targets_mean": 7082.3,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 2.7618705035971223,
|
|
"grad_norm": 0.21900500356527897,
|
|
"learning_rate": 1.9856893428274777e-05,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05731198191642761,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5055.3,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 2.76726618705036,
|
|
"grad_norm": 0.22190272663202767,
|
|
"learning_rate": 1.978157665829529e-05,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056094370782375336,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5057.8,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 2.772661870503597,
|
|
"grad_norm": 0.19901412842240734,
|
|
"learning_rate": 1.9706262986148118e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05947216600179672,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4976.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 2.7780575539568346,
|
|
"grad_norm": 0.21964490563275182,
|
|
"learning_rate": 1.9630953479984373e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056389324367046356,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4991.0,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 2.783453237410072,
|
|
"grad_norm": 0.2006456015374971,
|
|
"learning_rate": 1.955564920789603e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061025287955999374,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4929.2,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 2.7888489208633094,
|
|
"grad_norm": 0.20179770750838041,
|
|
"learning_rate": 1.9480351237900872e-05,
|
|
"loss": 0.1676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05790459364652634,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4816.0,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.794244604316547,
|
|
"grad_norm": 0.1936638563592105,
|
|
"learning_rate": 1.940506063792726e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056022923439741135,
|
|
"step": 2590,
|
|
"valid_targets_mean": 5106.1,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 2.799640287769784,
|
|
"grad_norm": 0.1807682253470473,
|
|
"learning_rate": 1.9329778475799063e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0503389909863472,
|
|
"step": 2595,
|
|
"valid_targets_mean": 5717.9,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 2.8050359712230217,
|
|
"grad_norm": 0.20027298513950031,
|
|
"learning_rate": 1.925450581922047e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049683425575494766,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4223.5,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 2.810431654676259,
|
|
"grad_norm": 0.1954172099406615,
|
|
"learning_rate": 1.9179243735760836e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059270285069942474,
|
|
"step": 2605,
|
|
"valid_targets_mean": 5358.9,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 2.8158273381294965,
|
|
"grad_norm": 0.20831053448190678,
|
|
"learning_rate": 1.9103993292839584e-05,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06249257177114487,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4907.8,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 2.8212230215827336,
|
|
"grad_norm": 0.19081109890938494,
|
|
"learning_rate": 1.9028755557711043e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05977249890565872,
|
|
"step": 2615,
|
|
"valid_targets_mean": 5368.9,
|
|
"valid_targets_min": 207
|
|
},
|
|
{
|
|
"epoch": 2.8266187050359712,
|
|
"grad_norm": 0.1876885049333318,
|
|
"learning_rate": 1.8953531597449284e-05,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04898893088102341,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5002.8,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 2.832014388489209,
|
|
"grad_norm": 0.22064068293748118,
|
|
"learning_rate": 1.887832247893304e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05769013985991478,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4689.6,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 2.837410071942446,
|
|
"grad_norm": 0.21590483770775484,
|
|
"learning_rate": 1.8803129268830547e-05,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05320272594690323,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4091.0,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.8428057553956836,
|
|
"grad_norm": 0.19317207915801277,
|
|
"learning_rate": 1.8727953033584408e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05065399408340454,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4954.5,
|
|
"valid_targets_min": 174
|
|
},
|
|
{
|
|
"epoch": 2.8482014388489207,
|
|
"grad_norm": 0.18113616600158086,
|
|
"learning_rate": 1.865279483939647e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05231694132089615,
|
|
"step": 2640,
|
|
"valid_targets_mean": 6080.0,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 2.8535971223021583,
|
|
"grad_norm": 0.19183317497424815,
|
|
"learning_rate": 1.857765575221272e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051487114280462265,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5143.1,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 2.8589928057553955,
|
|
"grad_norm": 0.19032974483928453,
|
|
"learning_rate": 1.850253683770816e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05299080163240433,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4991.2,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 2.864388489208633,
|
|
"grad_norm": 0.19536319487376552,
|
|
"learning_rate": 1.8427439161271677e-05,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05653288960456848,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5415.2,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 2.8697841726618707,
|
|
"grad_norm": 0.20241230862523865,
|
|
"learning_rate": 1.835236378799095e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054744653403759,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4573.3,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 2.875179856115108,
|
|
"grad_norm": 0.25886046457386086,
|
|
"learning_rate": 1.827731178263734e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06750328838825226,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3596.5,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 2.8805755395683454,
|
|
"grad_norm": 0.24179822858699052,
|
|
"learning_rate": 1.8202284209650795e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06854850053787231,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4110.0,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 2.8859712230215826,
|
|
"grad_norm": 0.24923322346664661,
|
|
"learning_rate": 1.8127282133124725e-05,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07103882730007172,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3978.1,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 2.89136690647482,
|
|
"grad_norm": 0.23392671402360837,
|
|
"learning_rate": 1.805230661679096e-05,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0668768435716629,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3586.4,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 2.8967625899280574,
|
|
"grad_norm": 0.22841580235364703,
|
|
"learning_rate": 1.7977358724004614e-05,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057418979704380035,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3404.6,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 2.902158273381295,
|
|
"grad_norm": 0.2297428241404763,
|
|
"learning_rate": 1.7902439517729018e-05,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06158944219350815,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3613.8,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 2.9075539568345325,
|
|
"grad_norm": 0.22139505222001168,
|
|
"learning_rate": 1.7827550060520672e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06660352647304535,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4137.4,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 2.9129496402877697,
|
|
"grad_norm": 0.21825241773590015,
|
|
"learning_rate": 1.7752691414514152e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06937259435653687,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3908.7,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.9183453237410073,
|
|
"grad_norm": 0.24333957517701515,
|
|
"learning_rate": 1.767786464140704e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05929997190833092,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3654.4,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.9237410071942445,
|
|
"grad_norm": 0.2262414595930043,
|
|
"learning_rate": 1.7603070802444864e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06109467148780823,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3634.9,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 2.929136690647482,
|
|
"grad_norm": 0.23945224036472848,
|
|
"learning_rate": 1.7528310958406074e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05540970340371132,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3326.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 2.934532374100719,
|
|
"grad_norm": 0.2311610284301094,
|
|
"learning_rate": 1.7453586169586984e-05,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06149996817111969,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3783.7,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 2.939928057553957,
|
|
"grad_norm": 0.22409066423162088,
|
|
"learning_rate": 1.7378897495786703e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0680890679359436,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3980.5,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 2.9453237410071944,
|
|
"grad_norm": 0.21798501442696328,
|
|
"learning_rate": 1.7304245996292163e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058000218123197556,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3953.6,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 2.9507194244604316,
|
|
"grad_norm": 0.2362697377554128,
|
|
"learning_rate": 1.722963272986304e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06063360720872879,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3690.6,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 2.956115107913669,
|
|
"grad_norm": 0.34632671242791885,
|
|
"learning_rate": 1.7155058754716794e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08828511834144592,
|
|
"step": 2740,
|
|
"valid_targets_mean": 7081.2,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 2.9615107913669063,
|
|
"grad_norm": 0.2930052013481956,
|
|
"learning_rate": 1.708052512851358e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07716353982686996,
|
|
"step": 2745,
|
|
"valid_targets_mean": 6851.4,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.966906474820144,
|
|
"grad_norm": 0.23595198263480982,
|
|
"learning_rate": 1.700603290834134e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08548437803983688,
|
|
"step": 2750,
|
|
"valid_targets_mean": 7234.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 2.972302158273381,
|
|
"grad_norm": 0.25171416116524065,
|
|
"learning_rate": 1.6931583150700747e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07578200101852417,
|
|
"step": 2755,
|
|
"valid_targets_mean": 5840.9,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 2.9776978417266187,
|
|
"grad_norm": 0.2177299131400412,
|
|
"learning_rate": 1.6857176911490243e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07491976022720337,
|
|
"step": 2760,
|
|
"valid_targets_mean": 6519.6,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 2.9830935251798563,
|
|
"grad_norm": 0.1916975928233975,
|
|
"learning_rate": 1.6782815245991055e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06933717429637909,
|
|
"step": 2765,
|
|
"valid_targets_mean": 6225.4,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 2.9884892086330934,
|
|
"grad_norm": 0.2074453733209192,
|
|
"learning_rate": 1.670849920885225e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07327165454626083,
|
|
"step": 2770,
|
|
"valid_targets_mean": 6809.8,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 2.993884892086331,
|
|
"grad_norm": 0.19796181710528035,
|
|
"learning_rate": 1.663422985407576e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06189326196908951,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5612.7,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 2.9992805755395686,
|
|
"grad_norm": 0.1993611811033315,
|
|
"learning_rate": 1.6560008235001413e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0739506334066391,
|
|
"step": 2780,
|
|
"valid_targets_mean": 6633.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 3.00431654676259,
|
|
"grad_norm": 1.4721439528541718,
|
|
"learning_rate": 1.6485835404292037e-05,
|
|
"loss": 0.5719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17806357145309448,
|
|
"step": 2785,
|
|
"valid_targets_mean": 6954.8,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 3.0097122302158272,
|
|
"grad_norm": 0.7880878962809471,
|
|
"learning_rate": 1.6411712413918513e-05,
|
|
"loss": 0.4832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1704634726047516,
|
|
"step": 2790,
|
|
"valid_targets_mean": 8220.0,
|
|
"valid_targets_min": 2842
|
|
},
|
|
{
|
|
"epoch": 3.015107913669065,
|
|
"grad_norm": 0.5377423389247046,
|
|
"learning_rate": 1.6337640315144826e-05,
|
|
"loss": 0.428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12837132811546326,
|
|
"step": 2795,
|
|
"valid_targets_mean": 7166.5,
|
|
"valid_targets_min": 2501
|
|
},
|
|
{
|
|
"epoch": 3.020503597122302,
|
|
"grad_norm": 0.3832314104388192,
|
|
"learning_rate": 1.6263620158513206e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12578965723514557,
|
|
"step": 2800,
|
|
"valid_targets_mean": 6987.4,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 3.0258992805755396,
|
|
"grad_norm": 0.2674147101713292,
|
|
"learning_rate": 1.6189652993829196e-05,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12702161073684692,
|
|
"step": 2805,
|
|
"valid_targets_mean": 7609.8,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 3.031294964028777,
|
|
"grad_norm": 0.24270014346313198,
|
|
"learning_rate": 1.6115739870146768e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11748575419187546,
|
|
"step": 2810,
|
|
"valid_targets_mean": 7067.3,
|
|
"valid_targets_min": 2449
|
|
},
|
|
{
|
|
"epoch": 3.0366906474820143,
|
|
"grad_norm": 0.2310840203140876,
|
|
"learning_rate": 1.604188183575344e-05,
|
|
"loss": 0.358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10949267446994781,
|
|
"step": 2815,
|
|
"valid_targets_mean": 7091.4,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 3.042086330935252,
|
|
"grad_norm": 0.2028949133850599,
|
|
"learning_rate": 1.5968079938155427e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12724344432353973,
|
|
"step": 2820,
|
|
"valid_targets_mean": 7742.7,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 3.047482014388489,
|
|
"grad_norm": 0.21085918179655694,
|
|
"learning_rate": 1.589433522406278e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323000192642212,
|
|
"step": 2825,
|
|
"valid_targets_mean": 8509.0,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 3.0528776978417267,
|
|
"grad_norm": 0.22281643375619622,
|
|
"learning_rate": 1.58206487393745e-05,
|
|
"loss": 0.3482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11260676383972168,
|
|
"step": 2830,
|
|
"valid_targets_mean": 6931.7,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 3.058273381294964,
|
|
"grad_norm": 0.19236474200741302,
|
|
"learning_rate": 1.5747021529163777e-05,
|
|
"loss": 0.338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10623641312122345,
|
|
"step": 2835,
|
|
"valid_targets_mean": 9712.0,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 3.0636690647482014,
|
|
"grad_norm": 0.18059689326800968,
|
|
"learning_rate": 1.56734546376631e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10958150774240494,
|
|
"step": 2840,
|
|
"valid_targets_mean": 9876.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 3.069064748201439,
|
|
"grad_norm": 0.18465697352218607,
|
|
"learning_rate": 1.5599949108249503e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10314854979515076,
|
|
"step": 2845,
|
|
"valid_targets_mean": 9530.9,
|
|
"valid_targets_min": 5556
|
|
},
|
|
{
|
|
"epoch": 3.074460431654676,
|
|
"grad_norm": 0.1915781326838779,
|
|
"learning_rate": 1.552650598342971e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10979904234409332,
|
|
"step": 2850,
|
|
"valid_targets_mean": 9594.7,
|
|
"valid_targets_min": 4603
|
|
},
|
|
{
|
|
"epoch": 3.079856115107914,
|
|
"grad_norm": 0.18137263501670903,
|
|
"learning_rate": 1.54531263048254e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10585586726665497,
|
|
"step": 2855,
|
|
"valid_targets_mean": 9466.0,
|
|
"valid_targets_min": 6389
|
|
},
|
|
{
|
|
"epoch": 3.085251798561151,
|
|
"grad_norm": 0.18103810142781393,
|
|
"learning_rate": 1.5379811113158413e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1029638946056366,
|
|
"step": 2860,
|
|
"valid_targets_mean": 9956.4,
|
|
"valid_targets_min": 4744
|
|
},
|
|
{
|
|
"epoch": 3.0906474820143885,
|
|
"grad_norm": 0.18241325983790493,
|
|
"learning_rate": 1.530656144823597e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10875703394412994,
|
|
"step": 2865,
|
|
"valid_targets_mean": 9960.5,
|
|
"valid_targets_min": 6172
|
|
},
|
|
{
|
|
"epoch": 3.0960431654676257,
|
|
"grad_norm": 0.1945511189321989,
|
|
"learning_rate": 1.5233378348935972e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10400672256946564,
|
|
"step": 2870,
|
|
"valid_targets_mean": 9848.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.1014388489208633,
|
|
"grad_norm": 0.18722365330706534,
|
|
"learning_rate": 1.5160262853192231e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10277149081230164,
|
|
"step": 2875,
|
|
"valid_targets_mean": 9407.0,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 3.106834532374101,
|
|
"grad_norm": 0.1895327196517961,
|
|
"learning_rate": 1.5087215997979754e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10089821368455887,
|
|
"step": 2880,
|
|
"valid_targets_mean": 9471.5,
|
|
"valid_targets_min": 4297
|
|
},
|
|
{
|
|
"epoch": 3.112230215827338,
|
|
"grad_norm": 0.2041207074883115,
|
|
"learning_rate": 1.501423881930004e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10081613808870316,
|
|
"step": 2885,
|
|
"valid_targets_mean": 8577.6,
|
|
"valid_targets_min": 3933
|
|
},
|
|
{
|
|
"epoch": 3.1176258992805757,
|
|
"grad_norm": 0.19253395239706503,
|
|
"learning_rate": 1.4941332352166385e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10833731293678284,
|
|
"step": 2890,
|
|
"valid_targets_mean": 9960.7,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 3.123021582733813,
|
|
"grad_norm": 0.17754044218543655,
|
|
"learning_rate": 1.4868497630589222e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09834058582782745,
|
|
"step": 2895,
|
|
"valid_targets_mean": 9948.3,
|
|
"valid_targets_min": 3344
|
|
},
|
|
{
|
|
"epoch": 3.1284172661870504,
|
|
"grad_norm": 0.1811977755795112,
|
|
"learning_rate": 1.4795735687561406e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09427311271429062,
|
|
"step": 2900,
|
|
"valid_targets_mean": 9342.2,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 3.133812949640288,
|
|
"grad_norm": 0.1897170658353231,
|
|
"learning_rate": 1.4723047555043626e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10737341642379761,
|
|
"step": 2905,
|
|
"valid_targets_mean": 9508.4,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.139208633093525,
|
|
"grad_norm": 0.1673031348554612,
|
|
"learning_rate": 1.4650434263949725e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09941360354423523,
|
|
"step": 2910,
|
|
"valid_targets_mean": 9982.4,
|
|
"valid_targets_min": 3744
|
|
},
|
|
{
|
|
"epoch": 3.1446043165467628,
|
|
"grad_norm": 0.1732946307770958,
|
|
"learning_rate": 1.4577896844132102e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10349894315004349,
|
|
"step": 2915,
|
|
"valid_targets_mean": 9832.3,
|
|
"valid_targets_min": 5100
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"grad_norm": 0.22987021086079454,
|
|
"learning_rate": 1.4505436324367073e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10665814578533173,
|
|
"step": 2920,
|
|
"valid_targets_mean": 10835.8,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 3.1553956834532375,
|
|
"grad_norm": 0.1856961185924119,
|
|
"learning_rate": 1.4433053732340331e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09954410791397095,
|
|
"step": 2925,
|
|
"valid_targets_mean": 10256.2,
|
|
"valid_targets_min": 5379
|
|
},
|
|
{
|
|
"epoch": 3.1607913669064747,
|
|
"grad_norm": 0.18638332042444405,
|
|
"learning_rate": 1.436075009463234e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08939959108829498,
|
|
"step": 2930,
|
|
"valid_targets_mean": 9251.9,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 3.1661870503597123,
|
|
"grad_norm": 0.19354905534708294,
|
|
"learning_rate": 1.4288526436703762e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1079421415925026,
|
|
"step": 2935,
|
|
"valid_targets_mean": 11007.1,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 3.17158273381295,
|
|
"grad_norm": 0.1826186216376976,
|
|
"learning_rate": 1.4216383782880935e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10259517282247543,
|
|
"step": 2940,
|
|
"valid_targets_mean": 9657.8,
|
|
"valid_targets_min": 2733
|
|
},
|
|
{
|
|
"epoch": 3.176978417266187,
|
|
"grad_norm": 0.18696471264333897,
|
|
"learning_rate": 1.4144323156341351e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12087966501712799,
|
|
"step": 2945,
|
|
"valid_targets_mean": 10753.2,
|
|
"valid_targets_min": 5581
|
|
},
|
|
{
|
|
"epoch": 3.1823741007194246,
|
|
"grad_norm": 0.19975348122239708,
|
|
"learning_rate": 1.4072345579099135e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09997809678316116,
|
|
"step": 2950,
|
|
"valid_targets_mean": 9586.8,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 3.1877697841726618,
|
|
"grad_norm": 0.19032939041366578,
|
|
"learning_rate": 1.400045207199053e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09176912158727646,
|
|
"step": 2955,
|
|
"valid_targets_mean": 9004.5,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.1931654676258994,
|
|
"grad_norm": 0.18608810734369322,
|
|
"learning_rate": 1.3928643654659449e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09072847664356232,
|
|
"step": 2960,
|
|
"valid_targets_mean": 9294.9,
|
|
"valid_targets_min": 4130
|
|
},
|
|
{
|
|
"epoch": 3.1985611510791365,
|
|
"grad_norm": 0.19753961392791367,
|
|
"learning_rate": 1.3856921345543013e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10142326354980469,
|
|
"step": 2965,
|
|
"valid_targets_mean": 10179.0,
|
|
"valid_targets_min": 4267
|
|
},
|
|
{
|
|
"epoch": 3.203956834532374,
|
|
"grad_norm": 0.19127962304344204,
|
|
"learning_rate": 1.3785286161857068e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0897255390882492,
|
|
"step": 2970,
|
|
"valid_targets_mean": 8532.7,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 3.2093525179856117,
|
|
"grad_norm": 0.2450791493473481,
|
|
"learning_rate": 1.3713739119581817e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11320159584283829,
|
|
"step": 2975,
|
|
"valid_targets_mean": 6747.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 3.214748201438849,
|
|
"grad_norm": 0.24064545741780569,
|
|
"learning_rate": 1.3642281233447367e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10055461525917053,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5423.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 3.2201438848920865,
|
|
"grad_norm": 2.195043492827009,
|
|
"learning_rate": 1.3570913516919352e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682761251926422,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3791.8,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 3.2255395683453236,
|
|
"grad_norm": 0.9484799611603877,
|
|
"learning_rate": 1.3499636982184542e-05,
|
|
"loss": 0.6847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2080811709165573,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4159.7,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.2309352517985612,
|
|
"grad_norm": 0.574926143037513,
|
|
"learning_rate": 1.342845264013653e-05,
|
|
"loss": 0.6109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18268665671348572,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3654.0,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.2363309352517984,
|
|
"grad_norm": 0.5309210952372077,
|
|
"learning_rate": 1.3357361500361354e-05,
|
|
"loss": 0.5902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1671333909034729,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2945.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 3.241726618705036,
|
|
"grad_norm": 0.31057399309082817,
|
|
"learning_rate": 1.3286364571123183e-05,
|
|
"loss": 0.5594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19803191721439362,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4179.1,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.2471223021582736,
|
|
"grad_norm": 0.2738817928407398,
|
|
"learning_rate": 1.321546285935005e-05,
|
|
"loss": 0.5225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16975221037864685,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4048.5,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 3.2525179856115107,
|
|
"grad_norm": 0.2669583079265725,
|
|
"learning_rate": 1.3144657370619522e-05,
|
|
"loss": 0.5143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16530829668045044,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4272.8,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 3.2579136690647483,
|
|
"grad_norm": 0.2675996697377583,
|
|
"learning_rate": 1.3073949109144492e-05,
|
|
"loss": 0.5273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25211331248283386,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4842.6,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 3.2633093525179855,
|
|
"grad_norm": 0.28089282501910373,
|
|
"learning_rate": 1.3003339077758874e-05,
|
|
"loss": 0.5155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20179277658462524,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4255.8,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 3.268705035971223,
|
|
"grad_norm": 0.25665605074031295,
|
|
"learning_rate": 1.2932828277903444e-05,
|
|
"loss": 0.5135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15966060757637024,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4050.4,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 3.2741007194244602,
|
|
"grad_norm": 0.27919049884369196,
|
|
"learning_rate": 1.2862417709611595e-05,
|
|
"loss": 0.489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15662714838981628,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3251.1,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.279496402877698,
|
|
"grad_norm": 0.2714130793862663,
|
|
"learning_rate": 1.2792108371495153e-05,
|
|
"loss": 0.5015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709236353635788,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3961.1,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 3.2848920863309354,
|
|
"grad_norm": 0.22885428056033116,
|
|
"learning_rate": 1.2721901260730252e-05,
|
|
"loss": 0.5032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18469157814979553,
|
|
"step": 3045,
|
|
"valid_targets_mean": 5018.0,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 3.2902877697841726,
|
|
"grad_norm": 0.2585428689083819,
|
|
"learning_rate": 1.2651797373043149e-05,
|
|
"loss": 0.5039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18414464592933655,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4231.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 3.29568345323741,
|
|
"grad_norm": 0.2589902865153202,
|
|
"learning_rate": 1.2581797702696142e-05,
|
|
"loss": 0.502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15420642495155334,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3704.2,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 3.3010791366906473,
|
|
"grad_norm": 0.2731927331281264,
|
|
"learning_rate": 1.2511903242473406e-05,
|
|
"loss": 0.5178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1583746373653412,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3797.5,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 3.306474820143885,
|
|
"grad_norm": 0.2665022823039429,
|
|
"learning_rate": 1.2442114983666982e-05,
|
|
"loss": 0.5164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1687554568052292,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3499.0,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 3.311870503597122,
|
|
"grad_norm": 0.2259924020071165,
|
|
"learning_rate": 1.2372433916062694e-05,
|
|
"loss": 0.5046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.177450031042099,
|
|
"step": 3070,
|
|
"valid_targets_mean": 5092.9,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 3.3172661870503597,
|
|
"grad_norm": 0.2509298336388502,
|
|
"learning_rate": 1.2302861027926074e-05,
|
|
"loss": 0.5144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20815099775791168,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4786.8,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 3.3226618705035973,
|
|
"grad_norm": 0.25549311769377925,
|
|
"learning_rate": 1.2233397305988413e-05,
|
|
"loss": 0.5156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17190776765346527,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4219.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 3.3280575539568344,
|
|
"grad_norm": 0.37591818302114655,
|
|
"learning_rate": 1.2164043735432704e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05981592833995819,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5702.6,
|
|
"valid_targets_min": 2936
|
|
},
|
|
{
|
|
"epoch": 3.333453237410072,
|
|
"grad_norm": 0.2468543763139429,
|
|
"learning_rate": 1.209480129987971e-05,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05121966451406479,
|
|
"step": 3090,
|
|
"valid_targets_mean": 5383.9,
|
|
"valid_targets_min": 3193
|
|
},
|
|
{
|
|
"epoch": 3.338848920863309,
|
|
"grad_norm": 0.21571590171688082,
|
|
"learning_rate": 1.2025670981373994e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046277180314064026,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5404.5,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 3.344244604316547,
|
|
"grad_norm": 0.17908978933295625,
|
|
"learning_rate": 1.1956653760369998e-05,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04676622897386551,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5669.6,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 3.349640287769784,
|
|
"grad_norm": 0.17695678190723851,
|
|
"learning_rate": 1.188775061571815e-05,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045716866850852966,
|
|
"step": 3105,
|
|
"valid_targets_mean": 5712.7,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 3.3550359712230216,
|
|
"grad_norm": 0.18059773117944575,
|
|
"learning_rate": 1.1818962524650943e-05,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041910864412784576,
|
|
"step": 3110,
|
|
"valid_targets_mean": 5044.9,
|
|
"valid_targets_min": 2759
|
|
},
|
|
{
|
|
"epoch": 3.360431654676259,
|
|
"grad_norm": 0.18917468289989336,
|
|
"learning_rate": 1.1750290462769108e-05,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04040202870965004,
|
|
"step": 3115,
|
|
"valid_targets_mean": 5219.8,
|
|
"valid_targets_min": 2654
|
|
},
|
|
{
|
|
"epoch": 3.3658273381294963,
|
|
"grad_norm": 0.17018816777127968,
|
|
"learning_rate": 1.1681735404027785e-05,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04280207306146622,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5413.7,
|
|
"valid_targets_min": 3427
|
|
},
|
|
{
|
|
"epoch": 3.371223021582734,
|
|
"grad_norm": 0.1818303483764404,
|
|
"learning_rate": 1.161329832072267e-05,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04157731682062149,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5462.3,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 3.376618705035971,
|
|
"grad_norm": 0.23056103041128162,
|
|
"learning_rate": 1.154498018347626e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046163760125637054,
|
|
"step": 3130,
|
|
"valid_targets_mean": 5651.2,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 3.3820143884892087,
|
|
"grad_norm": 0.1774350498326126,
|
|
"learning_rate": 1.1476781961224088e-05,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0398004949092865,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5431.2,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 3.387410071942446,
|
|
"grad_norm": 0.18490966360062808,
|
|
"learning_rate": 1.1408704621200952e-05,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04081105440855026,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5013.1,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 3.3928057553956834,
|
|
"grad_norm": 0.17415411456862706,
|
|
"learning_rate": 1.1340749128927216e-05,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03824597969651222,
|
|
"step": 3145,
|
|
"valid_targets_mean": 5014.3,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 3.398201438848921,
|
|
"grad_norm": 0.16963093423760253,
|
|
"learning_rate": 1.1272916448195116e-05,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03941604495048523,
|
|
"step": 3150,
|
|
"valid_targets_mean": 5006.2,
|
|
"valid_targets_min": 2670
|
|
},
|
|
{
|
|
"epoch": 3.403597122302158,
|
|
"grad_norm": 0.17132399704257772,
|
|
"learning_rate": 1.1205207541055097e-05,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044332653284072876,
|
|
"step": 3155,
|
|
"valid_targets_mean": 6038.5,
|
|
"valid_targets_min": 3631
|
|
},
|
|
{
|
|
"epoch": 3.4089928057553958,
|
|
"grad_norm": 0.189805137100359,
|
|
"learning_rate": 1.1137623367802149e-05,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04539632052183151,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4937.4,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 3.414388489208633,
|
|
"grad_norm": 0.18308592495080148,
|
|
"learning_rate": 1.1070164886962193e-05,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04230461269617081,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5467.6,
|
|
"valid_targets_min": 2990
|
|
},
|
|
{
|
|
"epoch": 3.4197841726618705,
|
|
"grad_norm": 0.1716106210115639,
|
|
"learning_rate": 1.1002833055278525e-05,
|
|
"loss": 0.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0428653359413147,
|
|
"step": 3170,
|
|
"valid_targets_mean": 5111.8,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 3.4251798561151077,
|
|
"grad_norm": 0.18416957273198956,
|
|
"learning_rate": 1.0935628827698186e-05,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04544125497341156,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5763.9,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 3.4305755395683453,
|
|
"grad_norm": 0.18756785082766872,
|
|
"learning_rate": 1.0868553157358435e-05,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039061181247234344,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4922.0,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 3.435971223021583,
|
|
"grad_norm": 0.18151397773504624,
|
|
"learning_rate": 1.080160699557328e-05,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04015054181218147,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5118.9,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 3.44136690647482,
|
|
"grad_norm": 0.3670947302076255,
|
|
"learning_rate": 1.0734791291819921e-05,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07513515651226044,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2572.3,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 3.4467625899280576,
|
|
"grad_norm": 0.34920414841427133,
|
|
"learning_rate": 1.0668106993725318e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07188170403242111,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2910.9,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 3.4521582733812948,
|
|
"grad_norm": 0.3377830678885108,
|
|
"learning_rate": 1.0601555047052745e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08541694283485413,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3065.4,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 3.4575539568345324,
|
|
"grad_norm": 0.3607792036500072,
|
|
"learning_rate": 1.0535136395688372e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060083627700805664,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2485.7,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 3.46294964028777,
|
|
"grad_norm": 0.310614021877421,
|
|
"learning_rate": 1.0468851981627904e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06349574774503708,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3224.3,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 3.468345323741007,
|
|
"grad_norm": 0.35123268019759096,
|
|
"learning_rate": 1.0402702744963146e-05,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06994736194610596,
|
|
"step": 3215,
|
|
"valid_targets_mean": 2620.0,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 3.4737410071942447,
|
|
"grad_norm": 0.3396291771640694,
|
|
"learning_rate": 1.0336689623868792e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06363092362880707,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2364.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 3.479136690647482,
|
|
"grad_norm": 0.3599906015179587,
|
|
"learning_rate": 1.0270813554588993e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07357560843229294,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2866.9,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.4845323741007195,
|
|
"grad_norm": 0.3512068380591037,
|
|
"learning_rate": 1.0205075471424166e-05,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07829339802265167,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2796.1,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 3.489928057553957,
|
|
"grad_norm": 0.34026565974869727,
|
|
"learning_rate": 1.0139476306717704e-05,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0605873167514801,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2757.5,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 3.4953237410071942,
|
|
"grad_norm": 0.34380516129398314,
|
|
"learning_rate": 1.0074016990842753e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06255850195884705,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2478.7,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.5007194244604314,
|
|
"grad_norm": 0.38137126072736294,
|
|
"learning_rate": 1.0008698452189051e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07222181558609009,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3050.7,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 3.506115107913669,
|
|
"grad_norm": 0.35556958015617235,
|
|
"learning_rate": 9.943521617149703e-06,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08335576951503754,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3265.0,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 3.5115107913669066,
|
|
"grad_norm": 0.3706091179653219,
|
|
"learning_rate": 9.878487410108082e-06,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07880270481109619,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3089.6,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 3.5169064748201437,
|
|
"grad_norm": 0.3456980312604565,
|
|
"learning_rate": 9.813596753424747e-06,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07443439960479736,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3113.7,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 3.5223021582733813,
|
|
"grad_norm": 0.3556120388389541,
|
|
"learning_rate": 9.748850567424263e-06,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06670576333999634,
|
|
"step": 3265,
|
|
"valid_targets_mean": 2633.3,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 3.527697841726619,
|
|
"grad_norm": 0.34815662495799243,
|
|
"learning_rate": 9.684249770382273e-06,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06356480717658997,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2690.3,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 3.533093525179856,
|
|
"grad_norm": 0.3779135904446837,
|
|
"learning_rate": 9.619795278512367e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07572119683027267,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2393.4,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 3.5384892086330937,
|
|
"grad_norm": 0.3469120955492533,
|
|
"learning_rate": 9.55548800595316e-06,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07303174585103989,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3139.9,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 3.543884892086331,
|
|
"grad_norm": 0.7544949010954228,
|
|
"learning_rate": 9.491328864755282e-06,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17298613488674164,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3908.5,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 3.5492805755395684,
|
|
"grad_norm": 0.3306621193580137,
|
|
"learning_rate": 9.427318764868467e-06,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17003647983074188,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4539.9,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 3.5546762589928056,
|
|
"grad_norm": 0.2655039896375555,
|
|
"learning_rate": 9.363458614128663e-06,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12262386083602905,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4636.7,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 3.560071942446043,
|
|
"grad_norm": 0.3085314477484598,
|
|
"learning_rate": 9.29974931824509e-06,
|
|
"loss": 0.4309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13631683588027954,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3688.9,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 3.565467625899281,
|
|
"grad_norm": 0.2684403979256175,
|
|
"learning_rate": 9.236191780787465e-06,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13161887228488922,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3569.2,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 3.570863309352518,
|
|
"grad_norm": 0.27976524104990674,
|
|
"learning_rate": 9.172786903173174e-06,
|
|
"loss": 0.427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1613619327545166,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3664.9,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 3.5762589928057555,
|
|
"grad_norm": 0.25541511500336656,
|
|
"learning_rate": 9.109535584654455e-06,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1270734965801239,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4255.6,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 3.5816546762589927,
|
|
"grad_norm": 0.2551906934859995,
|
|
"learning_rate": 9.046438722305665e-06,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11989418417215347,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3859.6,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 3.5870503597122303,
|
|
"grad_norm": 0.26401207627760803,
|
|
"learning_rate": 8.983497211010557e-06,
|
|
"loss": 0.4123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14510339498519897,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4090.2,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 3.5924460431654675,
|
|
"grad_norm": 0.2642994713229998,
|
|
"learning_rate": 8.920711943449596e-06,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.140409916639328,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3309.9,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 3.597841726618705,
|
|
"grad_norm": 0.2364732855492538,
|
|
"learning_rate": 8.858083810087277e-06,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09750233590602875,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2271.8,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.6032374100719426,
|
|
"grad_norm": 0.23294370774674716,
|
|
"learning_rate": 8.795613699159509e-06,
|
|
"loss": 0.4064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12239314615726471,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4197.5,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 3.60863309352518,
|
|
"grad_norm": 0.2407367117695906,
|
|
"learning_rate": 8.733302496661035e-06,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1599186658859253,
|
|
"step": 3345,
|
|
"valid_targets_mean": 5003.2,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 3.6140287769784174,
|
|
"grad_norm": 0.26494552033783003,
|
|
"learning_rate": 8.671151086332834e-06,
|
|
"loss": 0.4059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14456617832183838,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4006.6,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 3.6194244604316546,
|
|
"grad_norm": 0.2494623491066543,
|
|
"learning_rate": 8.609160349649579e-06,
|
|
"loss": 0.3996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12199956923723221,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3161.7,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 3.624820143884892,
|
|
"grad_norm": 0.2456044339723056,
|
|
"learning_rate": 8.5473311658072e-06,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14817792177200317,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4580.7,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 3.6302158273381293,
|
|
"grad_norm": 0.27292788741147445,
|
|
"learning_rate": 8.485664411710344e-06,
|
|
"loss": 0.4109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1454233080148697,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3757.0,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 3.635611510791367,
|
|
"grad_norm": 0.2837191462935445,
|
|
"learning_rate": 8.42416096195997e-06,
|
|
"loss": 0.4052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11270572245121002,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2430.5,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 3.6410071942446045,
|
|
"grad_norm": 0.30014152736317484,
|
|
"learning_rate": 8.362821688840947e-06,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.086814746260643,
|
|
"step": 3375,
|
|
"valid_targets_mean": 981.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 3.6464028776978417,
|
|
"grad_norm": 0.2590935747655084,
|
|
"learning_rate": 8.301647462309663e-06,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144926518201828,
|
|
"step": 3380,
|
|
"valid_targets_mean": 4691.9,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 3.6517985611510793,
|
|
"grad_norm": 0.23716657681616593,
|
|
"learning_rate": 8.240639149981736e-06,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13629931211471558,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4924.3,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 3.6571942446043164,
|
|
"grad_norm": 0.37517490035393974,
|
|
"learning_rate": 8.179797617119609e-06,
|
|
"loss": 0.3564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10653556883335114,
|
|
"step": 3390,
|
|
"valid_targets_mean": 6950.2,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 3.662589928057554,
|
|
"grad_norm": 0.2106337377558918,
|
|
"learning_rate": 8.11912372662041e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08563907444477081,
|
|
"step": 3395,
|
|
"valid_targets_mean": 6872.7,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 3.667985611510791,
|
|
"grad_norm": 0.22911919996623728,
|
|
"learning_rate": 8.058618339003604e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045383356511592865,
|
|
"step": 3400,
|
|
"valid_targets_mean": 1881.6,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 3.6733812949640288,
|
|
"grad_norm": 0.18403173546473453,
|
|
"learning_rate": 7.998282312398844e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08383271098136902,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5967.8,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 3.6787769784172664,
|
|
"grad_norm": 0.16644095792759414,
|
|
"learning_rate": 7.938116502533783e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08651818335056305,
|
|
"step": 3410,
|
|
"valid_targets_mean": 7220.3,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 3.6841726618705035,
|
|
"grad_norm": 0.19940741390918143,
|
|
"learning_rate": 7.878121762721941e-06,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07390053570270538,
|
|
"step": 3415,
|
|
"valid_targets_mean": 6546.8,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 3.689568345323741,
|
|
"grad_norm": 0.16938156374137475,
|
|
"learning_rate": 7.818298943850625e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06701171398162842,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5979.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 3.6949640287769783,
|
|
"grad_norm": 0.17280746059379493,
|
|
"learning_rate": 7.758648894368796e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08725966513156891,
|
|
"step": 3425,
|
|
"valid_targets_mean": 6519.6,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 3.700359712230216,
|
|
"grad_norm": 0.1692997666702135,
|
|
"learning_rate": 7.699172460275104e-06,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09557026624679565,
|
|
"step": 3430,
|
|
"valid_targets_mean": 6011.3,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 3.705755395683453,
|
|
"grad_norm": 0.18796310287515394,
|
|
"learning_rate": 7.639870485105887e-06,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11375115066766739,
|
|
"step": 3435,
|
|
"valid_targets_mean": 6978.6,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 3.7111510791366906,
|
|
"grad_norm": 0.16289793982164782,
|
|
"learning_rate": 7.580743809923128e-06,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06761450320482254,
|
|
"step": 3440,
|
|
"valid_targets_mean": 7419.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 3.716546762589928,
|
|
"grad_norm": 0.16791462206490956,
|
|
"learning_rate": 7.52179327330264e-06,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07186362147331238,
|
|
"step": 3445,
|
|
"valid_targets_mean": 6317.3,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 3.7219424460431654,
|
|
"grad_norm": 0.17601030650811558,
|
|
"learning_rate": 7.463019711322084e-06,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09529225528240204,
|
|
"step": 3450,
|
|
"valid_targets_mean": 6492.8,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.727338129496403,
|
|
"grad_norm": 0.1651712256613414,
|
|
"learning_rate": 7.404423957549152e-06,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07639043778181076,
|
|
"step": 3455,
|
|
"valid_targets_mean": 6018.3,
|
|
"valid_targets_min": 176
|
|
},
|
|
{
|
|
"epoch": 3.73273381294964,
|
|
"grad_norm": 0.193512145368139,
|
|
"learning_rate": 7.346006843029733e-06,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06116504222154617,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2794.3,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 3.7381294964028777,
|
|
"grad_norm": 0.17987845249553192,
|
|
"learning_rate": 7.287769196276129e-06,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07108516246080399,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5186.0,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 3.743525179856115,
|
|
"grad_norm": 0.17410329990454582,
|
|
"learning_rate": 7.229711843255321e-06,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06696879118680954,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5268.3,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 3.7489208633093525,
|
|
"grad_norm": 0.17210706802820935,
|
|
"learning_rate": 7.171835607377206e-06,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09211978316307068,
|
|
"step": 3475,
|
|
"valid_targets_mean": 7726.2,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 3.75431654676259,
|
|
"grad_norm": 0.18951989635368768,
|
|
"learning_rate": 7.114141309482974e-06,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08110567927360535,
|
|
"step": 3480,
|
|
"valid_targets_mean": 6938.9,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 3.7597122302158272,
|
|
"grad_norm": 0.3619489375193364,
|
|
"learning_rate": 7.056629767833447e-06,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0695662871003151,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4534.9,
|
|
"valid_targets_min": 218
|
|
},
|
|
{
|
|
"epoch": 3.765107913669065,
|
|
"grad_norm": 0.21599595444948597,
|
|
"learning_rate": 6.999301798097462e-06,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05994763225317001,
|
|
"step": 3490,
|
|
"valid_targets_mean": 6093.9,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.7705035971223024,
|
|
"grad_norm": 0.20223301135077798,
|
|
"learning_rate": 6.942158213340304e-06,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05822937190532684,
|
|
"step": 3495,
|
|
"valid_targets_mean": 5790.1,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 3.7758992805755396,
|
|
"grad_norm": 0.2001441987460689,
|
|
"learning_rate": 6.885199824012194e-06,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052550069987773895,
|
|
"step": 3500,
|
|
"valid_targets_mean": 5209.1,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 3.7812949640287767,
|
|
"grad_norm": 0.19769716816268143,
|
|
"learning_rate": 6.828427437936778e-06,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04585304483771324,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4721.6,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 3.7866906474820143,
|
|
"grad_norm": 0.19506321959923253,
|
|
"learning_rate": 6.771841860299671e-06,
|
|
"loss": 0.1511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047429971396923065,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5201.5,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 3.792086330935252,
|
|
"grad_norm": 0.2093791369153954,
|
|
"learning_rate": 6.715443893637032e-06,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05228327587246895,
|
|
"step": 3515,
|
|
"valid_targets_mean": 5476.3,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 3.797482014388489,
|
|
"grad_norm": 0.1989753228984215,
|
|
"learning_rate": 6.659234337824221e-06,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05198853090405464,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4997.8,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 3.8028776978417267,
|
|
"grad_norm": 0.18681440427219523,
|
|
"learning_rate": 6.603213990064395e-06,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04533997178077698,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4857.9,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 3.8082733812949643,
|
|
"grad_norm": 0.20049913400647104,
|
|
"learning_rate": 6.5473836448772345e-06,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046048425137996674,
|
|
"step": 3530,
|
|
"valid_targets_mean": 4449.8,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 3.8136690647482014,
|
|
"grad_norm": 0.18837822856949898,
|
|
"learning_rate": 6.491744094087677e-06,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04687801003456116,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4912.2,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 3.8190647482014386,
|
|
"grad_norm": 0.18943571862206843,
|
|
"learning_rate": 6.436296126814681e-06,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05041279271245003,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5112.4,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 3.824460431654676,
|
|
"grad_norm": 0.1897631751812687,
|
|
"learning_rate": 6.38104052946003e-06,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03828595578670502,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4016.1,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 3.829856115107914,
|
|
"grad_norm": 0.19171194124837537,
|
|
"learning_rate": 6.3259780856971844e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053711626678705215,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5291.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 3.835251798561151,
|
|
"grad_norm": 0.20589429924683475,
|
|
"learning_rate": 6.271109576460166e-06,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054746113717556,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5184.2,
|
|
"valid_targets_min": 186
|
|
},
|
|
{
|
|
"epoch": 3.8406474820143885,
|
|
"grad_norm": 0.18665774873939112,
|
|
"learning_rate": 6.216435779932479e-06,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05191022902727127,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5581.1,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 3.846043165467626,
|
|
"grad_norm": 0.18193928804597234,
|
|
"learning_rate": 6.161957471536075e-06,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05108371376991272,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5395.4,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 3.8514388489208633,
|
|
"grad_norm": 0.1827803451412632,
|
|
"learning_rate": 6.107675423920372e-06,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0430062972009182,
|
|
"step": 3570,
|
|
"valid_targets_mean": 5007.3,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 3.8568345323741005,
|
|
"grad_norm": 0.19272529969571828,
|
|
"learning_rate": 6.05359040695126e-06,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05689335614442825,
|
|
"step": 3575,
|
|
"valid_targets_mean": 6297.9,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.862230215827338,
|
|
"grad_norm": 0.18551337767200662,
|
|
"learning_rate": 5.999703187700212e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03685840964317322,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4157.3,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 3.8676258992805757,
|
|
"grad_norm": 0.19890266402332615,
|
|
"learning_rate": 5.946014530433397e-06,
|
|
"loss": 0.1428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04012000560760498,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4138.9,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 3.873021582733813,
|
|
"grad_norm": 0.37334229053170936,
|
|
"learning_rate": 5.8925251966008355e-06,
|
|
"loss": 0.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06925609707832336,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3849.6,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 3.8784172661870504,
|
|
"grad_norm": 0.26678812726834306,
|
|
"learning_rate": 5.839235944825603e-06,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05801556259393692,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3484.7,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 3.883812949640288,
|
|
"grad_norm": 0.2494810952230535,
|
|
"learning_rate": 5.786147530893078e-06,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06330271810293198,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3891.3,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 3.889208633093525,
|
|
"grad_norm": 0.2373337128107526,
|
|
"learning_rate": 5.7332607077402065e-06,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06238527223467827,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3766.3,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 3.8946043165467623,
|
|
"grad_norm": 0.24557888665487393,
|
|
"learning_rate": 5.680576225444856e-06,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052700042724609375,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3374.2,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 3.9,
|
|
"grad_norm": 0.24914950008563871,
|
|
"learning_rate": 5.628094831215112e-06,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06017230451107025,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3836.8,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 3.9053956834532375,
|
|
"grad_norm": 0.22990341415252533,
|
|
"learning_rate": 5.575817269378772e-06,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06638500094413757,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4268.9,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 3.9107913669064747,
|
|
"grad_norm": 0.23392541806295633,
|
|
"learning_rate": 5.523744281372707e-06,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06184132397174835,
|
|
"step": 3625,
|
|
"valid_targets_mean": 4129.8,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 3.9161870503597123,
|
|
"grad_norm": 0.23492291992076886,
|
|
"learning_rate": 5.471876605732394e-06,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0506136417388916,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3266.7,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 3.92158273381295,
|
|
"grad_norm": 0.2461057375406083,
|
|
"learning_rate": 5.420214978081417e-06,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0638493075966835,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3789.9,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 3.926978417266187,
|
|
"grad_norm": 0.23907265279038917,
|
|
"learning_rate": 5.368760131121047e-06,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05548835173249245,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3599.3,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 3.9323741007194246,
|
|
"grad_norm": 0.2349336988242476,
|
|
"learning_rate": 5.3175127946198654e-06,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05123763903975487,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3419.1,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 3.9377697841726618,
|
|
"grad_norm": 0.2283847107284831,
|
|
"learning_rate": 5.266473695403356e-06,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06528165936470032,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3964.0,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 3.9431654676258994,
|
|
"grad_norm": 0.221175268663429,
|
|
"learning_rate": 5.215643557343675e-06,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050042733550071716,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3559.0,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 3.9485611510791365,
|
|
"grad_norm": 0.23900130230665878,
|
|
"learning_rate": 5.165023101349331e-06,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057939011603593826,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3680.2,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 3.953956834532374,
|
|
"grad_norm": 0.5919625244928386,
|
|
"learning_rate": 5.1146130453549525e-06,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09014970064163208,
|
|
"step": 3665,
|
|
"valid_targets_mean": 7089.3,
|
|
"valid_targets_min": 3370
|
|
},
|
|
{
|
|
"epoch": 3.9593525179856117,
|
|
"grad_norm": 0.3674093511108311,
|
|
"learning_rate": 5.0644141043111725e-06,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07007373869419098,
|
|
"step": 3670,
|
|
"valid_targets_mean": 6154.5,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 3.964748201438849,
|
|
"grad_norm": 0.24986057123395483,
|
|
"learning_rate": 5.0144269901744105e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08185514807701111,
|
|
"step": 3675,
|
|
"valid_targets_mean": 7828.9,
|
|
"valid_targets_min": 3788
|
|
},
|
|
{
|
|
"epoch": 3.9701438848920865,
|
|
"grad_norm": 0.24068527391654715,
|
|
"learning_rate": 4.96465241189682e-06,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07114623486995697,
|
|
"step": 3680,
|
|
"valid_targets_mean": 6304.4,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 3.9755395683453236,
|
|
"grad_norm": 0.21453308953468134,
|
|
"learning_rate": 4.915091075416225e-06,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06761802732944489,
|
|
"step": 3685,
|
|
"valid_targets_mean": 6442.1,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 3.9809352517985612,
|
|
"grad_norm": 0.22272245677638786,
|
|
"learning_rate": 4.865743683646094e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08194775879383087,
|
|
"step": 3690,
|
|
"valid_targets_mean": 7300.7,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 3.9863309352517984,
|
|
"grad_norm": 0.2040522495305898,
|
|
"learning_rate": 4.8166109364656085e-06,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07368536293506622,
|
|
"step": 3695,
|
|
"valid_targets_mean": 7358.2,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 3.991726618705036,
|
|
"grad_norm": 0.18607900821381004,
|
|
"learning_rate": 4.767693530709674e-06,
|
|
"loss": 0.2069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06476297229528427,
|
|
"step": 3700,
|
|
"valid_targets_mean": 6207.4,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 3.9971223021582736,
|
|
"grad_norm": 0.18489439992509052,
|
|
"learning_rate": 4.718992160159104e-06,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0752868726849556,
|
|
"step": 3705,
|
|
"valid_targets_mean": 6631.0,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 4.002158273381295,
|
|
"grad_norm": 2.4144627583279457,
|
|
"learning_rate": 4.6705075155307354e-06,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17306122183799744,
|
|
"step": 3710,
|
|
"valid_targets_mean": 6370.5,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 4.007553956834532,
|
|
"grad_norm": 1.6684153499201237,
|
|
"learning_rate": 4.622240284467647e-06,
|
|
"loss": 0.5654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19864599406719208,
|
|
"step": 3715,
|
|
"valid_targets_mean": 7651.2,
|
|
"valid_targets_min": 2581
|
|
},
|
|
{
|
|
"epoch": 4.012949640287769,
|
|
"grad_norm": 0.9642944442694577,
|
|
"learning_rate": 4.574191151529415e-06,
|
|
"loss": 0.4858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1602593958377838,
|
|
"step": 3720,
|
|
"valid_targets_mean": 7917.3,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 4.018345323741007,
|
|
"grad_norm": 0.6830254598312485,
|
|
"learning_rate": 4.5263607981823855e-06,
|
|
"loss": 0.4311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456221342086792,
|
|
"step": 3725,
|
|
"valid_targets_mean": 7465.7,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 4.0237410071942445,
|
|
"grad_norm": 0.4854257810033399,
|
|
"learning_rate": 4.478749902790025e-06,
|
|
"loss": 0.386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903608590364456,
|
|
"step": 3730,
|
|
"valid_targets_mean": 6986.5,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 4.029136690647482,
|
|
"grad_norm": 0.347046726474395,
|
|
"learning_rate": 4.431359140603295e-06,
|
|
"loss": 0.3693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1039695218205452,
|
|
"step": 3735,
|
|
"valid_targets_mean": 6226.8,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 4.03453237410072,
|
|
"grad_norm": 0.2772273113222892,
|
|
"learning_rate": 4.384189183751064e-06,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1042383536696434,
|
|
"step": 3740,
|
|
"valid_targets_mean": 6504.0,
|
|
"valid_targets_min": 3466
|
|
},
|
|
{
|
|
"epoch": 4.039928057553957,
|
|
"grad_norm": 0.24453608735547125,
|
|
"learning_rate": 4.337240701230607e-06,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11573148518800735,
|
|
"step": 3745,
|
|
"valid_targets_mean": 7776.8,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 4.045323741007194,
|
|
"grad_norm": 0.21586608039984373,
|
|
"learning_rate": 4.2905143588980765e-06,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09866153448820114,
|
|
"step": 3750,
|
|
"valid_targets_mean": 6354.8,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 4.050719424460432,
|
|
"grad_norm": 0.19191105204259262,
|
|
"learning_rate": 4.244010819459083e-06,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09359422326087952,
|
|
"step": 3755,
|
|
"valid_targets_mean": 6384.4,
|
|
"valid_targets_min": 2441
|
|
},
|
|
{
|
|
"epoch": 4.056115107913669,
|
|
"grad_norm": 0.19682556379330507,
|
|
"learning_rate": 4.19773074245929e-06,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11499135196208954,
|
|
"step": 3760,
|
|
"valid_targets_mean": 7465.9,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 4.061510791366906,
|
|
"grad_norm": 0.19508509021880163,
|
|
"learning_rate": 4.151674784275062e-06,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09474806487560272,
|
|
"step": 3765,
|
|
"valid_targets_mean": 8703.7,
|
|
"valid_targets_min": 4392
|
|
},
|
|
{
|
|
"epoch": 4.066906474820144,
|
|
"grad_norm": 0.17061222178614796,
|
|
"learning_rate": 4.105843598104153e-06,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09427034854888916,
|
|
"step": 3770,
|
|
"valid_targets_mean": 9525.9,
|
|
"valid_targets_min": 3807
|
|
},
|
|
{
|
|
"epoch": 4.072302158273382,
|
|
"grad_norm": 0.17160340610702685,
|
|
"learning_rate": 4.060237833956446e-06,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09868845343589783,
|
|
"step": 3775,
|
|
"valid_targets_mean": 8976.7,
|
|
"valid_targets_min": 2713
|
|
},
|
|
{
|
|
"epoch": 4.077697841726619,
|
|
"grad_norm": 0.1660506816601597,
|
|
"learning_rate": 4.014858138644724e-06,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10053142160177231,
|
|
"step": 3780,
|
|
"valid_targets_mean": 9808.0,
|
|
"valid_targets_min": 5333
|
|
},
|
|
{
|
|
"epoch": 4.083093525179856,
|
|
"grad_norm": 0.16521429813205252,
|
|
"learning_rate": 3.969705155775525e-06,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09201164543628693,
|
|
"step": 3785,
|
|
"valid_targets_mean": 8583.8,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 4.088489208633094,
|
|
"grad_norm": 0.1743429431344338,
|
|
"learning_rate": 3.924779525739955e-06,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09561678022146225,
|
|
"step": 3790,
|
|
"valid_targets_mean": 8976.7,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 4.093884892086331,
|
|
"grad_norm": 0.16162255075936824,
|
|
"learning_rate": 3.88008188570468e-06,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09835436940193176,
|
|
"step": 3795,
|
|
"valid_targets_mean": 8862.6,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 4.099280575539568,
|
|
"grad_norm": 0.16984248631136462,
|
|
"learning_rate": 3.835612869602839e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09938459098339081,
|
|
"step": 3800,
|
|
"valid_targets_mean": 9068.4,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 4.104676258992805,
|
|
"grad_norm": 0.16478510362001053,
|
|
"learning_rate": 3.7913731081250603e-06,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10577350854873657,
|
|
"step": 3805,
|
|
"valid_targets_mean": 10089.5,
|
|
"valid_targets_min": 5419
|
|
},
|
|
{
|
|
"epoch": 4.1100719424460435,
|
|
"grad_norm": 0.17561269067273957,
|
|
"learning_rate": 3.7473632287105322e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09386192262172699,
|
|
"step": 3810,
|
|
"valid_targets_mean": 9003.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 4.115467625899281,
|
|
"grad_norm": 0.16591728151196222,
|
|
"learning_rate": 3.7035838555380844e-06,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10163618624210358,
|
|
"step": 3815,
|
|
"valid_targets_mean": 10012.9,
|
|
"valid_targets_min": 5197
|
|
},
|
|
{
|
|
"epoch": 4.120863309352518,
|
|
"grad_norm": 0.16246614186024727,
|
|
"learning_rate": 3.6600356095173715e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10004321485757828,
|
|
"step": 3820,
|
|
"valid_targets_mean": 9826.7,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 4.126258992805756,
|
|
"grad_norm": 0.16120743976737234,
|
|
"learning_rate": 3.61671910828e-06,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08791302144527435,
|
|
"step": 3825,
|
|
"valid_targets_mean": 8842.1,
|
|
"valid_targets_min": 4239
|
|
},
|
|
{
|
|
"epoch": 4.131654676258993,
|
|
"grad_norm": 0.1687100714276889,
|
|
"learning_rate": 3.573634966170851e-06,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0904637798666954,
|
|
"step": 3830,
|
|
"valid_targets_mean": 9048.3,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 4.13705035971223,
|
|
"grad_norm": 0.15543439629807607,
|
|
"learning_rate": 3.5307837942393076e-06,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0958443284034729,
|
|
"step": 3835,
|
|
"valid_targets_mean": 9484.0,
|
|
"valid_targets_min": 4512
|
|
},
|
|
{
|
|
"epoch": 4.142446043165467,
|
|
"grad_norm": 0.15415266147076465,
|
|
"learning_rate": 3.4881662002305917e-06,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09960229694843292,
|
|
"step": 3840,
|
|
"valid_targets_mean": 9844.3,
|
|
"valid_targets_min": 3863
|
|
},
|
|
{
|
|
"epoch": 4.147841726618705,
|
|
"grad_norm": 0.15358476623633452,
|
|
"learning_rate": 3.4457827885771966e-06,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08859887719154358,
|
|
"step": 3845,
|
|
"valid_targets_mean": 9963.1,
|
|
"valid_targets_min": 5225
|
|
},
|
|
{
|
|
"epoch": 4.1532374100719425,
|
|
"grad_norm": 0.16508878820243172,
|
|
"learning_rate": 3.4036341603902477e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08978402614593506,
|
|
"step": 3850,
|
|
"valid_targets_mean": 9133.7,
|
|
"valid_targets_min": 2782
|
|
},
|
|
{
|
|
"epoch": 4.15863309352518,
|
|
"grad_norm": 0.16896043458632393,
|
|
"learning_rate": 3.3617209134510187e-06,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10617595911026001,
|
|
"step": 3855,
|
|
"valid_targets_mean": 9841.2,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 4.164028776978418,
|
|
"grad_norm": 0.15226816259721462,
|
|
"learning_rate": 3.320043642202444e-06,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08858337253332138,
|
|
"step": 3860,
|
|
"valid_targets_mean": 9260.1,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 4.169424460431655,
|
|
"grad_norm": 0.1726541751606985,
|
|
"learning_rate": 3.278602937740676e-06,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09757785499095917,
|
|
"step": 3865,
|
|
"valid_targets_mean": 9212.2,
|
|
"valid_targets_min": 3467
|
|
},
|
|
{
|
|
"epoch": 4.174820143884892,
|
|
"grad_norm": 0.15730236563057853,
|
|
"learning_rate": 3.237399387806732e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09825129806995392,
|
|
"step": 3870,
|
|
"valid_targets_mean": 10565.9,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 4.180215827338129,
|
|
"grad_norm": 0.15865269776441218,
|
|
"learning_rate": 3.1964335767781084e-06,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10032807290554047,
|
|
"step": 3875,
|
|
"valid_targets_mean": 10235.8,
|
|
"valid_targets_min": 5141
|
|
},
|
|
{
|
|
"epoch": 4.185611510791367,
|
|
"grad_norm": 0.18503196247653247,
|
|
"learning_rate": 3.1557060856605528e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10101167857646942,
|
|
"step": 3880,
|
|
"valid_targets_mean": 10594.5,
|
|
"valid_targets_min": 4701
|
|
},
|
|
{
|
|
"epoch": 4.191007194244604,
|
|
"grad_norm": 0.15803701127225425,
|
|
"learning_rate": 3.115217492079774e-06,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09037478268146515,
|
|
"step": 3885,
|
|
"valid_targets_mean": 9161.7,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 4.1964028776978415,
|
|
"grad_norm": 0.1625560963562969,
|
|
"learning_rate": 3.0749683702732745e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09223361313343048,
|
|
"step": 3890,
|
|
"valid_targets_mean": 9593.7,
|
|
"valid_targets_min": 5120
|
|
},
|
|
{
|
|
"epoch": 4.2017985611510795,
|
|
"grad_norm": 0.16436761098275438,
|
|
"learning_rate": 3.0349592910822024e-06,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08967969566583633,
|
|
"step": 3895,
|
|
"valid_targets_mean": 9896.2,
|
|
"valid_targets_min": 3759
|
|
},
|
|
{
|
|
"epoch": 4.207194244604317,
|
|
"grad_norm": 0.1631968746338814,
|
|
"learning_rate": 2.99519082194325e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09515407681465149,
|
|
"step": 3900,
|
|
"valid_targets_mean": 9676.5,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 4.212589928057554,
|
|
"grad_norm": 0.20605164099177786,
|
|
"learning_rate": 2.9556635268806165e-06,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09326553344726562,
|
|
"step": 3905,
|
|
"valid_targets_mean": 6269.3,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 4.217985611510791,
|
|
"grad_norm": 0.21540492069674103,
|
|
"learning_rate": 2.916377966497996e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09968217462301254,
|
|
"step": 3910,
|
|
"valid_targets_mean": 6197.9,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 4.223381294964029,
|
|
"grad_norm": 1.6834676052354365,
|
|
"learning_rate": 2.8773346979706307e-06,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27662307024002075,
|
|
"step": 3915,
|
|
"valid_targets_mean": 5028.6,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 4.228776978417266,
|
|
"grad_norm": 1.3418501984584648,
|
|
"learning_rate": 2.8385342750374235e-06,
|
|
"loss": 0.6442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19028660655021667,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3933.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 4.234172661870503,
|
|
"grad_norm": 0.8810408795061826,
|
|
"learning_rate": 2.7999772479930555e-06,
|
|
"loss": 0.6138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705428808927536,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3912.4,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.239568345323741,
|
|
"grad_norm": 0.5408860755160454,
|
|
"learning_rate": 2.7616641636802067e-06,
|
|
"loss": 0.5575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1930476576089859,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3427.5,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 4.2449640287769785,
|
|
"grad_norm": 0.3655672777739378,
|
|
"learning_rate": 2.723595565481787e-06,
|
|
"loss": 0.5387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14913193881511688,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4285.9,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 4.250359712230216,
|
|
"grad_norm": 0.36745117863636767,
|
|
"learning_rate": 2.6857719933132355e-06,
|
|
"loss": 0.5147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19718420505523682,
|
|
"step": 3940,
|
|
"valid_targets_mean": 4571.2,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 4.255755395683453,
|
|
"grad_norm": 0.30569634371798116,
|
|
"learning_rate": 2.648193983614862e-06,
|
|
"loss": 0.5066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14298444986343384,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3360.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 4.261151079136691,
|
|
"grad_norm": 0.2776957534999629,
|
|
"learning_rate": 2.610862069344233e-06,
|
|
"loss": 0.5067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15335969626903534,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3748.8,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 4.266546762589928,
|
|
"grad_norm": 0.2556120848845968,
|
|
"learning_rate": 2.5737767799686266e-06,
|
|
"loss": 0.4896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21875327825546265,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4315.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 4.271942446043165,
|
|
"grad_norm": 0.23237444614718647,
|
|
"learning_rate": 2.5369386414575114e-06,
|
|
"loss": 0.4774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17238110303878784,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5165.7,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 4.277338129496403,
|
|
"grad_norm": 0.2508003785672165,
|
|
"learning_rate": 2.500348176275074e-06,
|
|
"loss": 0.4795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14252984523773193,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4124.3,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 4.28273381294964,
|
|
"grad_norm": 0.26186296138613574,
|
|
"learning_rate": 2.4640059033728524e-06,
|
|
"loss": 0.4878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16442754864692688,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4181.1,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 4.2881294964028775,
|
|
"grad_norm": 0.263094121837153,
|
|
"learning_rate": 2.42791233818233e-06,
|
|
"loss": 0.4631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13875924050807953,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3683.8,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 4.293525179856115,
|
|
"grad_norm": 0.24671300464390836,
|
|
"learning_rate": 2.3920679926076473e-06,
|
|
"loss": 0.5,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24331173300743103,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4964.7,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 4.298920863309353,
|
|
"grad_norm": 0.23557861507215638,
|
|
"learning_rate": 2.356473375018342e-06,
|
|
"loss": 0.4724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16546548902988434,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4663.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 4.30431654676259,
|
|
"grad_norm": 0.22531490245285107,
|
|
"learning_rate": 2.3211289902421252e-06,
|
|
"loss": 0.5032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752905547618866,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5404.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 4.309712230215827,
|
|
"grad_norm": 0.2367124630545605,
|
|
"learning_rate": 2.286035339557755e-06,
|
|
"loss": 0.4856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15066088736057281,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4281.5,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 4.315107913669065,
|
|
"grad_norm": 0.24574231859994697,
|
|
"learning_rate": 2.2511929206878656e-06,
|
|
"loss": 0.4838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18354865908622742,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4570.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 4.320503597122302,
|
|
"grad_norm": 0.26056741797657784,
|
|
"learning_rate": 2.216602227791986e-06,
|
|
"loss": 0.5008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1855241060256958,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4505.6,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 4.325899280575539,
|
|
"grad_norm": 0.2251890469132029,
|
|
"learning_rate": 2.182263751459468e-06,
|
|
"loss": 0.4738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13076771795749664,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4379.0,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 4.331294964028777,
|
|
"grad_norm": 0.40290627332315454,
|
|
"learning_rate": 2.1481779787025503e-06,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04982835054397583,
|
|
"step": 4015,
|
|
"valid_targets_mean": 5018.2,
|
|
"valid_targets_min": 2523
|
|
},
|
|
{
|
|
"epoch": 4.336690647482015,
|
|
"grad_norm": 0.2431848775855715,
|
|
"learning_rate": 2.1143453929494707e-06,
|
|
"loss": 0.1428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0441715270280838,
|
|
"step": 4020,
|
|
"valid_targets_mean": 5262.3,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 4.342086330935252,
|
|
"grad_norm": 0.2083430275394758,
|
|
"learning_rate": 2.080766474037579e-06,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039096444845199585,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5112.8,
|
|
"valid_targets_min": 3095
|
|
},
|
|
{
|
|
"epoch": 4.347482014388489,
|
|
"grad_norm": 0.18318837389622952,
|
|
"learning_rate": 2.0474416982065447e-06,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03949829936027527,
|
|
"step": 4030,
|
|
"valid_targets_mean": 5352.1,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 4.352877697841727,
|
|
"grad_norm": 0.18639178706476692,
|
|
"learning_rate": 2.0143715380916062e-06,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04284051060676575,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5610.1,
|
|
"valid_targets_min": 3069
|
|
},
|
|
{
|
|
"epoch": 4.358273381294964,
|
|
"grad_norm": 0.17327072737107785,
|
|
"learning_rate": 1.9815564627168627e-06,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04036891460418701,
|
|
"step": 4040,
|
|
"valid_targets_mean": 5206.3,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 4.363669064748201,
|
|
"grad_norm": 0.1717928754549885,
|
|
"learning_rate": 1.9489969374886344e-06,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042224980890750885,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5573.9,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 4.369064748201438,
|
|
"grad_norm": 0.1700131811657217,
|
|
"learning_rate": 1.9166934241888224e-06,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041751161217689514,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5469.3,
|
|
"valid_targets_min": 3506
|
|
},
|
|
{
|
|
"epoch": 4.3744604316546765,
|
|
"grad_norm": 0.16433048671471462,
|
|
"learning_rate": 1.8846463809684223e-06,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04064381867647171,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5436.2,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 4.379856115107914,
|
|
"grad_norm": 0.16562460790003314,
|
|
"learning_rate": 1.8528562623409719e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04423912614583969,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5540.5,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 4.385251798561151,
|
|
"grad_norm": 0.16584172579232853,
|
|
"learning_rate": 1.8213235191761192e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041528090834617615,
|
|
"step": 4065,
|
|
"valid_targets_mean": 5831.1,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 4.390647482014389,
|
|
"grad_norm": 0.15803214039170202,
|
|
"learning_rate": 1.790048598693257e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0385250449180603,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5347.5,
|
|
"valid_targets_min": 2417
|
|
},
|
|
{
|
|
"epoch": 4.396043165467626,
|
|
"grad_norm": 0.16827432707340964,
|
|
"learning_rate": 1.759031944455134e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03851519525051117,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5258.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 4.401438848920863,
|
|
"grad_norm": 0.16877898340787573,
|
|
"learning_rate": 1.7282739963616047e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03800782188773155,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5099.8,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 4.4068345323741,
|
|
"grad_norm": 0.16816463932625791,
|
|
"learning_rate": 1.6977751906433582e-06,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040275245904922485,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5578.4,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 4.412230215827338,
|
|
"grad_norm": 0.16399999029336793,
|
|
"learning_rate": 1.6675359598557462e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03474169969558716,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5036.6,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 4.4176258992805755,
|
|
"grad_norm": 0.17372421794415135,
|
|
"learning_rate": 1.637556732872667e-06,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037744954228401184,
|
|
"step": 4095,
|
|
"valid_targets_mean": 5270.8,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 4.423021582733813,
|
|
"grad_norm": 0.176115703464519,
|
|
"learning_rate": 1.607837934880432e-06,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04005633294582367,
|
|
"step": 4100,
|
|
"valid_targets_mean": 5462.3,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 4.428417266187051,
|
|
"grad_norm": 0.18474488498928326,
|
|
"learning_rate": 1.5783799873717942e-06,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038850087672472,
|
|
"step": 4105,
|
|
"valid_targets_mean": 5422.2,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 4.433812949640288,
|
|
"grad_norm": 0.168834394940518,
|
|
"learning_rate": 1.54918330813993e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03854703530669212,
|
|
"step": 4110,
|
|
"valid_targets_mean": 5195.3,
|
|
"valid_targets_min": 3110
|
|
},
|
|
{
|
|
"epoch": 4.439208633093525,
|
|
"grad_norm": 0.40025057518256296,
|
|
"learning_rate": 1.5202483112725298e-06,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08728505671024323,
|
|
"step": 4115,
|
|
"valid_targets_mean": 2715.2,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 4.444604316546762,
|
|
"grad_norm": 0.3773580224551266,
|
|
"learning_rate": 1.4915754071459176e-06,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08585727214813232,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3519.0,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 4.45,
|
|
"grad_norm": 0.38343914916587485,
|
|
"learning_rate": 1.4631650024192401e-06,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0678052306175232,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2402.4,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 4.455395683453237,
|
|
"grad_norm": 0.35847716926453377,
|
|
"learning_rate": 1.4350175000286991e-06,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07550107687711716,
|
|
"step": 4130,
|
|
"valid_targets_mean": 2721.7,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 4.4607913669064745,
|
|
"grad_norm": 0.328381001425428,
|
|
"learning_rate": 1.4071332991818199e-06,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06264539062976837,
|
|
"step": 4135,
|
|
"valid_targets_mean": 2738.3,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 4.4661870503597125,
|
|
"grad_norm": 0.34427115888761467,
|
|
"learning_rate": 1.3795127953518029e-06,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0674482062458992,
|
|
"step": 4140,
|
|
"valid_targets_mean": 2599.6,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 4.47158273381295,
|
|
"grad_norm": 0.3520811769364616,
|
|
"learning_rate": 1.3521563802719273e-06,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07591848075389862,
|
|
"step": 4145,
|
|
"valid_targets_mean": 2981.7,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 4.476978417266187,
|
|
"grad_norm": 0.33785138097380185,
|
|
"learning_rate": 1.3250644419299663e-06,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0705064982175827,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3400.5,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 4.482374100719424,
|
|
"grad_norm": 0.3368578534727321,
|
|
"learning_rate": 1.2982373645627066e-06,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0626872330904007,
|
|
"step": 4155,
|
|
"valid_targets_mean": 2426.8,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 4.487769784172662,
|
|
"grad_norm": 0.3057601447210225,
|
|
"learning_rate": 1.271675528650489e-06,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06140948086977005,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3239.3,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 4.493165467625899,
|
|
"grad_norm": 0.3240770464155883,
|
|
"learning_rate": 1.2453793109118162e-06,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05388593673706055,
|
|
"step": 4165,
|
|
"valid_targets_mean": 2395.9,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 4.498561151079136,
|
|
"grad_norm": 0.3320743159843599,
|
|
"learning_rate": 1.219349084298007e-06,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05719321593642235,
|
|
"step": 4170,
|
|
"valid_targets_mean": 2764.9,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 4.503956834532374,
|
|
"grad_norm": 0.35180831073085195,
|
|
"learning_rate": 1.1935852179879048e-06,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05476302281022072,
|
|
"step": 4175,
|
|
"valid_targets_mean": 2182.6,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 4.5093525179856115,
|
|
"grad_norm": 0.32022389967944026,
|
|
"learning_rate": 1.1680880773826587e-06,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05728494003415108,
|
|
"step": 4180,
|
|
"valid_targets_mean": 2757.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.514748201438849,
|
|
"grad_norm": 0.328829645928971,
|
|
"learning_rate": 1.142858024100517e-06,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05419033765792847,
|
|
"step": 4185,
|
|
"valid_targets_mean": 2362.7,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 4.520143884892086,
|
|
"grad_norm": 0.32013175007760714,
|
|
"learning_rate": 1.1178954159717036e-06,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07098886370658875,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3076.5,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 4.525539568345324,
|
|
"grad_norm": 0.30654439241851067,
|
|
"learning_rate": 1.0932006070333601e-06,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0531986728310585,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3429.8,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 4.530935251798561,
|
|
"grad_norm": 0.34984039410040474,
|
|
"learning_rate": 1.0687739475245085e-06,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0673193633556366,
|
|
"step": 4200,
|
|
"valid_targets_mean": 2517.6,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 4.536330935251798,
|
|
"grad_norm": 0.3459021612111168,
|
|
"learning_rate": 1.0446157838810889e-06,
|
|
"loss": 0.194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061149854212999344,
|
|
"step": 4205,
|
|
"valid_targets_mean": 2430.2,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.541726618705036,
|
|
"grad_norm": 0.5606399552055146,
|
|
"learning_rate": 1.020726458731045e-06,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1264970302581787,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 4.547122302158273,
|
|
"grad_norm": 0.7668987925906029,
|
|
"learning_rate": 9.971063108894597e-07,
|
|
"loss": 0.437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1218995451927185,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4209.8,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 4.5525179856115106,
|
|
"grad_norm": 0.6367537758192925,
|
|
"learning_rate": 9.737556753537736e-07,
|
|
"loss": 0.4227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11969755589962006,
|
|
"step": 4220,
|
|
"valid_targets_mean": 4164.6,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 4.557913669064748,
|
|
"grad_norm": 0.5225725757163068,
|
|
"learning_rate": 9.506748832989876e-07,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10996963828802109,
|
|
"step": 4225,
|
|
"valid_targets_mean": 2817.8,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 4.563309352517986,
|
|
"grad_norm": 0.37013694923272683,
|
|
"learning_rate": 9.278642620730217e-07,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13914895057678223,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4294.8,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 4.568705035971223,
|
|
"grad_norm": 0.2831466418157125,
|
|
"learning_rate": 9.05324135192025e-07,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10350144654512405,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5346.6,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 4.57410071942446,
|
|
"grad_norm": 0.2647336617515172,
|
|
"learning_rate": 8.830548223358182e-07,
|
|
"loss": 0.4203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18230444192886353,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4882.9,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 4.579496402877698,
|
|
"grad_norm": 0.2460565044912175,
|
|
"learning_rate": 8.610566393433428e-07,
|
|
"loss": 0.3734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13274991512298584,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3881.3,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 4.584892086330935,
|
|
"grad_norm": 0.2752204009354723,
|
|
"learning_rate": 8.393298982081877e-07,
|
|
"loss": 0.3793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1701105386018753,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4113.5,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 4.590287769784172,
|
|
"grad_norm": 0.2257592455156471,
|
|
"learning_rate": 8.178749070741699e-07,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11546632647514343,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3982.2,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 4.5956834532374105,
|
|
"grad_norm": 0.2353639571547811,
|
|
"learning_rate": 7.96691970230945e-07,
|
|
"loss": 0.3837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12696246802806854,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4060.7,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 4.601079136690648,
|
|
"grad_norm": 0.2507415859561529,
|
|
"learning_rate": 7.757813881097176e-07,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1402038037776947,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3746.6,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 4.606474820143885,
|
|
"grad_norm": 0.2409641360157786,
|
|
"learning_rate": 7.551434572789685e-07,
|
|
"loss": 0.3719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13427484035491943,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3811.0,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 4.611870503597122,
|
|
"grad_norm": 0.23081322424908368,
|
|
"learning_rate": 7.347784704402295e-07,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10092706233263016,
|
|
"step": 4275,
|
|
"valid_targets_mean": 3178.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.61726618705036,
|
|
"grad_norm": 0.23615056515824565,
|
|
"learning_rate": 7.146867164239734e-07,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12947946786880493,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3965.6,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 4.622661870503597,
|
|
"grad_norm": 0.2552825906747145,
|
|
"learning_rate": 6.948684801854777e-07,
|
|
"loss": 0.3724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11590872704982758,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3233.6,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 4.628057553956834,
|
|
"grad_norm": 0.25014659127212396,
|
|
"learning_rate": 6.753240428008067e-07,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11511124670505524,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3789.5,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 4.633453237410072,
|
|
"grad_norm": 0.2342255370600225,
|
|
"learning_rate": 6.560536814628205e-07,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09665738046169281,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3069.0,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 4.6388489208633095,
|
|
"grad_norm": 0.22365208459304617,
|
|
"learning_rate": 6.370576694772301e-07,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11407418549060822,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4238.5,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 4.644244604316547,
|
|
"grad_norm": 0.2475045143415356,
|
|
"learning_rate": 6.183362762587508e-07,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12490449100732803,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3785.9,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 4.649640287769785,
|
|
"grad_norm": 0.2440971036906907,
|
|
"learning_rate": 5.998897673272463e-07,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377415657043457,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3915.9,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 4.655035971223022,
|
|
"grad_norm": 0.2648489963008713,
|
|
"learning_rate": 5.817184043039814e-07,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07149966061115265,
|
|
"step": 4315,
|
|
"valid_targets_mean": 1047.1,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 4.660431654676259,
|
|
"grad_norm": 0.3606135475279402,
|
|
"learning_rate": 5.638224449079199e-07,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10155492275953293,
|
|
"step": 4320,
|
|
"valid_targets_mean": 8119.0,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 4.665827338129496,
|
|
"grad_norm": 0.3438523651401393,
|
|
"learning_rate": 5.462021429520525e-07,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07968807220458984,
|
|
"step": 4325,
|
|
"valid_targets_mean": 5534.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 4.671223021582734,
|
|
"grad_norm": 0.27432049752077337,
|
|
"learning_rate": 5.288577483397994e-07,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1107410341501236,
|
|
"step": 4330,
|
|
"valid_targets_mean": 7868.5,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 4.676618705035971,
|
|
"grad_norm": 0.2294186376372348,
|
|
"learning_rate": 5.117895070614797e-07,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08071195334196091,
|
|
"step": 4335,
|
|
"valid_targets_mean": 7399.9,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 4.6820143884892085,
|
|
"grad_norm": 0.2723921592858481,
|
|
"learning_rate": 4.949976611908058e-07,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10066443681716919,
|
|
"step": 4340,
|
|
"valid_targets_mean": 6082.5,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 4.6874100719424465,
|
|
"grad_norm": 0.20717409419158783,
|
|
"learning_rate": 4.784824488814588e-07,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06997359544038773,
|
|
"step": 4345,
|
|
"valid_targets_mean": 5861.0,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 4.692805755395684,
|
|
"grad_norm": 0.19276466889343785,
|
|
"learning_rate": 4.622441043637094e-07,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09250351786613464,
|
|
"step": 4350,
|
|
"valid_targets_mean": 6673.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 4.698201438848921,
|
|
"grad_norm": 0.18705838877375955,
|
|
"learning_rate": 4.4628285794109827e-07,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07174015045166016,
|
|
"step": 4355,
|
|
"valid_targets_mean": 6232.8,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 4.703597122302158,
|
|
"grad_norm": 0.17618471801825594,
|
|
"learning_rate": 4.3059893598716984e-07,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060418836772441864,
|
|
"step": 4360,
|
|
"valid_targets_mean": 5522.1,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 4.708992805755396,
|
|
"grad_norm": 0.18043700954349173,
|
|
"learning_rate": 4.151925609422502e-07,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09037858247756958,
|
|
"step": 4365,
|
|
"valid_targets_mean": 7316.1,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 4.714388489208633,
|
|
"grad_norm": 0.18243987864207276,
|
|
"learning_rate": 4.0006395131030997e-07,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10382995009422302,
|
|
"step": 4370,
|
|
"valid_targets_mean": 7420.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 4.71978417266187,
|
|
"grad_norm": 0.30294113308095083,
|
|
"learning_rate": 3.852133216558529e-07,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05052044242620468,
|
|
"step": 4375,
|
|
"valid_targets_mean": 1202.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.725179856115108,
|
|
"grad_norm": 0.166407015530814,
|
|
"learning_rate": 3.706408826008767e-07,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06354958564043045,
|
|
"step": 4380,
|
|
"valid_targets_mean": 6558.3,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 4.7305755395683455,
|
|
"grad_norm": 0.15936691292842942,
|
|
"learning_rate": 3.563468408218862e-07,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04878155142068863,
|
|
"step": 4385,
|
|
"valid_targets_mean": 5374.2,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 4.735971223021583,
|
|
"grad_norm": 0.1790096814786103,
|
|
"learning_rate": 3.423313990469601e-07,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07422822713851929,
|
|
"step": 4390,
|
|
"valid_targets_mean": 6666.2,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.74136690647482,
|
|
"grad_norm": 0.18096516767992651,
|
|
"learning_rate": 3.285947560528846e-07,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07195042073726654,
|
|
"step": 4395,
|
|
"valid_targets_mean": 6206.8,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 4.746762589928058,
|
|
"grad_norm": 0.17864778079059726,
|
|
"learning_rate": 3.1513710666231543e-07,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08867624402046204,
|
|
"step": 4400,
|
|
"valid_targets_mean": 6207.1,
|
|
"valid_targets_min": 135
|
|
},
|
|
{
|
|
"epoch": 4.752158273381295,
|
|
"grad_norm": 0.16904106365614788,
|
|
"learning_rate": 3.0195864174103586e-07,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08758699148893356,
|
|
"step": 4405,
|
|
"valid_targets_mean": 6138.9,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 4.757553956834532,
|
|
"grad_norm": 0.16290587320660668,
|
|
"learning_rate": 2.890595481952341e-07,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08792467415332794,
|
|
"step": 4410,
|
|
"valid_targets_mean": 7026.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 4.76294964028777,
|
|
"grad_norm": 0.3052682289860589,
|
|
"learning_rate": 2.764400089688635e-07,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056131742894649506,
|
|
"step": 4415,
|
|
"valid_targets_mean": 5165.2,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 4.768345323741007,
|
|
"grad_norm": 0.27823571213892445,
|
|
"learning_rate": 2.6410020304104e-07,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051333166658878326,
|
|
"step": 4420,
|
|
"valid_targets_mean": 5186.9,
|
|
"valid_targets_min": 214
|
|
},
|
|
{
|
|
"epoch": 4.7737410071942445,
|
|
"grad_norm": 0.24163050953364038,
|
|
"learning_rate": 2.520403054235021e-07,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05068341642618179,
|
|
"step": 4425,
|
|
"valid_targets_mean": 4295.1,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 4.779136690647482,
|
|
"grad_norm": 0.22547548060635522,
|
|
"learning_rate": 2.402604871581438e-07,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049500636756420135,
|
|
"step": 4430,
|
|
"valid_targets_mean": 4718.3,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 4.78453237410072,
|
|
"grad_norm": 0.19752808193677948,
|
|
"learning_rate": 2.2876091531456977e-07,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04920800030231476,
|
|
"step": 4435,
|
|
"valid_targets_mean": 5277.9,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 4.789928057553957,
|
|
"grad_norm": 0.19942557180801115,
|
|
"learning_rate": 2.1754175298773328e-07,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050529010593891144,
|
|
"step": 4440,
|
|
"valid_targets_mean": 4981.3,
|
|
"valid_targets_min": 216
|
|
},
|
|
{
|
|
"epoch": 4.795323741007194,
|
|
"grad_norm": 0.18947848661594152,
|
|
"learning_rate": 2.0660315929563523e-07,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050830066204071045,
|
|
"step": 4445,
|
|
"valid_targets_mean": 5764.1,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 4.800719424460432,
|
|
"grad_norm": 0.18872289549632462,
|
|
"learning_rate": 1.9594528937704193e-07,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05120057612657547,
|
|
"step": 4450,
|
|
"valid_targets_mean": 5180.8,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 4.806115107913669,
|
|
"grad_norm": 0.18326172098340449,
|
|
"learning_rate": 1.855682943893089e-07,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052478909492492676,
|
|
"step": 4455,
|
|
"valid_targets_mean": 5028.8,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 4.811510791366906,
|
|
"grad_norm": 0.1778200881722506,
|
|
"learning_rate": 1.7547232150622262e-07,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04512188211083412,
|
|
"step": 4460,
|
|
"valid_targets_mean": 5127.5,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 4.816906474820144,
|
|
"grad_norm": 0.18516690630675472,
|
|
"learning_rate": 1.6565751391591999e-07,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049813345074653625,
|
|
"step": 4465,
|
|
"valid_targets_mean": 5064.0,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 4.822302158273382,
|
|
"grad_norm": 0.18393919341105275,
|
|
"learning_rate": 1.5612401081885665e-07,
|
|
"loss": 0.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049292586743831635,
|
|
"step": 4470,
|
|
"valid_targets_mean": 5678.2,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 4.827697841726619,
|
|
"grad_norm": 0.19541054299274788,
|
|
"learning_rate": 1.468719474258262e-07,
|
|
"loss": 0.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042604271322488785,
|
|
"step": 4475,
|
|
"valid_targets_mean": 4230.2,
|
|
"valid_targets_min": 188
|
|
},
|
|
{
|
|
"epoch": 4.833093525179856,
|
|
"grad_norm": 0.18899110764490626,
|
|
"learning_rate": 1.3790145495605534e-07,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045704878866672516,
|
|
"step": 4480,
|
|
"valid_targets_mean": 4367.6,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 4.838489208633094,
|
|
"grad_norm": 0.1863727102253402,
|
|
"learning_rate": 1.2921266063532722e-07,
|
|
"loss": 0.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038941722363233566,
|
|
"step": 4485,
|
|
"valid_targets_mean": 4216.6,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 4.843884892086331,
|
|
"grad_norm": 0.18834498800708907,
|
|
"learning_rate": 1.208056876941921e-07,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059718284755945206,
|
|
"step": 4490,
|
|
"valid_targets_mean": 6083.8,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 4.849280575539568,
|
|
"grad_norm": 0.16515536186082128,
|
|
"learning_rate": 1.1268065536621076e-07,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04720407351851463,
|
|
"step": 4495,
|
|
"valid_targets_mean": 5689.1,
|
|
"valid_targets_min": 179
|
|
},
|
|
{
|
|
"epoch": 4.854676258992805,
|
|
"grad_norm": 0.18299265551336197,
|
|
"learning_rate": 1.0483767888626039e-07,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052138544619083405,
|
|
"step": 4500,
|
|
"valid_targets_mean": 6020.2,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 4.8600719424460435,
|
|
"grad_norm": 0.18689126705025794,
|
|
"learning_rate": 9.727686948891368e-08,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047048501670360565,
|
|
"step": 4505,
|
|
"valid_targets_mean": 5016.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 4.865467625899281,
|
|
"grad_norm": 0.18529743285230532,
|
|
"learning_rate": 8.999833440684447e-08,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04977427423000336,
|
|
"step": 4510,
|
|
"valid_targets_mean": 5500.6,
|
|
"valid_targets_min": 164
|
|
},
|
|
{
|
|
"epoch": 4.870863309352518,
|
|
"grad_norm": 0.1829693478533776,
|
|
"learning_rate": 8.300217686932232e-08,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03828730434179306,
|
|
"step": 4515,
|
|
"valid_targets_mean": 4179.7,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 4.876258992805756,
|
|
"grad_norm": 0.33402148578356017,
|
|
"learning_rate": 7.628849610073596e-08,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06414258480072021,
|
|
"step": 4520,
|
|
"valid_targets_mean": 4050.7,
|
|
"valid_targets_min": 2141
|
|
},
|
|
{
|
|
"epoch": 4.881654676258993,
|
|
"grad_norm": 0.3156074791784976,
|
|
"learning_rate": 6.985738731919434e-08,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05222707614302635,
|
|
"step": 4525,
|
|
"valid_targets_mean": 3359.9,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 4.88705035971223,
|
|
"grad_norm": 0.3055004271993576,
|
|
"learning_rate": 6.370894173517439e-08,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06711511313915253,
|
|
"step": 4530,
|
|
"valid_targets_mean": 4043.0,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 4.892446043165467,
|
|
"grad_norm": 0.3009476924778784,
|
|
"learning_rate": 5.7843246550219887e-08,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06701280921697617,
|
|
"step": 4535,
|
|
"valid_targets_mean": 4069.6,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 4.897841726618705,
|
|
"grad_norm": 0.29068767080649144,
|
|
"learning_rate": 5.2260384955717944e-08,
|
|
"loss": 0.1719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0508812814950943,
|
|
"step": 4540,
|
|
"valid_targets_mean": 3723.7,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 4.9032374100719425,
|
|
"grad_norm": 0.3004813574555883,
|
|
"learning_rate": 4.6960436131708864e-08,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06259141117334366,
|
|
"step": 4545,
|
|
"valid_targets_mean": 3596.0,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 4.90863309352518,
|
|
"grad_norm": 0.2788605292002887,
|
|
"learning_rate": 4.194347524576703e-08,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07024587690830231,
|
|
"step": 4550,
|
|
"valid_targets_mean": 4496.0,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 4.914028776978418,
|
|
"grad_norm": 0.2764457611136166,
|
|
"learning_rate": 3.72095734519351e-08,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05928371846675873,
|
|
"step": 4555,
|
|
"valid_targets_mean": 3859.5,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 4.919424460431655,
|
|
"grad_norm": 0.2752211357197214,
|
|
"learning_rate": 3.27587978897137e-08,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05350375548005104,
|
|
"step": 4560,
|
|
"valid_targets_mean": 3512.5,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 4.924820143884892,
|
|
"grad_norm": 0.26037223736981274,
|
|
"learning_rate": 2.8591211683108853e-08,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0520026832818985,
|
|
"step": 4565,
|
|
"valid_targets_mean": 3790.1,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 4.930215827338129,
|
|
"grad_norm": 0.2724751865671966,
|
|
"learning_rate": 2.4706873939739363e-08,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05120614171028137,
|
|
"step": 4570,
|
|
"valid_targets_mean": 3425.4,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 4.935611510791367,
|
|
"grad_norm": 0.2866891488104517,
|
|
"learning_rate": 2.110583974999081e-08,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056625403463840485,
|
|
"step": 4575,
|
|
"valid_targets_mean": 3593.8,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 4.941007194244604,
|
|
"grad_norm": 0.27249120820622225,
|
|
"learning_rate": 1.7788160186249514e-08,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04923334717750549,
|
|
"step": 4580,
|
|
"valid_targets_mean": 3521.3,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 4.9464028776978415,
|
|
"grad_norm": 0.27961615447781774,
|
|
"learning_rate": 1.4753882302160905e-08,
|
|
"loss": 0.1694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06226489692926407,
|
|
"step": 4585,
|
|
"valid_targets_mean": 3750.6,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 4.9517985611510795,
|
|
"grad_norm": 0.4472567042929453,
|
|
"learning_rate": 1.2003049131972255e-08,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0990709662437439,
|
|
"step": 4590,
|
|
"valid_targets_mean": 6018.4,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 4.957194244604317,
|
|
"grad_norm": 0.5859956772385759,
|
|
"learning_rate": 9.535699689919853e-09,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08023358881473541,
|
|
"step": 4595,
|
|
"valid_targets_mean": 5978.5,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 4.962589928057554,
|
|
"grad_norm": 0.5625743385263059,
|
|
"learning_rate": 7.351868969676101e-09,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.077103391289711,
|
|
"step": 4600,
|
|
"valid_targets_mean": 6960.8,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 4.967985611510791,
|
|
"grad_norm": 0.5908353529031541,
|
|
"learning_rate": 5.451587943856584e-09,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07843770831823349,
|
|
"step": 4605,
|
|
"valid_targets_mean": 6535.1,
|
|
"valid_targets_min": 3083
|
|
},
|
|
{
|
|
"epoch": 4.973381294964029,
|
|
"grad_norm": 0.5578217960620926,
|
|
"learning_rate": 3.834883563567093e-09,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06963906437158585,
|
|
"step": 4610,
|
|
"valid_targets_mean": 6275.1,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 4.978776978417266,
|
|
"grad_norm": 0.5854507218728615,
|
|
"learning_rate": 2.501778758043916e-09,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08105125278234482,
|
|
"step": 4615,
|
|
"valid_targets_mean": 6823.4,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 4.984172661870503,
|
|
"grad_norm": 0.5204993831952275,
|
|
"learning_rate": 1.4522924343074453e-09,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07896731048822403,
|
|
"step": 4620,
|
|
"valid_targets_mean": 7386.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 4.989568345323741,
|
|
"grad_norm": 0.549195675689145,
|
|
"learning_rate": 6.864394769068305e-10,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06798575073480606,
|
|
"step": 4625,
|
|
"valid_targets_mean": 6080.2,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 4.9949640287769785,
|
|
"grad_norm": 0.5632419914247869,
|
|
"learning_rate": 2.042307477001515e-10,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06996813416481018,
|
|
"step": 4630,
|
|
"valid_targets_mean": 6183.4,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.9002204314725685,
|
|
"learning_rate": 5.673085712309956e-12,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21446937322616577,
|
|
"step": 4635,
|
|
"valid_targets_mean": 6732.3,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21446937322616577,
|
|
"step": 4635,
|
|
"total_flos": 1.3771174172166193e+19,
|
|
"train_loss": 0.058854675498723724,
|
|
"train_runtime": 21175.3179,
|
|
"train_samples_per_second": 21.0,
|
|
"train_steps_per_second": 0.219,
|
|
"valid_targets_mean": 6732.3,
|
|
"valid_targets_min": 192
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4635,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.3771174172166193e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|