Model: laion/nemosci-tasrep-a1mfc-gfistaqc-dev1-scaff-maxeps__Qwen3-8B Source: Original Platform
8936 lines
249 KiB
JSON
8936 lines
249 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4040,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.006190672719768881,
|
|
"grad_norm": 13.69081014790444,
|
|
"learning_rate": 3.9603960396039606e-07,
|
|
"loss": 0.9671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3308084309101105,
|
|
"step": 5,
|
|
"valid_targets_mean": 8170.6,
|
|
"valid_targets_min": 3077
|
|
},
|
|
{
|
|
"epoch": 0.012381345439537762,
|
|
"grad_norm": 13.034829504673615,
|
|
"learning_rate": 8.910891089108911e-07,
|
|
"loss": 0.9687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31032028794288635,
|
|
"step": 10,
|
|
"valid_targets_mean": 6980.7,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 0.018572018159306643,
|
|
"grad_norm": 8.434791094177731,
|
|
"learning_rate": 1.3861386138613863e-06,
|
|
"loss": 0.929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717992961406708,
|
|
"step": 15,
|
|
"valid_targets_mean": 6148.2,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 0.024762690879075525,
|
|
"grad_norm": 4.47895542732865,
|
|
"learning_rate": 1.8811881188118813e-06,
|
|
"loss": 0.8687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29125693440437317,
|
|
"step": 20,
|
|
"valid_targets_mean": 7165.8,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 0.03095336359884441,
|
|
"grad_norm": 2.218495795385028,
|
|
"learning_rate": 2.3762376237623762e-06,
|
|
"loss": 0.7967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26372790336608887,
|
|
"step": 25,
|
|
"valid_targets_mean": 7202.3,
|
|
"valid_targets_min": 2907
|
|
},
|
|
{
|
|
"epoch": 0.03714403631861329,
|
|
"grad_norm": 1.6946683061646124,
|
|
"learning_rate": 2.8712871287128712e-06,
|
|
"loss": 0.7695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575187087059021,
|
|
"step": 30,
|
|
"valid_targets_mean": 7477.5,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 0.04333470903838217,
|
|
"grad_norm": 1.3286699429731896,
|
|
"learning_rate": 3.3663366336633666e-06,
|
|
"loss": 0.738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24754995107650757,
|
|
"step": 35,
|
|
"valid_targets_mean": 7314.0,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 0.04952538175815105,
|
|
"grad_norm": 0.8945921047165601,
|
|
"learning_rate": 3.861386138613862e-06,
|
|
"loss": 0.6911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22195573151111603,
|
|
"step": 40,
|
|
"valid_targets_mean": 7126.6,
|
|
"valid_targets_min": 2597
|
|
},
|
|
{
|
|
"epoch": 0.055716054477919934,
|
|
"grad_norm": 0.7078981612795681,
|
|
"learning_rate": 4.356435643564357e-06,
|
|
"loss": 0.6665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21039897203445435,
|
|
"step": 45,
|
|
"valid_targets_mean": 7369.8,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 0.06190672719768882,
|
|
"grad_norm": 0.5819460565367259,
|
|
"learning_rate": 4.851485148514852e-06,
|
|
"loss": 0.6321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2004292607307434,
|
|
"step": 50,
|
|
"valid_targets_mean": 7356.4,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 0.0680973999174577,
|
|
"grad_norm": 0.458137276322268,
|
|
"learning_rate": 5.346534653465347e-06,
|
|
"loss": 0.5821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18201720714569092,
|
|
"step": 55,
|
|
"valid_targets_mean": 8719.0,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.07428807263722657,
|
|
"grad_norm": 0.38701701479667944,
|
|
"learning_rate": 5.841584158415842e-06,
|
|
"loss": 0.5437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19013774394989014,
|
|
"step": 60,
|
|
"valid_targets_mean": 9029.5,
|
|
"valid_targets_min": 4655
|
|
},
|
|
{
|
|
"epoch": 0.08047874535699547,
|
|
"grad_norm": 0.29827604264005525,
|
|
"learning_rate": 6.336633663366337e-06,
|
|
"loss": 0.517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17409640550613403,
|
|
"step": 65,
|
|
"valid_targets_mean": 10019.8,
|
|
"valid_targets_min": 4996
|
|
},
|
|
{
|
|
"epoch": 0.08666941807676434,
|
|
"grad_norm": 0.26277963732645887,
|
|
"learning_rate": 6.831683168316833e-06,
|
|
"loss": 0.498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16459758579730988,
|
|
"step": 70,
|
|
"valid_targets_mean": 9413.2,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 0.09286009079653322,
|
|
"grad_norm": 0.23794598916895981,
|
|
"learning_rate": 7.326732673267327e-06,
|
|
"loss": 0.4912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17116118967533112,
|
|
"step": 75,
|
|
"valid_targets_mean": 10127.1,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 0.0990507635163021,
|
|
"grad_norm": 0.22780805082758482,
|
|
"learning_rate": 7.821782178217822e-06,
|
|
"loss": 0.4705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16602832078933716,
|
|
"step": 80,
|
|
"valid_targets_mean": 9319.1,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.10524143623607099,
|
|
"grad_norm": 0.22692592515290985,
|
|
"learning_rate": 8.316831683168318e-06,
|
|
"loss": 0.472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15565122663974762,
|
|
"step": 85,
|
|
"valid_targets_mean": 9109.9,
|
|
"valid_targets_min": 4379
|
|
},
|
|
{
|
|
"epoch": 0.11143210895583987,
|
|
"grad_norm": 0.2345829892139307,
|
|
"learning_rate": 8.811881188118812e-06,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14707699418067932,
|
|
"step": 90,
|
|
"valid_targets_mean": 9313.2,
|
|
"valid_targets_min": 3452
|
|
},
|
|
{
|
|
"epoch": 0.11762278167560875,
|
|
"grad_norm": 0.22107772815416807,
|
|
"learning_rate": 9.306930693069308e-06,
|
|
"loss": 0.4479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1460593044757843,
|
|
"step": 95,
|
|
"valid_targets_mean": 8640.9,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.12381345439537764,
|
|
"grad_norm": 0.23984264331073588,
|
|
"learning_rate": 9.801980198019802e-06,
|
|
"loss": 0.4453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.146380215883255,
|
|
"step": 100,
|
|
"valid_targets_mean": 9029.9,
|
|
"valid_targets_min": 4552
|
|
},
|
|
{
|
|
"epoch": 0.1300041271151465,
|
|
"grad_norm": 0.21473127293939723,
|
|
"learning_rate": 1.0297029702970298e-05,
|
|
"loss": 0.4367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14747458696365356,
|
|
"step": 105,
|
|
"valid_targets_mean": 10390.2,
|
|
"valid_targets_min": 4833
|
|
},
|
|
{
|
|
"epoch": 0.1361947998349154,
|
|
"grad_norm": 0.20218197630478985,
|
|
"learning_rate": 1.0792079207920793e-05,
|
|
"loss": 0.427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1468656212091446,
|
|
"step": 110,
|
|
"valid_targets_mean": 9771.6,
|
|
"valid_targets_min": 3641
|
|
},
|
|
{
|
|
"epoch": 0.14238547255468428,
|
|
"grad_norm": 0.20760474857113903,
|
|
"learning_rate": 1.1287128712871288e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13152442872524261,
|
|
"step": 115,
|
|
"valid_targets_mean": 9316.0,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 0.14857614527445315,
|
|
"grad_norm": 0.22873210624395904,
|
|
"learning_rate": 1.1782178217821782e-05,
|
|
"loss": 0.4244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14190971851348877,
|
|
"step": 120,
|
|
"valid_targets_mean": 9838.7,
|
|
"valid_targets_min": 4324
|
|
},
|
|
{
|
|
"epoch": 0.15476681799422204,
|
|
"grad_norm": 0.23652304476658506,
|
|
"learning_rate": 1.2277227722772278e-05,
|
|
"loss": 0.4297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14609889686107635,
|
|
"step": 125,
|
|
"valid_targets_mean": 9693.1,
|
|
"valid_targets_min": 3811
|
|
},
|
|
{
|
|
"epoch": 0.16095749071399093,
|
|
"grad_norm": 0.1986844472029466,
|
|
"learning_rate": 1.2772277227722773e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12527498602867126,
|
|
"step": 130,
|
|
"valid_targets_mean": 9686.9,
|
|
"valid_targets_min": 3338
|
|
},
|
|
{
|
|
"epoch": 0.1671481634337598,
|
|
"grad_norm": 0.25219013469561774,
|
|
"learning_rate": 1.326732673267327e-05,
|
|
"loss": 0.4043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12631601095199585,
|
|
"step": 135,
|
|
"valid_targets_mean": 9590.6,
|
|
"valid_targets_min": 4785
|
|
},
|
|
{
|
|
"epoch": 0.17333883615352869,
|
|
"grad_norm": 0.22138098796675187,
|
|
"learning_rate": 1.3762376237623762e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14375868439674377,
|
|
"step": 140,
|
|
"valid_targets_mean": 10718.9,
|
|
"valid_targets_min": 4892
|
|
},
|
|
{
|
|
"epoch": 0.17952950887329758,
|
|
"grad_norm": 0.24747546412869526,
|
|
"learning_rate": 1.4257425742574257e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552199125289917,
|
|
"step": 145,
|
|
"valid_targets_mean": 10753.2,
|
|
"valid_targets_min": 3836
|
|
},
|
|
{
|
|
"epoch": 0.18572018159306644,
|
|
"grad_norm": 0.22825091909135112,
|
|
"learning_rate": 1.4752475247524753e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11909246444702148,
|
|
"step": 150,
|
|
"valid_targets_mean": 8877.9,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.19191085431283533,
|
|
"grad_norm": 0.22829758676467732,
|
|
"learning_rate": 1.5247524752475249e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1441432535648346,
|
|
"step": 155,
|
|
"valid_targets_mean": 11518.0,
|
|
"valid_targets_min": 4504
|
|
},
|
|
{
|
|
"epoch": 0.1981015270326042,
|
|
"grad_norm": 0.24016694185637358,
|
|
"learning_rate": 1.5742574257425743e-05,
|
|
"loss": 0.4014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14530834555625916,
|
|
"step": 160,
|
|
"valid_targets_mean": 10316.1,
|
|
"valid_targets_min": 4842
|
|
},
|
|
{
|
|
"epoch": 0.2042921997523731,
|
|
"grad_norm": 0.2288022584557003,
|
|
"learning_rate": 1.623762376237624e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342070996761322,
|
|
"step": 165,
|
|
"valid_targets_mean": 10630.5,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 0.21048287247214198,
|
|
"grad_norm": 0.23574030555112244,
|
|
"learning_rate": 1.6732673267326735e-05,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14268429577350616,
|
|
"step": 170,
|
|
"valid_targets_mean": 10322.6,
|
|
"valid_targets_min": 4928
|
|
},
|
|
{
|
|
"epoch": 0.21667354519191084,
|
|
"grad_norm": 0.2606308986433419,
|
|
"learning_rate": 1.722772277227723e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13740146160125732,
|
|
"step": 175,
|
|
"valid_targets_mean": 10297.2,
|
|
"valid_targets_min": 3888
|
|
},
|
|
{
|
|
"epoch": 0.22286421791167974,
|
|
"grad_norm": 0.2454577536042925,
|
|
"learning_rate": 1.7722772277227723e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12690019607543945,
|
|
"step": 180,
|
|
"valid_targets_mean": 8726.7,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 0.22905489063144863,
|
|
"grad_norm": 0.2767119067943713,
|
|
"learning_rate": 1.821782178217822e-05,
|
|
"loss": 0.3848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11502474546432495,
|
|
"step": 185,
|
|
"valid_targets_mean": 8473.2,
|
|
"valid_targets_min": 3703
|
|
},
|
|
{
|
|
"epoch": 0.2352455633512175,
|
|
"grad_norm": 0.270989480925465,
|
|
"learning_rate": 1.8712871287128715e-05,
|
|
"loss": 0.3873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303223818540573,
|
|
"step": 190,
|
|
"valid_targets_mean": 10562.8,
|
|
"valid_targets_min": 4059
|
|
},
|
|
{
|
|
"epoch": 0.24143623607098638,
|
|
"grad_norm": 0.3282614953212404,
|
|
"learning_rate": 1.920792079207921e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13379837572574615,
|
|
"step": 195,
|
|
"valid_targets_mean": 5732.4,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.24762690879075527,
|
|
"grad_norm": 0.35566059361350466,
|
|
"learning_rate": 1.9702970297029703e-05,
|
|
"loss": 0.4242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1384144127368927,
|
|
"step": 200,
|
|
"valid_targets_mean": 5909.7,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.25381758151052414,
|
|
"grad_norm": 3.8505179105168446,
|
|
"learning_rate": 2.01980198019802e-05,
|
|
"loss": 0.671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.352144718170166,
|
|
"step": 205,
|
|
"valid_targets_mean": 3622.3,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 0.260008254230293,
|
|
"grad_norm": 1.3365385167189445,
|
|
"learning_rate": 2.0693069306930695e-05,
|
|
"loss": 0.9063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3086371421813965,
|
|
"step": 210,
|
|
"valid_targets_mean": 4234.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 0.2661989269500619,
|
|
"grad_norm": 0.5237536915162603,
|
|
"learning_rate": 2.1188118811881192e-05,
|
|
"loss": 0.7899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18529243767261505,
|
|
"step": 215,
|
|
"valid_targets_mean": 3496.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 0.2723895996698308,
|
|
"grad_norm": 0.4167116326378586,
|
|
"learning_rate": 2.1683168316831686e-05,
|
|
"loss": 0.8408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752990126609802,
|
|
"step": 220,
|
|
"valid_targets_mean": 4790.7,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 0.27858027238959965,
|
|
"grad_norm": 0.4091555558318782,
|
|
"learning_rate": 2.217821782178218e-05,
|
|
"loss": 0.764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29285866022109985,
|
|
"step": 225,
|
|
"valid_targets_mean": 4529.3,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 0.28477094510936857,
|
|
"grad_norm": 0.3571054813067285,
|
|
"learning_rate": 2.2673267326732675e-05,
|
|
"loss": 0.7213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22011533379554749,
|
|
"step": 230,
|
|
"valid_targets_mean": 3307.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 0.29096161782913743,
|
|
"grad_norm": 0.355702797977271,
|
|
"learning_rate": 2.316831683168317e-05,
|
|
"loss": 0.7285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23454296588897705,
|
|
"step": 235,
|
|
"valid_targets_mean": 4017.8,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 0.2971522905489063,
|
|
"grad_norm": 0.3060968134707049,
|
|
"learning_rate": 2.3663366336633663e-05,
|
|
"loss": 0.6957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14198237657546997,
|
|
"step": 240,
|
|
"valid_targets_mean": 1564.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.3033429632686752,
|
|
"grad_norm": 0.331642035940389,
|
|
"learning_rate": 2.415841584158416e-05,
|
|
"loss": 0.7136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24295559525489807,
|
|
"step": 245,
|
|
"valid_targets_mean": 4694.7,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 0.3095336359884441,
|
|
"grad_norm": 0.3386893568315838,
|
|
"learning_rate": 2.4653465346534655e-05,
|
|
"loss": 0.6877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27461788058280945,
|
|
"step": 250,
|
|
"valid_targets_mean": 4543.5,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.31572430870821294,
|
|
"grad_norm": 0.37502232741017205,
|
|
"learning_rate": 2.5148514851485152e-05,
|
|
"loss": 0.6707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2397470623254776,
|
|
"step": 255,
|
|
"valid_targets_mean": 3509.4,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 0.32191498142798186,
|
|
"grad_norm": 0.3079888983818421,
|
|
"learning_rate": 2.5643564356435646e-05,
|
|
"loss": 0.6803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1660553514957428,
|
|
"step": 260,
|
|
"valid_targets_mean": 3599.4,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 0.3281056541477507,
|
|
"grad_norm": 0.31582108101484946,
|
|
"learning_rate": 2.613861386138614e-05,
|
|
"loss": 0.6422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11140985786914825,
|
|
"step": 265,
|
|
"valid_targets_mean": 980.5,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.3342963268675196,
|
|
"grad_norm": 0.336290963734261,
|
|
"learning_rate": 2.6633663366336638e-05,
|
|
"loss": 0.6788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24496959149837494,
|
|
"step": 270,
|
|
"valid_targets_mean": 4849.9,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 0.3404869995872885,
|
|
"grad_norm": 0.31348913946973966,
|
|
"learning_rate": 2.7128712871287132e-05,
|
|
"loss": 0.6554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2219613790512085,
|
|
"step": 275,
|
|
"valid_targets_mean": 4380.0,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 0.34667767230705737,
|
|
"grad_norm": 0.3518570146260082,
|
|
"learning_rate": 2.7623762376237623e-05,
|
|
"loss": 0.6912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25598108768463135,
|
|
"step": 280,
|
|
"valid_targets_mean": 3904.7,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 0.35286834502682624,
|
|
"grad_norm": 0.30980256849377164,
|
|
"learning_rate": 2.811881188118812e-05,
|
|
"loss": 0.6647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20046451687812805,
|
|
"step": 285,
|
|
"valid_targets_mean": 3962.2,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.35905901774659515,
|
|
"grad_norm": 0.39090008967777884,
|
|
"learning_rate": 2.8613861386138614e-05,
|
|
"loss": 0.6394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13189911842346191,
|
|
"step": 290,
|
|
"valid_targets_mean": 1099.3,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 0.365249690466364,
|
|
"grad_norm": 0.30712915868602125,
|
|
"learning_rate": 2.9108910891089112e-05,
|
|
"loss": 0.6633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24897335469722748,
|
|
"step": 295,
|
|
"valid_targets_mean": 5050.1,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 0.3714403631861329,
|
|
"grad_norm": 0.2928557384723093,
|
|
"learning_rate": 2.9603960396039606e-05,
|
|
"loss": 0.65,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23281772434711456,
|
|
"step": 300,
|
|
"valid_targets_mean": 5151.1,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 0.3776310359059018,
|
|
"grad_norm": 0.3217747871277684,
|
|
"learning_rate": 3.00990099009901e-05,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09104875475168228,
|
|
"step": 305,
|
|
"valid_targets_mean": 5515.5,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 0.38382170862567067,
|
|
"grad_norm": 0.3068332865986789,
|
|
"learning_rate": 3.0594059405940594e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07501222938299179,
|
|
"step": 310,
|
|
"valid_targets_mean": 5127.6,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 0.39001238134543953,
|
|
"grad_norm": 0.2794395660896411,
|
|
"learning_rate": 3.1089108910891095e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07017892599105835,
|
|
"step": 315,
|
|
"valid_targets_mean": 5021.8,
|
|
"valid_targets_min": 2808
|
|
},
|
|
{
|
|
"epoch": 0.3962030540652084,
|
|
"grad_norm": 0.24225181932863452,
|
|
"learning_rate": 3.158415841584159e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07376162707805634,
|
|
"step": 320,
|
|
"valid_targets_mean": 5327.4,
|
|
"valid_targets_min": 2771
|
|
},
|
|
{
|
|
"epoch": 0.4023937267849773,
|
|
"grad_norm": 0.2540394940988095,
|
|
"learning_rate": 3.2079207920792084e-05,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.080901600420475,
|
|
"step": 325,
|
|
"valid_targets_mean": 5568.0,
|
|
"valid_targets_min": 3219
|
|
},
|
|
{
|
|
"epoch": 0.4085843995047462,
|
|
"grad_norm": 0.2886882511925012,
|
|
"learning_rate": 3.257425742574258e-05,
|
|
"loss": 0.2138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07093691825866699,
|
|
"step": 330,
|
|
"valid_targets_mean": 5279.6,
|
|
"valid_targets_min": 3339
|
|
},
|
|
{
|
|
"epoch": 0.41477507222451504,
|
|
"grad_norm": 0.26219159937917347,
|
|
"learning_rate": 3.306930693069307e-05,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0702129453420639,
|
|
"step": 335,
|
|
"valid_targets_mean": 5462.8,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 0.42096574494428396,
|
|
"grad_norm": 0.24572831374972345,
|
|
"learning_rate": 3.3564356435643566e-05,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06763170659542084,
|
|
"step": 340,
|
|
"valid_targets_mean": 5447.9,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 0.4271564176640528,
|
|
"grad_norm": 0.30961088803519143,
|
|
"learning_rate": 3.405940594059406e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07071402668952942,
|
|
"step": 345,
|
|
"valid_targets_mean": 5365.3,
|
|
"valid_targets_min": 2902
|
|
},
|
|
{
|
|
"epoch": 0.4333470903838217,
|
|
"grad_norm": 0.25990210441474304,
|
|
"learning_rate": 3.4554455445544554e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06358982622623444,
|
|
"step": 350,
|
|
"valid_targets_mean": 4987.0,
|
|
"valid_targets_min": 3287
|
|
},
|
|
{
|
|
"epoch": 0.4395377631035906,
|
|
"grad_norm": 0.22746062323846136,
|
|
"learning_rate": 3.5049504950495055e-05,
|
|
"loss": 0.1989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06009838730096817,
|
|
"step": 355,
|
|
"valid_targets_mean": 5047.8,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 0.44572843582335947,
|
|
"grad_norm": 0.22457559704126684,
|
|
"learning_rate": 3.554455445544555e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07244648039340973,
|
|
"step": 360,
|
|
"valid_targets_mean": 5323.9,
|
|
"valid_targets_min": 2401
|
|
},
|
|
{
|
|
"epoch": 0.45191910854312833,
|
|
"grad_norm": 0.2459435446966312,
|
|
"learning_rate": 3.6039603960396043e-05,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06363025307655334,
|
|
"step": 365,
|
|
"valid_targets_mean": 5421.0,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.45810978126289725,
|
|
"grad_norm": 0.24597432021116275,
|
|
"learning_rate": 3.653465346534654e-05,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05958259850740433,
|
|
"step": 370,
|
|
"valid_targets_mean": 5157.3,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 0.4643004539826661,
|
|
"grad_norm": 0.25523337156505405,
|
|
"learning_rate": 3.702970297029703e-05,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06700345873832703,
|
|
"step": 375,
|
|
"valid_targets_mean": 5285.9,
|
|
"valid_targets_min": 2725
|
|
},
|
|
{
|
|
"epoch": 0.470491126702435,
|
|
"grad_norm": 0.23212833424646626,
|
|
"learning_rate": 3.7524752475247526e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06414772570133209,
|
|
"step": 380,
|
|
"valid_targets_mean": 4471.1,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 0.4766817994222039,
|
|
"grad_norm": 0.2550797368747138,
|
|
"learning_rate": 3.801980198019802e-05,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06532502174377441,
|
|
"step": 385,
|
|
"valid_targets_mean": 5323.2,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 0.48287247214197276,
|
|
"grad_norm": 0.24872807211853945,
|
|
"learning_rate": 3.8514851485148514e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06627032160758972,
|
|
"step": 390,
|
|
"valid_targets_mean": 5507.8,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 0.4890631448617416,
|
|
"grad_norm": 0.2999488197490646,
|
|
"learning_rate": 3.9009900990099015e-05,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06143973767757416,
|
|
"step": 395,
|
|
"valid_targets_mean": 5065.1,
|
|
"valid_targets_min": 2739
|
|
},
|
|
{
|
|
"epoch": 0.49525381758151055,
|
|
"grad_norm": 0.30571772692709337,
|
|
"learning_rate": 3.950495049504951e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06572841852903366,
|
|
"step": 400,
|
|
"valid_targets_mean": 5267.3,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 0.5014444903012794,
|
|
"grad_norm": 0.22269261776603716,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06665158271789551,
|
|
"step": 405,
|
|
"valid_targets_mean": 5350.7,
|
|
"valid_targets_min": 2809
|
|
},
|
|
{
|
|
"epoch": 0.5076351630210483,
|
|
"grad_norm": 0.4456755132862717,
|
|
"learning_rate": 3.9999813365757024e-05,
|
|
"loss": 0.3948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12805122137069702,
|
|
"step": 410,
|
|
"valid_targets_mean": 2500.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.5138258357408172,
|
|
"grad_norm": 0.3620662929238399,
|
|
"learning_rate": 3.9999253466511324e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13281777501106262,
|
|
"step": 415,
|
|
"valid_targets_mean": 2896.2,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 0.520016508460586,
|
|
"grad_norm": 0.34320331126430126,
|
|
"learning_rate": 3.999832031271254e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15719559788703918,
|
|
"step": 420,
|
|
"valid_targets_mean": 3304.2,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 0.5262071811803549,
|
|
"grad_norm": 0.32612391575062233,
|
|
"learning_rate": 3.9997013921776516e-05,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13453912734985352,
|
|
"step": 425,
|
|
"valid_targets_mean": 2892.6,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 0.5323978539001238,
|
|
"grad_norm": 0.33452304770207697,
|
|
"learning_rate": 3.9995334318084974e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338978260755539,
|
|
"step": 430,
|
|
"valid_targets_mean": 2760.3,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 0.5385885266198926,
|
|
"grad_norm": 0.31669204759517733,
|
|
"learning_rate": 3.9993281532985084e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1150984913110733,
|
|
"step": 435,
|
|
"valid_targets_mean": 2537.0,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.5447791993396616,
|
|
"grad_norm": 0.29425047475860694,
|
|
"learning_rate": 3.9990855604788836e-05,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10267926007509232,
|
|
"step": 440,
|
|
"valid_targets_mean": 2513.7,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 0.5509698720594305,
|
|
"grad_norm": 0.295116724633044,
|
|
"learning_rate": 3.998805657877236e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11967551708221436,
|
|
"step": 445,
|
|
"valid_targets_mean": 3243.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.5571605447791993,
|
|
"grad_norm": 0.32645907020606896,
|
|
"learning_rate": 3.9984884507175065e-05,
|
|
"loss": 0.3873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13591070473194122,
|
|
"step": 450,
|
|
"valid_targets_mean": 3058.7,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 0.5633512174989682,
|
|
"grad_norm": 0.33362040081773897,
|
|
"learning_rate": 3.998133944919867e-05,
|
|
"loss": 0.3857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13289275765419006,
|
|
"step": 455,
|
|
"valid_targets_mean": 2666.9,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 0.5695418902187371,
|
|
"grad_norm": 0.29636779971396265,
|
|
"learning_rate": 3.99774214710061e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12378550320863724,
|
|
"step": 460,
|
|
"valid_targets_mean": 2825.4,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.5757325629385059,
|
|
"grad_norm": 0.3284740000950174,
|
|
"learning_rate": 3.997313064572023e-05,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1285189837217331,
|
|
"step": 465,
|
|
"valid_targets_mean": 2946.9,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 0.5819232356582749,
|
|
"grad_norm": 0.36468009048637,
|
|
"learning_rate": 3.9968467053422576e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1238355040550232,
|
|
"step": 470,
|
|
"valid_targets_mean": 2378.3,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.5881139083780438,
|
|
"grad_norm": 0.3045144773550056,
|
|
"learning_rate": 3.996343078115172e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12378422915935516,
|
|
"step": 475,
|
|
"valid_targets_mean": 3020.7,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.5943045810978126,
|
|
"grad_norm": 0.31310032314681036,
|
|
"learning_rate": 3.995802192290175e-05,
|
|
"loss": 0.3828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11719360947608948,
|
|
"step": 480,
|
|
"valid_targets_mean": 2624.6,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 0.6004952538175815,
|
|
"grad_norm": 0.32175847418488496,
|
|
"learning_rate": 3.9952240579620495e-05,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13276472687721252,
|
|
"step": 485,
|
|
"valid_targets_mean": 2637.3,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 0.6066859265373504,
|
|
"grad_norm": 0.3060985863645244,
|
|
"learning_rate": 3.994608685920761e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12911058962345123,
|
|
"step": 490,
|
|
"valid_targets_mean": 3129.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 0.6128765992571192,
|
|
"grad_norm": 0.311015701749235,
|
|
"learning_rate": 3.993956087651259e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14879468083381653,
|
|
"step": 495,
|
|
"valid_targets_mean": 3229.0,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 0.6190672719768882,
|
|
"grad_norm": 0.30849381916217494,
|
|
"learning_rate": 3.993266275333262e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.108841672539711,
|
|
"step": 500,
|
|
"valid_targets_mean": 3065.2,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.6252579446966571,
|
|
"grad_norm": 0.35681503971011563,
|
|
"learning_rate": 3.992539261841029e-05,
|
|
"loss": 0.5011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17994052171707153,
|
|
"step": 505,
|
|
"valid_targets_mean": 3194.9,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 0.6314486174164259,
|
|
"grad_norm": 0.33103841446997584,
|
|
"learning_rate": 3.9917750607431236e-05,
|
|
"loss": 0.5511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2287166714668274,
|
|
"step": 510,
|
|
"valid_targets_mean": 4403.2,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 0.6376392901361948,
|
|
"grad_norm": 0.42820448670204536,
|
|
"learning_rate": 3.990973686302153e-05,
|
|
"loss": 0.5368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27759698033332825,
|
|
"step": 515,
|
|
"valid_targets_mean": 2703.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 0.6438299628559637,
|
|
"grad_norm": 0.26435801197715686,
|
|
"learning_rate": 3.9901351534745096e-05,
|
|
"loss": 0.5432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15167224407196045,
|
|
"step": 520,
|
|
"valid_targets_mean": 3833.4,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 0.6500206355757325,
|
|
"grad_norm": 0.26394234274180794,
|
|
"learning_rate": 3.9892594779100866e-05,
|
|
"loss": 0.5323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16260398924350739,
|
|
"step": 525,
|
|
"valid_targets_mean": 4071.8,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 0.6562113082955014,
|
|
"grad_norm": 0.3033729403184701,
|
|
"learning_rate": 3.988346675951989e-05,
|
|
"loss": 0.5579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1599036455154419,
|
|
"step": 530,
|
|
"valid_targets_mean": 3823.3,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 0.6624019810152704,
|
|
"grad_norm": 0.29624596434427386,
|
|
"learning_rate": 3.9873967646362264e-05,
|
|
"loss": 0.5272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1791398525238037,
|
|
"step": 535,
|
|
"valid_targets_mean": 3716.2,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 0.6685926537350392,
|
|
"grad_norm": 0.24932495954735678,
|
|
"learning_rate": 3.986409761691398e-05,
|
|
"loss": 0.4967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13532483577728271,
|
|
"step": 540,
|
|
"valid_targets_mean": 4653.4,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 0.6747833264548081,
|
|
"grad_norm": 0.34383684075109205,
|
|
"learning_rate": 3.9853856855383575e-05,
|
|
"loss": 0.5597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22093486785888672,
|
|
"step": 545,
|
|
"valid_targets_mean": 4251.1,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 0.680973999174577,
|
|
"grad_norm": 0.2882670484341648,
|
|
"learning_rate": 3.984324555289873e-05,
|
|
"loss": 0.514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1629081815481186,
|
|
"step": 550,
|
|
"valid_targets_mean": 3837.9,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 0.6871646718943458,
|
|
"grad_norm": 0.40548944311423024,
|
|
"learning_rate": 3.9832263907502684e-05,
|
|
"loss": 0.5225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2748474180698395,
|
|
"step": 555,
|
|
"valid_targets_mean": 4099.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 0.6933553446141147,
|
|
"grad_norm": 0.27359174156618793,
|
|
"learning_rate": 3.982091212415055e-05,
|
|
"loss": 0.5133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1127590760588646,
|
|
"step": 560,
|
|
"valid_targets_mean": 2985.5,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 0.6995460173338837,
|
|
"grad_norm": 0.25814177905139474,
|
|
"learning_rate": 3.980919041470547e-05,
|
|
"loss": 0.5071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341688185930252,
|
|
"step": 565,
|
|
"valid_targets_mean": 3816.5,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.7057366900536525,
|
|
"grad_norm": 0.3052006481254495,
|
|
"learning_rate": 3.9797098997934686e-05,
|
|
"loss": 0.5331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1798521727323532,
|
|
"step": 570,
|
|
"valid_targets_mean": 3534.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 0.7119273627734214,
|
|
"grad_norm": 0.2779292097160102,
|
|
"learning_rate": 3.978463809950544e-05,
|
|
"loss": 0.531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18083035945892334,
|
|
"step": 575,
|
|
"valid_targets_mean": 4482.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 0.7181180354931903,
|
|
"grad_norm": 0.28200839372093733,
|
|
"learning_rate": 3.977180795198076e-05,
|
|
"loss": 0.5003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16228215396404266,
|
|
"step": 580,
|
|
"valid_targets_mean": 5174.0,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 0.7243087082129591,
|
|
"grad_norm": 0.3183565656176847,
|
|
"learning_rate": 3.975860879481514e-05,
|
|
"loss": 0.5407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16546174883842468,
|
|
"step": 585,
|
|
"valid_targets_mean": 3754.9,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 0.730499380932728,
|
|
"grad_norm": 0.2943172838770068,
|
|
"learning_rate": 3.974504087435005e-05,
|
|
"loss": 0.5146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18059690296649933,
|
|
"step": 590,
|
|
"valid_targets_mean": 4046.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.736690053652497,
|
|
"grad_norm": 0.3149624012563377,
|
|
"learning_rate": 3.973110444380934e-05,
|
|
"loss": 0.5053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21076446771621704,
|
|
"step": 595,
|
|
"valid_targets_mean": 4540.8,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 0.7428807263722658,
|
|
"grad_norm": 0.2759353191020033,
|
|
"learning_rate": 3.971679976329452e-05,
|
|
"loss": 0.5182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15601934492588043,
|
|
"step": 600,
|
|
"valid_targets_mean": 4031.6,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 0.7490713990920347,
|
|
"grad_norm": 0.26713089105939264,
|
|
"learning_rate": 3.970212709977994e-05,
|
|
"loss": 0.5186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15477588772773743,
|
|
"step": 605,
|
|
"valid_targets_mean": 4447.7,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.7552620718118036,
|
|
"grad_norm": 0.26206437527065074,
|
|
"learning_rate": 3.968708672710772e-05,
|
|
"loss": 0.4619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11552147567272186,
|
|
"step": 610,
|
|
"valid_targets_mean": 6965.9,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 0.7614527445315724,
|
|
"grad_norm": 0.22850864392043216,
|
|
"learning_rate": 3.967167892598272e-05,
|
|
"loss": 0.3576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11083020269870758,
|
|
"step": 615,
|
|
"valid_targets_mean": 6287.7,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 0.7676434172513413,
|
|
"grad_norm": 0.249254230775655,
|
|
"learning_rate": 3.9655903983967284e-05,
|
|
"loss": 0.3611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14337262511253357,
|
|
"step": 620,
|
|
"valid_targets_mean": 6181.2,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 0.7738340899711101,
|
|
"grad_norm": 0.2738158301749309,
|
|
"learning_rate": 3.963976219547583e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10075066238641739,
|
|
"step": 625,
|
|
"valid_targets_mean": 5839.1,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 0.7800247626908791,
|
|
"grad_norm": 0.2295834682991341,
|
|
"learning_rate": 3.962325386176942e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0919828787446022,
|
|
"step": 630,
|
|
"valid_targets_mean": 5992.5,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 0.786215435410648,
|
|
"grad_norm": 0.21850155919956962,
|
|
"learning_rate": 3.960637929095008e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12861928343772888,
|
|
"step": 635,
|
|
"valid_targets_mean": 7167.2,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.7924061081304168,
|
|
"grad_norm": 0.2126599471667712,
|
|
"learning_rate": 3.9589138797955087e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09005976468324661,
|
|
"step": 640,
|
|
"valid_targets_mean": 6423.7,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 0.7985967808501857,
|
|
"grad_norm": 0.2396147945889789,
|
|
"learning_rate": 3.957153270455108e-05,
|
|
"loss": 0.3335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0931413322687149,
|
|
"step": 645,
|
|
"valid_targets_mean": 6112.5,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 0.8047874535699546,
|
|
"grad_norm": 0.22795407617510605,
|
|
"learning_rate": 3.955356133932806e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10919217765331268,
|
|
"step": 650,
|
|
"valid_targets_mean": 7196.7,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.8109781262897234,
|
|
"grad_norm": 0.39108388547520034,
|
|
"learning_rate": 3.953522503769322e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07933703064918518,
|
|
"step": 655,
|
|
"valid_targets_mean": 1509.7,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 0.8171687990094924,
|
|
"grad_norm": 0.2250720948950377,
|
|
"learning_rate": 3.951652414186475e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07982674241065979,
|
|
"step": 660,
|
|
"valid_targets_mean": 5713.6,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.8233594717292613,
|
|
"grad_norm": 0.21189031354345028,
|
|
"learning_rate": 3.94974590008654e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11800597608089447,
|
|
"step": 665,
|
|
"valid_targets_mean": 7103.9,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 0.8295501444490301,
|
|
"grad_norm": 0.21234226616805732,
|
|
"learning_rate": 3.947802997051599e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08876694738864899,
|
|
"step": 670,
|
|
"valid_targets_mean": 5966.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 0.835740817168799,
|
|
"grad_norm": 0.19242332205112833,
|
|
"learning_rate": 3.9458237413428754e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11552058160305023,
|
|
"step": 675,
|
|
"valid_targets_mean": 7561.1,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 0.8419314898885679,
|
|
"grad_norm": 0.2188762186031303,
|
|
"learning_rate": 3.943808169900058e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803225576877594,
|
|
"step": 680,
|
|
"valid_targets_mean": 7743.8,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 0.8481221626083367,
|
|
"grad_norm": 0.19535976427283863,
|
|
"learning_rate": 3.941756320340613e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1100626215338707,
|
|
"step": 685,
|
|
"valid_targets_mean": 7503.8,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 0.8543128353281056,
|
|
"grad_norm": 0.21312323294511204,
|
|
"learning_rate": 3.939668230959078e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10139448940753937,
|
|
"step": 690,
|
|
"valid_targets_mean": 5711.1,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 0.8605035080478746,
|
|
"grad_norm": 0.22619492142655012,
|
|
"learning_rate": 3.937543940726351e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09439124912023544,
|
|
"step": 695,
|
|
"valid_targets_mean": 5931.0,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 0.8666941807676434,
|
|
"grad_norm": 0.21322322651700568,
|
|
"learning_rate": 3.935383489288963e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14434558153152466,
|
|
"step": 700,
|
|
"valid_targets_mean": 7564.3,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.8728848534874123,
|
|
"grad_norm": 0.31984990541996194,
|
|
"learning_rate": 3.933186916968336e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08168262243270874,
|
|
"step": 705,
|
|
"valid_targets_mean": 4701.6,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 0.8790755262071812,
|
|
"grad_norm": 0.2593895549056664,
|
|
"learning_rate": 3.93095426476003e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07684122025966644,
|
|
"step": 710,
|
|
"valid_targets_mean": 5168.5,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 0.88526619892695,
|
|
"grad_norm": 0.2439275212924497,
|
|
"learning_rate": 3.9286855743329824e-05,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08285287022590637,
|
|
"step": 715,
|
|
"valid_targets_mean": 5513.5,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 0.8914568716467189,
|
|
"grad_norm": 0.2970477862326248,
|
|
"learning_rate": 3.926380888028723e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07643958926200867,
|
|
"step": 720,
|
|
"valid_targets_mean": 5370.2,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 0.8976475443664879,
|
|
"grad_norm": 0.2630275100732389,
|
|
"learning_rate": 3.9240402488605904e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07292252033948898,
|
|
"step": 725,
|
|
"valid_targets_mean": 4751.9,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 0.9038382170862567,
|
|
"grad_norm": 0.23499909807147012,
|
|
"learning_rate": 3.9216637005129274e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08045721054077148,
|
|
"step": 730,
|
|
"valid_targets_mean": 5578.0,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.9100288898060256,
|
|
"grad_norm": 0.22286340307728442,
|
|
"learning_rate": 3.919251287340265e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08224225044250488,
|
|
"step": 735,
|
|
"valid_targets_mean": 5186.3,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 0.9162195625257945,
|
|
"grad_norm": 0.22861299278884095,
|
|
"learning_rate": 3.916803054366492e-05,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07119175791740417,
|
|
"step": 740,
|
|
"valid_targets_mean": 4736.5,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.9224102352455633,
|
|
"grad_norm": 0.20402215614710315,
|
|
"learning_rate": 3.9143190472840206e-05,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06836479157209396,
|
|
"step": 745,
|
|
"valid_targets_mean": 4341.2,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 0.9286009079653322,
|
|
"grad_norm": 0.22426315283193549,
|
|
"learning_rate": 3.911799312452928e-05,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08552255481481552,
|
|
"step": 750,
|
|
"valid_targets_mean": 5612.3,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 0.9347915806851012,
|
|
"grad_norm": 0.21785283245490633,
|
|
"learning_rate": 3.909243896900095e-05,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07895369082689285,
|
|
"step": 755,
|
|
"valid_targets_mean": 5527.5,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 0.94098225340487,
|
|
"grad_norm": 0.31753106076641646,
|
|
"learning_rate": 3.9066528483183256e-05,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06483040004968643,
|
|
"step": 760,
|
|
"valid_targets_mean": 4836.5,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 0.9471729261246389,
|
|
"grad_norm": 0.2235742117838112,
|
|
"learning_rate": 3.90402621506546e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07146476954221725,
|
|
"step": 765,
|
|
"valid_targets_mean": 5341.6,
|
|
"valid_targets_min": 211
|
|
},
|
|
{
|
|
"epoch": 0.9533635988444078,
|
|
"grad_norm": 0.24232316650639799,
|
|
"learning_rate": 3.901364046163468e-05,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07449258863925934,
|
|
"step": 770,
|
|
"valid_targets_mean": 5475.3,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.9595542715641766,
|
|
"grad_norm": 0.21935116207173305,
|
|
"learning_rate": 3.898666391297538e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07074664533138275,
|
|
"step": 775,
|
|
"valid_targets_mean": 4993.7,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 0.9657449442839455,
|
|
"grad_norm": 0.24948372163077012,
|
|
"learning_rate": 3.8959333008151474e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07407493889331818,
|
|
"step": 780,
|
|
"valid_targets_mean": 5846.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.9719356170037144,
|
|
"grad_norm": 0.2301833841498614,
|
|
"learning_rate": 3.893164825725124e-05,
|
|
"loss": 0.2138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06988928467035294,
|
|
"step": 785,
|
|
"valid_targets_mean": 5926.5,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 0.9781262897234833,
|
|
"grad_norm": 0.2400888054860897,
|
|
"learning_rate": 3.890361017696691e-05,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07038062810897827,
|
|
"step": 790,
|
|
"valid_targets_mean": 5794.1,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 0.9843169624432522,
|
|
"grad_norm": 0.25158328929517476,
|
|
"learning_rate": 3.88752192905851e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08624989539384842,
|
|
"step": 795,
|
|
"valid_targets_mean": 3965.2,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 0.9905076351630211,
|
|
"grad_norm": 0.23088904065518467,
|
|
"learning_rate": 3.8846476127976943e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.075506791472435,
|
|
"step": 800,
|
|
"valid_targets_mean": 5186.0,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.9966983078827899,
|
|
"grad_norm": 0.2040862399075733,
|
|
"learning_rate": 3.881738122558829e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07165488600730896,
|
|
"step": 805,
|
|
"valid_targets_mean": 5449.4,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 1.0024762690879077,
|
|
"grad_norm": 1.7630021478199602,
|
|
"learning_rate": 3.8787935126429667e-05,
|
|
"loss": 0.4301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19306975603103638,
|
|
"step": 810,
|
|
"valid_targets_mean": 6370.5,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 1.0086669418076764,
|
|
"grad_norm": 0.7302939969915765,
|
|
"learning_rate": 3.8758138380066084e-05,
|
|
"loss": 0.5838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2058759182691574,
|
|
"step": 815,
|
|
"valid_targets_mean": 7651.2,
|
|
"valid_targets_min": 2581
|
|
},
|
|
{
|
|
"epoch": 1.0148576145274453,
|
|
"grad_norm": 0.4801071260106032,
|
|
"learning_rate": 3.872799154260689e-05,
|
|
"loss": 0.5095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718531847000122,
|
|
"step": 820,
|
|
"valid_targets_mean": 7917.3,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 1.0210482872472142,
|
|
"grad_norm": 0.33367145790459013,
|
|
"learning_rate": 3.8697495176695274e-05,
|
|
"loss": 0.4799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1657310277223587,
|
|
"step": 825,
|
|
"valid_targets_mean": 7465.7,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 1.0272389599669831,
|
|
"grad_norm": 0.28146614916461793,
|
|
"learning_rate": 3.866664985149788e-05,
|
|
"loss": 0.4498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14047694206237793,
|
|
"step": 830,
|
|
"valid_targets_mean": 6986.5,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 1.033429632686752,
|
|
"grad_norm": 0.25897876592592867,
|
|
"learning_rate": 3.863545614269408e-05,
|
|
"loss": 0.4422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12455333024263382,
|
|
"step": 835,
|
|
"valid_targets_mean": 6226.8,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 1.0396203054065207,
|
|
"grad_norm": 0.26501076566135745,
|
|
"learning_rate": 3.8603914632465315e-05,
|
|
"loss": 0.4334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12873420119285583,
|
|
"step": 840,
|
|
"valid_targets_mean": 6504.0,
|
|
"valid_targets_min": 3466
|
|
},
|
|
{
|
|
"epoch": 1.0458109781262896,
|
|
"grad_norm": 0.23724447599190418,
|
|
"learning_rate": 3.8572025909484156e-05,
|
|
"loss": 0.4237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1439080685377121,
|
|
"step": 845,
|
|
"valid_targets_mean": 7776.8,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 1.0520016508460586,
|
|
"grad_norm": 0.2573401738922151,
|
|
"learning_rate": 3.853979056890339e-05,
|
|
"loss": 0.4151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12129548192024231,
|
|
"step": 850,
|
|
"valid_targets_mean": 6354.8,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 1.0581923235658275,
|
|
"grad_norm": 0.26389482309032825,
|
|
"learning_rate": 3.850720921234484e-05,
|
|
"loss": 0.414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11435656994581223,
|
|
"step": 855,
|
|
"valid_targets_mean": 6384.4,
|
|
"valid_targets_min": 2441
|
|
},
|
|
{
|
|
"epoch": 1.0643829962855964,
|
|
"grad_norm": 0.26365827044130996,
|
|
"learning_rate": 3.847428244788818e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13937512040138245,
|
|
"step": 860,
|
|
"valid_targets_mean": 7465.9,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 1.0705736690053653,
|
|
"grad_norm": 0.24656739628215865,
|
|
"learning_rate": 3.8441010890059606e-05,
|
|
"loss": 0.3906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12065096944570541,
|
|
"step": 865,
|
|
"valid_targets_mean": 8703.7,
|
|
"valid_targets_min": 4392
|
|
},
|
|
{
|
|
"epoch": 1.0767643417251342,
|
|
"grad_norm": 0.22034153375853116,
|
|
"learning_rate": 3.840739515982031e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11956725269556046,
|
|
"step": 870,
|
|
"valid_targets_mean": 9525.9,
|
|
"valid_targets_min": 3807
|
|
},
|
|
{
|
|
"epoch": 1.082955014444903,
|
|
"grad_norm": 0.23000033951797325,
|
|
"learning_rate": 3.837343588455493e-05,
|
|
"loss": 0.3774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12482760101556778,
|
|
"step": 875,
|
|
"valid_targets_mean": 8976.7,
|
|
"valid_targets_min": 2713
|
|
},
|
|
{
|
|
"epoch": 1.0891456871646719,
|
|
"grad_norm": 0.22260692032858995,
|
|
"learning_rate": 3.8339133698059825e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12678372859954834,
|
|
"step": 880,
|
|
"valid_targets_mean": 9808.0,
|
|
"valid_targets_min": 5333
|
|
},
|
|
{
|
|
"epoch": 1.0953363598844408,
|
|
"grad_norm": 0.21034027833388624,
|
|
"learning_rate": 3.830448924053126e-05,
|
|
"loss": 0.3719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11682012677192688,
|
|
"step": 885,
|
|
"valid_targets_mean": 8583.8,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 1.1015270326042097,
|
|
"grad_norm": 0.22731438168208315,
|
|
"learning_rate": 3.826950315855344e-05,
|
|
"loss": 0.3665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11874133348464966,
|
|
"step": 890,
|
|
"valid_targets_mean": 8976.7,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 1.1077177053239786,
|
|
"grad_norm": 0.21120147161745279,
|
|
"learning_rate": 3.823417610508647e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12308643013238907,
|
|
"step": 895,
|
|
"valid_targets_mean": 8862.6,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 1.1139083780437473,
|
|
"grad_norm": 0.24881540692125995,
|
|
"learning_rate": 3.8198508739454124e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12277589738368988,
|
|
"step": 900,
|
|
"valid_targets_mean": 9068.4,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 1.1200990507635162,
|
|
"grad_norm": 0.20809536029038844,
|
|
"learning_rate": 3.816250172733159e-05,
|
|
"loss": 0.3655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13011404871940613,
|
|
"step": 905,
|
|
"valid_targets_mean": 10089.5,
|
|
"valid_targets_min": 5419
|
|
},
|
|
{
|
|
"epoch": 1.1262897234832852,
|
|
"grad_norm": 0.23605136902890966,
|
|
"learning_rate": 3.812615574073301e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11580745875835419,
|
|
"step": 910,
|
|
"valid_targets_mean": 9003.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 1.132480396203054,
|
|
"grad_norm": 0.3076907249685064,
|
|
"learning_rate": 3.8089471457998943e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12416087090969086,
|
|
"step": 915,
|
|
"valid_targets_mean": 10012.9,
|
|
"valid_targets_min": 5197
|
|
},
|
|
{
|
|
"epoch": 1.138671068922823,
|
|
"grad_norm": 0.23367073072605501,
|
|
"learning_rate": 3.8052449563783736e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12230054289102554,
|
|
"step": 920,
|
|
"valid_targets_mean": 9826.7,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.144861741642592,
|
|
"grad_norm": 0.25569850202155114,
|
|
"learning_rate": 3.801509074904271e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10747244954109192,
|
|
"step": 925,
|
|
"valid_targets_mean": 8842.1,
|
|
"valid_targets_min": 4239
|
|
},
|
|
{
|
|
"epoch": 1.1510524143623608,
|
|
"grad_norm": 0.22154145845724915,
|
|
"learning_rate": 3.797739571101926e-05,
|
|
"loss": 0.3607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10909201949834824,
|
|
"step": 930,
|
|
"valid_targets_mean": 9048.3,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 1.1572430870821295,
|
|
"grad_norm": 0.26707377134531735,
|
|
"learning_rate": 3.7939365153231895e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11631858348846436,
|
|
"step": 935,
|
|
"valid_targets_mean": 9484.0,
|
|
"valid_targets_min": 4512
|
|
},
|
|
{
|
|
"epoch": 1.1634337598018984,
|
|
"grad_norm": 0.2074352043631802,
|
|
"learning_rate": 3.7900999785461044e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1200384646654129,
|
|
"step": 940,
|
|
"valid_targets_mean": 9844.3,
|
|
"valid_targets_min": 3863
|
|
},
|
|
{
|
|
"epoch": 1.1696244325216674,
|
|
"grad_norm": 0.20424035685064698,
|
|
"learning_rate": 3.7862300323735835e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10674324631690979,
|
|
"step": 945,
|
|
"valid_targets_mean": 9963.1,
|
|
"valid_targets_min": 5225
|
|
},
|
|
{
|
|
"epoch": 1.1758151052414363,
|
|
"grad_norm": 0.20063264429995356,
|
|
"learning_rate": 3.782326749032075e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1060045063495636,
|
|
"step": 950,
|
|
"valid_targets_mean": 9133.7,
|
|
"valid_targets_min": 2782
|
|
},
|
|
{
|
|
"epoch": 1.1820057779612052,
|
|
"grad_norm": 0.22574347482223645,
|
|
"learning_rate": 3.778390201370212e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12533429265022278,
|
|
"step": 955,
|
|
"valid_targets_mean": 9841.2,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 1.188196450680974,
|
|
"grad_norm": 0.21439338477404551,
|
|
"learning_rate": 3.774420462857454e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10613439977169037,
|
|
"step": 960,
|
|
"valid_targets_mean": 9260.1,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 1.1943871234007428,
|
|
"grad_norm": 0.22538495559329547,
|
|
"learning_rate": 3.7704176075827153e-05,
|
|
"loss": 0.3552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11882612854242325,
|
|
"step": 965,
|
|
"valid_targets_mean": 9212.2,
|
|
"valid_targets_min": 3467
|
|
},
|
|
{
|
|
"epoch": 1.2005777961205117,
|
|
"grad_norm": 0.22670012431967673,
|
|
"learning_rate": 3.766381710252981e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1194467544555664,
|
|
"step": 970,
|
|
"valid_targets_mean": 10565.9,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.2067684688402807,
|
|
"grad_norm": 0.23891769544346636,
|
|
"learning_rate": 3.762312846191917e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12034593522548676,
|
|
"step": 975,
|
|
"valid_targets_mean": 10235.8,
|
|
"valid_targets_min": 5141
|
|
},
|
|
{
|
|
"epoch": 1.2129591415600496,
|
|
"grad_norm": 0.21539592342813615,
|
|
"learning_rate": 3.758211091338459e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12258360534906387,
|
|
"step": 980,
|
|
"valid_targets_mean": 10594.5,
|
|
"valid_targets_min": 4701
|
|
},
|
|
{
|
|
"epoch": 1.2191498142798185,
|
|
"grad_norm": 0.2048119738142403,
|
|
"learning_rate": 3.754076522245398e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10987658053636551,
|
|
"step": 985,
|
|
"valid_targets_mean": 9161.7,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 1.2253404869995872,
|
|
"grad_norm": 0.20851692573570163,
|
|
"learning_rate": 3.749909216077952e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11145468801259995,
|
|
"step": 990,
|
|
"valid_targets_mean": 9593.7,
|
|
"valid_targets_min": 5120
|
|
},
|
|
{
|
|
"epoch": 1.2315311597193561,
|
|
"grad_norm": 0.20408765701685788,
|
|
"learning_rate": 3.745709250612323e-05,
|
|
"loss": 0.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10666057467460632,
|
|
"step": 995,
|
|
"valid_targets_mean": 9896.2,
|
|
"valid_targets_min": 3759
|
|
},
|
|
{
|
|
"epoch": 1.237721832439125,
|
|
"grad_norm": 0.22652706592707528,
|
|
"learning_rate": 3.7414767042342495e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11425893008708954,
|
|
"step": 1000,
|
|
"valid_targets_mean": 9676.5,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 1.243912505158894,
|
|
"grad_norm": 0.2980670458684472,
|
|
"learning_rate": 3.73721165593754e-05,
|
|
"loss": 0.3712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11625061929225922,
|
|
"step": 1005,
|
|
"valid_targets_mean": 6269.3,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 1.2501031778786629,
|
|
"grad_norm": 0.267703764827564,
|
|
"learning_rate": 3.7329141853226005e-05,
|
|
"loss": 0.3752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1270488202571869,
|
|
"step": 1010,
|
|
"valid_targets_mean": 6197.9,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 1.2562938505984316,
|
|
"grad_norm": 0.9694772434591165,
|
|
"learning_rate": 3.7285843725949485e-05,
|
|
"loss": 0.7188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950109839439392,
|
|
"step": 1015,
|
|
"valid_targets_mean": 5028.6,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 1.2624845233182005,
|
|
"grad_norm": 0.5384083569137281,
|
|
"learning_rate": 3.724222298563717e-05,
|
|
"loss": 0.6742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20008735358715057,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3933.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.2686751960379694,
|
|
"grad_norm": 0.5042477665378176,
|
|
"learning_rate": 3.719828044640143e-05,
|
|
"loss": 0.664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18917982280254364,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3912.4,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 1.2748658687577383,
|
|
"grad_norm": 0.31673074891045633,
|
|
"learning_rate": 3.7154016928360525e-05,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2199656218290329,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3427.5,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.2810565414775072,
|
|
"grad_norm": 0.2808895453809632,
|
|
"learning_rate": 3.710943325762328e-05,
|
|
"loss": 0.6169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1730005443096161,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4285.9,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 1.2872472141972762,
|
|
"grad_norm": 0.2844817790441733,
|
|
"learning_rate": 3.7064530266273656e-05,
|
|
"loss": 0.6155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23248422145843506,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4571.2,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 1.293437886917045,
|
|
"grad_norm": 0.28448121540718235,
|
|
"learning_rate": 3.701930879235522e-05,
|
|
"loss": 0.6004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17288516461849213,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3360.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 1.2996285596368138,
|
|
"grad_norm": 0.28650839104845166,
|
|
"learning_rate": 3.6973769679855535e-05,
|
|
"loss": 0.615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18856051564216614,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3748.8,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 1.3058192323565827,
|
|
"grad_norm": 0.2603254577522934,
|
|
"learning_rate": 3.692791377869039e-05,
|
|
"loss": 0.591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25995659828186035,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4315.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 1.3120099050763516,
|
|
"grad_norm": 0.2564346837875043,
|
|
"learning_rate": 3.6881741944687906e-05,
|
|
"loss": 0.5791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2046835720539093,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5165.7,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 1.3182005777961205,
|
|
"grad_norm": 0.2667942792625742,
|
|
"learning_rate": 3.6835255039572634e-05,
|
|
"loss": 0.5957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17468014359474182,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4124.3,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 1.3243912505158895,
|
|
"grad_norm": 0.29346833466980493,
|
|
"learning_rate": 3.678845393094939e-05,
|
|
"loss": 0.5889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20234981179237366,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4181.1,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 1.3305819232356582,
|
|
"grad_norm": 0.29300595930318796,
|
|
"learning_rate": 3.674133949228713e-05,
|
|
"loss": 0.5689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17362770438194275,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3683.8,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 1.336772595955427,
|
|
"grad_norm": 0.2859110400782711,
|
|
"learning_rate": 3.6693912602902606e-05,
|
|
"loss": 0.6004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845950424671173,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4964.7,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 1.342963268675196,
|
|
"grad_norm": 0.27596948595865967,
|
|
"learning_rate": 3.664617414794399e-05,
|
|
"loss": 0.5722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19574898481369019,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4663.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.349153941394965,
|
|
"grad_norm": 0.2895490697583482,
|
|
"learning_rate": 3.659812501837431e-05,
|
|
"loss": 0.6153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20816469192504883,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5404.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 1.3553446141147338,
|
|
"grad_norm": 0.28639847631680787,
|
|
"learning_rate": 3.654976611095487e-05,
|
|
"loss": 0.5815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18278181552886963,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4281.5,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 1.3615352868345028,
|
|
"grad_norm": 0.32278892391603276,
|
|
"learning_rate": 3.650109832822847e-05,
|
|
"loss": 0.5852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22262981534004211,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4570.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 1.3677259595542717,
|
|
"grad_norm": 0.3059183945863716,
|
|
"learning_rate": 3.6452122578502595e-05,
|
|
"loss": 0.5966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22158236801624298,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4505.6,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 1.3739166322740404,
|
|
"grad_norm": 0.2647416293906054,
|
|
"learning_rate": 3.640283977583243e-05,
|
|
"loss": 0.5692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157904252409935,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4379.0,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.3801073049938093,
|
|
"grad_norm": 0.28694663429772727,
|
|
"learning_rate": 3.635325084000384e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062404341995716095,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5018.2,
|
|
"valid_targets_min": 2523
|
|
},
|
|
{
|
|
"epoch": 1.3862979777135782,
|
|
"grad_norm": 0.2112342930737145,
|
|
"learning_rate": 3.630335669651618e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05750882253050804,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5262.3,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 1.3924886504333471,
|
|
"grad_norm": 0.20377507261249056,
|
|
"learning_rate": 3.6253158276565004e-05,
|
|
"loss": 0.1781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05253617838025093,
|
|
"step": 1125,
|
|
"valid_targets_mean": 5112.8,
|
|
"valid_targets_min": 3095
|
|
},
|
|
{
|
|
"epoch": 1.398679323153116,
|
|
"grad_norm": 0.19073688837170974,
|
|
"learning_rate": 3.620265651702474e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05561065301299095,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5352.1,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 1.4048699958728847,
|
|
"grad_norm": 0.20066498405659405,
|
|
"learning_rate": 3.615185236043115e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06021295487880707,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5610.1,
|
|
"valid_targets_min": 3069
|
|
},
|
|
{
|
|
"epoch": 1.4110606685926537,
|
|
"grad_norm": 0.19826660969874396,
|
|
"learning_rate": 3.6100746754963756e-05,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05546848475933075,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5206.3,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 1.4172513413124226,
|
|
"grad_norm": 0.21785904153172483,
|
|
"learning_rate": 3.604934065442817e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057219430804252625,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5573.9,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 1.4234420140321915,
|
|
"grad_norm": 0.20799133262388406,
|
|
"learning_rate": 3.599763501823824e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058664433658123016,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5469.3,
|
|
"valid_targets_min": 3506
|
|
},
|
|
{
|
|
"epoch": 1.4296326867519604,
|
|
"grad_norm": 0.20037711630702568,
|
|
"learning_rate": 3.594563081139821e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057093121111392975,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5436.2,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 1.4358233594717293,
|
|
"grad_norm": 0.20061606095568418,
|
|
"learning_rate": 3.589332900448465e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06141910329461098,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5540.5,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 1.4420140321914983,
|
|
"grad_norm": 0.1909973154449887,
|
|
"learning_rate": 3.584073057362836e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05757623538374901,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5831.1,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 1.448204704911267,
|
|
"grad_norm": 0.22502361864492065,
|
|
"learning_rate": 3.578783650049621e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05193805322051048,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5347.5,
|
|
"valid_targets_min": 2417
|
|
},
|
|
{
|
|
"epoch": 1.4543953776310359,
|
|
"grad_norm": 0.2215900040664804,
|
|
"learning_rate": 3.57346477722727e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053193263709545135,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5258.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.4605860503508048,
|
|
"grad_norm": 0.21819446442583929,
|
|
"learning_rate": 3.568116538164163e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0533098429441452,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5099.8,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 1.4667767230705737,
|
|
"grad_norm": 0.20821665548119334,
|
|
"learning_rate": 3.562739032676756e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05742804706096649,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5578.4,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 1.4729673957903424,
|
|
"grad_norm": 0.18928627752955723,
|
|
"learning_rate": 3.557332361127716e-05,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05045832693576813,
|
|
"step": 1190,
|
|
"valid_targets_mean": 5036.6,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 1.4791580685101113,
|
|
"grad_norm": 0.19030607675659,
|
|
"learning_rate": 3.551896624424048e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05345331132411957,
|
|
"step": 1195,
|
|
"valid_targets_mean": 5270.8,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 1.4853487412298803,
|
|
"grad_norm": 0.21748995509964933,
|
|
"learning_rate": 3.5464319240152107e-05,
|
|
"loss": 0.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05382663384079933,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5462.3,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 1.4915394139496492,
|
|
"grad_norm": 0.20949334512204829,
|
|
"learning_rate": 3.5409383618912284e-05,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051436129957437515,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5422.2,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 1.497730086669418,
|
|
"grad_norm": 0.20448576103587413,
|
|
"learning_rate": 3.535416040580781e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055137235671281815,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5195.3,
|
|
"valid_targets_min": 3110
|
|
},
|
|
{
|
|
"epoch": 1.503920759389187,
|
|
"grad_norm": 0.6520644202794976,
|
|
"learning_rate": 3.529865063149296e-05,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12050806730985641,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2715.2,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 1.510111432108956,
|
|
"grad_norm": 0.3622349748568696,
|
|
"learning_rate": 3.5242855331970174e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12519493699073792,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3519.0,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 1.5163021048287249,
|
|
"grad_norm": 0.3642324925263967,
|
|
"learning_rate": 3.5186775548570824e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1077585369348526,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2402.4,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.5224927775484935,
|
|
"grad_norm": 0.3347523624795357,
|
|
"learning_rate": 3.5130412327935695e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11616308242082596,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2721.7,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 1.5286834502682625,
|
|
"grad_norm": 0.30162439452035616,
|
|
"learning_rate": 3.5073766721995485e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09673435986042023,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2738.3,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 1.5348741229880314,
|
|
"grad_norm": 0.30902366982913926,
|
|
"learning_rate": 3.5016839787951186e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10775735229253769,
|
|
"step": 1240,
|
|
"valid_targets_mean": 2599.6,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 1.5410647957078,
|
|
"grad_norm": 0.30278727099934105,
|
|
"learning_rate": 3.49596325882543e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12161455303430557,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2981.7,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.547255468427569,
|
|
"grad_norm": 0.2902323040664705,
|
|
"learning_rate": 3.490214619058709e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11328007280826569,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3400.5,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 1.553446141147338,
|
|
"grad_norm": 0.29961988798009503,
|
|
"learning_rate": 3.484438166784257e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10336589068174362,
|
|
"step": 1255,
|
|
"valid_targets_mean": 2426.8,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 1.5596368138671068,
|
|
"grad_norm": 0.2737869718112754,
|
|
"learning_rate": 3.478634009810455e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1028667539358139,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3239.3,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.5658274865868758,
|
|
"grad_norm": 0.31863025553411795,
|
|
"learning_rate": 3.4728022564627464e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09025024622678757,
|
|
"step": 1265,
|
|
"valid_targets_mean": 2395.9,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 1.5720181593066447,
|
|
"grad_norm": 0.3051712508780277,
|
|
"learning_rate": 3.466943015581619e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09216859936714172,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2764.9,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 1.5782088320264136,
|
|
"grad_norm": 0.3124821916723884,
|
|
"learning_rate": 3.46105639652057e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10129411518573761,
|
|
"step": 1275,
|
|
"valid_targets_mean": 2182.6,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 1.5843995047461825,
|
|
"grad_norm": 0.2747575943996048,
|
|
"learning_rate": 3.455142509144071e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09187658876180649,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2757.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 1.5905901774659514,
|
|
"grad_norm": 0.30539737912804876,
|
|
"learning_rate": 3.4492014638255094e-05,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09385639429092407,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2362.7,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 1.5967808501857201,
|
|
"grad_norm": 0.3064244752963847,
|
|
"learning_rate": 3.443233371445135e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10986920446157455,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3076.5,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 1.602971522905489,
|
|
"grad_norm": 0.28367385873270917,
|
|
"learning_rate": 3.437238343387989e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152495115995407,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3429.8,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 1.609162195625258,
|
|
"grad_norm": 0.35040257975444944,
|
|
"learning_rate": 3.4312164915418235e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11502495408058167,
|
|
"step": 1300,
|
|
"valid_targets_mean": 2517.6,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 1.6153528683450267,
|
|
"grad_norm": 0.32411177469231334,
|
|
"learning_rate": 3.425167928295015e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09927357733249664,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2430.2,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 1.6215435410647956,
|
|
"grad_norm": 0.36777730943728837,
|
|
"learning_rate": 3.4190927665344636e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15405333042144775,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 1.6277342137845645,
|
|
"grad_norm": 0.3265250537156614,
|
|
"learning_rate": 3.4129911196434926e-05,
|
|
"loss": 0.4918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13666336238384247,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4209.8,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 1.6339248865043334,
|
|
"grad_norm": 0.260325971067913,
|
|
"learning_rate": 3.406863101499727e-05,
|
|
"loss": 0.4825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13777384161949158,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4164.6,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 1.6401155592241023,
|
|
"grad_norm": 0.32666922704675544,
|
|
"learning_rate": 3.4007088264729694e-05,
|
|
"loss": 0.4889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12942048907279968,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2817.8,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 1.6463062319438713,
|
|
"grad_norm": 0.27095694488376304,
|
|
"learning_rate": 3.394528409423066e-05,
|
|
"loss": 0.4792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1661018431186676,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4294.8,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 1.6524969046636402,
|
|
"grad_norm": 0.22582745581767177,
|
|
"learning_rate": 3.388321965697761e-05,
|
|
"loss": 0.4477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259889304637909,
|
|
"step": 1335,
|
|
"valid_targets_mean": 5346.6,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 1.658687577383409,
|
|
"grad_norm": 0.2558877002340073,
|
|
"learning_rate": 3.3820896111305485e-05,
|
|
"loss": 0.5239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21825307607650757,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4882.9,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 1.664878250103178,
|
|
"grad_norm": 0.26746574312840565,
|
|
"learning_rate": 3.3758314620385074e-05,
|
|
"loss": 0.458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16172057390213013,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3881.3,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 1.6710689228229467,
|
|
"grad_norm": 0.28132586028625306,
|
|
"learning_rate": 3.3695476352201264e-05,
|
|
"loss": 0.4671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20941264927387238,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4113.5,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 1.6772595955427156,
|
|
"grad_norm": 0.270433614074323,
|
|
"learning_rate": 3.3632382479531336e-05,
|
|
"loss": 0.4713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14134955406188965,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3982.2,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 1.6834502682624846,
|
|
"grad_norm": 0.28061529895054244,
|
|
"learning_rate": 3.3569034179922994e-05,
|
|
"loss": 0.4664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15205073356628418,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4060.7,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 1.6896409409822533,
|
|
"grad_norm": 0.295734808258396,
|
|
"learning_rate": 3.350543263567246e-05,
|
|
"loss": 0.4744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16960671544075012,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3746.6,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.6958316137020222,
|
|
"grad_norm": 0.2896276743663762,
|
|
"learning_rate": 3.3441579033802304e-05,
|
|
"loss": 0.455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16290168464183807,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3811.0,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 1.702022286421791,
|
|
"grad_norm": 0.27142254729041365,
|
|
"learning_rate": 3.3377474566039415e-05,
|
|
"loss": 0.4479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12198525667190552,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3178.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 1.70821295914156,
|
|
"grad_norm": 0.26022743254910097,
|
|
"learning_rate": 3.331312042879266e-05,
|
|
"loss": 0.4984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15562951564788818,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3965.6,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 1.714403631861329,
|
|
"grad_norm": 0.306672891420966,
|
|
"learning_rate": 3.3248517823130625e-05,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14392998814582825,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3233.6,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 1.7205943045810979,
|
|
"grad_norm": 0.291244382297998,
|
|
"learning_rate": 3.318366795475913e-05,
|
|
"loss": 0.49,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141901433467865,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3789.5,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 1.7267849773008668,
|
|
"grad_norm": 0.31014859524520666,
|
|
"learning_rate": 3.31185720339988e-05,
|
|
"loss": 0.4553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11878319084644318,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3069.0,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 1.7329756500206357,
|
|
"grad_norm": 0.2762448043036437,
|
|
"learning_rate": 3.305323127576242e-05,
|
|
"loss": 0.4679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14085543155670166,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4238.5,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 1.7391663227404046,
|
|
"grad_norm": 0.2938314664317184,
|
|
"learning_rate": 3.2987646899532275e-05,
|
|
"loss": 0.4657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15402351319789886,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3785.9,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 1.7453569954601733,
|
|
"grad_norm": 0.2941872124375445,
|
|
"learning_rate": 3.292182012933742e-05,
|
|
"loss": 0.482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16791443526744843,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3915.9,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 1.7515476681799422,
|
|
"grad_norm": 0.29772295923581144,
|
|
"learning_rate": 3.285575219373079e-05,
|
|
"loss": 0.4537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0908592939376831,
|
|
"step": 1415,
|
|
"valid_targets_mean": 1047.1,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.7577383408997111,
|
|
"grad_norm": 0.24750272270915513,
|
|
"learning_rate": 3.27894443257663e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12157382071018219,
|
|
"step": 1420,
|
|
"valid_targets_mean": 8119.0,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 1.7639290136194798,
|
|
"grad_norm": 0.23477672281612283,
|
|
"learning_rate": 3.272289776297583e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09342372417449951,
|
|
"step": 1425,
|
|
"valid_targets_mean": 5534.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 1.7701196863392488,
|
|
"grad_norm": 0.23444160745808323,
|
|
"learning_rate": 3.26561137473461e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12781041860580444,
|
|
"step": 1430,
|
|
"valid_targets_mean": 7868.5,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 1.7763103590590177,
|
|
"grad_norm": 0.19849713870676394,
|
|
"learning_rate": 3.258909352529555e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0959029346704483,
|
|
"step": 1435,
|
|
"valid_targets_mean": 7399.9,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.7825010317787866,
|
|
"grad_norm": 0.24558090962194007,
|
|
"learning_rate": 3.2521838347651014e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11990231275558472,
|
|
"step": 1440,
|
|
"valid_targets_mean": 6082.5,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 1.7886917044985555,
|
|
"grad_norm": 0.19303735747598508,
|
|
"learning_rate": 3.24543494696244e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08387348055839539,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5861.0,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 1.7948823772183244,
|
|
"grad_norm": 0.20962219125058565,
|
|
"learning_rate": 3.238662815078928e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10890525579452515,
|
|
"step": 1450,
|
|
"valid_targets_mean": 6673.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 1.8010730499380934,
|
|
"grad_norm": 0.17446499795930132,
|
|
"learning_rate": 3.231867565505737e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08495984971523285,
|
|
"step": 1455,
|
|
"valid_targets_mean": 6232.8,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.8072637226578623,
|
|
"grad_norm": 0.1726782723243939,
|
|
"learning_rate": 3.2250493250654925e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07411457598209381,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5522.1,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.8134543953776312,
|
|
"grad_norm": 0.19496297266717966,
|
|
"learning_rate": 3.2182082210099084e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11083486676216125,
|
|
"step": 1465,
|
|
"valid_targets_mean": 7316.1,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 1.8196450680974,
|
|
"grad_norm": 0.18591801303957783,
|
|
"learning_rate": 3.211344381017411e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11938369274139404,
|
|
"step": 1470,
|
|
"valid_targets_mean": 7420.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 1.8258357408171688,
|
|
"grad_norm": 0.26993099872591914,
|
|
"learning_rate": 3.204457933190762e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07108387351036072,
|
|
"step": 1475,
|
|
"valid_targets_mean": 1202.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.8320264135369375,
|
|
"grad_norm": 0.17502123032506656,
|
|
"learning_rate": 3.197549006054656e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07866702973842621,
|
|
"step": 1480,
|
|
"valid_targets_mean": 6558.3,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 1.8382170862567064,
|
|
"grad_norm": 0.18539663802149575,
|
|
"learning_rate": 3.190617728553332e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057975903153419495,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5374.2,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 1.8444077589764754,
|
|
"grad_norm": 0.18447680772689543,
|
|
"learning_rate": 3.183664230048164e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09268680214881897,
|
|
"step": 1490,
|
|
"valid_targets_mean": 6666.2,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.8505984316962443,
|
|
"grad_norm": 0.1866438017965833,
|
|
"learning_rate": 3.176688640315245e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08710762113332748,
|
|
"step": 1495,
|
|
"valid_targets_mean": 6206.8,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 1.8567891044160132,
|
|
"grad_norm": 0.21396532019559145,
|
|
"learning_rate": 3.1696910895429644e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10578572750091553,
|
|
"step": 1500,
|
|
"valid_targets_mean": 6207.1,
|
|
"valid_targets_min": 135
|
|
},
|
|
{
|
|
"epoch": 1.8629797771357821,
|
|
"grad_norm": 0.18700818608522762,
|
|
"learning_rate": 3.162671708329583e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10384561866521835,
|
|
"step": 1505,
|
|
"valid_targets_mean": 6138.9,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 1.869170449855551,
|
|
"grad_norm": 0.17245943458908233,
|
|
"learning_rate": 3.1556306276807905e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10552076995372772,
|
|
"step": 1510,
|
|
"valid_targets_mean": 7026.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.87536112257532,
|
|
"grad_norm": 0.25666006932918556,
|
|
"learning_rate": 3.148567979007261e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.069573312997818,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5165.2,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 1.8815517952950889,
|
|
"grad_norm": 0.2805444931262978,
|
|
"learning_rate": 3.141483894122205e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06534378975629807,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5186.9,
|
|
"valid_targets_min": 214
|
|
},
|
|
{
|
|
"epoch": 1.8877424680148576,
|
|
"grad_norm": 0.22566971903632724,
|
|
"learning_rate": 3.134378505238903e-05,
|
|
"loss": 0.1965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0647401437163353,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4295.1,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 1.8939331407346265,
|
|
"grad_norm": 0.1974883272826852,
|
|
"learning_rate": 3.127251944968245e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0631110668182373,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4718.3,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 1.9001238134543954,
|
|
"grad_norm": 0.1913342659280007,
|
|
"learning_rate": 3.120104346316246e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06381383538246155,
|
|
"step": 1535,
|
|
"valid_targets_mean": 5277.9,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 1.906314486174164,
|
|
"grad_norm": 0.20710456440848443,
|
|
"learning_rate": 3.112935842681575e-05,
|
|
"loss": 0.1905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06552497297525406,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4981.3,
|
|
"valid_targets_min": 216
|
|
},
|
|
{
|
|
"epoch": 1.912505158893933,
|
|
"grad_norm": 0.18802102259544187,
|
|
"learning_rate": 3.1057465678530545e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06527107954025269,
|
|
"step": 1545,
|
|
"valid_targets_mean": 5764.1,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 1.918695831613702,
|
|
"grad_norm": 0.2132710724712806,
|
|
"learning_rate": 3.0985366560071734e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06656339764595032,
|
|
"step": 1550,
|
|
"valid_targets_mean": 5180.8,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 1.9248865043334709,
|
|
"grad_norm": 0.20756965280220152,
|
|
"learning_rate": 3.0913062417055737e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06850530207157135,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5028.8,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 1.9310771770532398,
|
|
"grad_norm": 0.2091612113560533,
|
|
"learning_rate": 3.084055459892547e-05,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05863848701119423,
|
|
"step": 1560,
|
|
"valid_targets_mean": 5127.5,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 1.9372678497730087,
|
|
"grad_norm": 0.2174421203969585,
|
|
"learning_rate": 3.07678444589251e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0629086047410965,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5064.0,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 1.9434585224927776,
|
|
"grad_norm": 0.18555120249820206,
|
|
"learning_rate": 3.069493335407481e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06423955410718918,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5678.2,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 1.9496491952125465,
|
|
"grad_norm": 0.1992777382712916,
|
|
"learning_rate": 3.06218226451455e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05571291223168373,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4230.2,
|
|
"valid_targets_min": 188
|
|
},
|
|
{
|
|
"epoch": 1.9558398679323155,
|
|
"grad_norm": 0.2133599614001258,
|
|
"learning_rate": 3.054851369663335e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059494636952877045,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4367.6,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 1.9620305406520842,
|
|
"grad_norm": 0.20506246260396002,
|
|
"learning_rate": 3.0475007876734372e-05,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05049007758498192,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4216.6,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.968221213371853,
|
|
"grad_norm": 0.20955199628763072,
|
|
"learning_rate": 3.0401306557318863e-05,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0777488723397255,
|
|
"step": 1590,
|
|
"valid_targets_mean": 6083.8,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.974411886091622,
|
|
"grad_norm": 0.17502816879410302,
|
|
"learning_rate": 3.0327411113905823e-05,
|
|
"loss": 0.1877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062159813940525055,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5689.1,
|
|
"valid_targets_min": 179
|
|
},
|
|
{
|
|
"epoch": 1.9806025588113907,
|
|
"grad_norm": 0.19627949609308623,
|
|
"learning_rate": 3.025332292563727e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0666460245847702,
|
|
"step": 1600,
|
|
"valid_targets_mean": 6020.2,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 1.9867932315311596,
|
|
"grad_norm": 0.20599410627107334,
|
|
"learning_rate": 3.0179043375252487e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06357702612876892,
|
|
"step": 1605,
|
|
"valid_targets_mean": 5016.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.9929839042509285,
|
|
"grad_norm": 0.21456245499786558,
|
|
"learning_rate": 3.010457384906225e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0663001611828804,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5500.6,
|
|
"valid_targets_min": 164
|
|
},
|
|
{
|
|
"epoch": 1.9991745769706974,
|
|
"grad_norm": 0.20109170384329172,
|
|
"learning_rate": 3.002991573692292e-05,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05118231475353241,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4179.7,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 2.0049525381758153,
|
|
"grad_norm": 0.9601678125095755,
|
|
"learning_rate": 2.9955070432210517e-05,
|
|
"loss": 0.5473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709616482257843,
|
|
"step": 1620,
|
|
"valid_targets_mean": 6954.8,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 2.011143210895584,
|
|
"grad_norm": 0.6735910782183346,
|
|
"learning_rate": 2.9880039331794727e-05,
|
|
"loss": 0.4817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1732027530670166,
|
|
"step": 1625,
|
|
"valid_targets_mean": 8220.0,
|
|
"valid_targets_min": 2842
|
|
},
|
|
{
|
|
"epoch": 2.0173338836153527,
|
|
"grad_norm": 0.44381742444664096,
|
|
"learning_rate": 2.9804823836012807e-05,
|
|
"loss": 0.4395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327214241027832,
|
|
"step": 1630,
|
|
"valid_targets_mean": 7166.5,
|
|
"valid_targets_min": 2501
|
|
},
|
|
{
|
|
"epoch": 2.0235245563351216,
|
|
"grad_norm": 0.3543773582487693,
|
|
"learning_rate": 2.9729425348643466e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13141068816184998,
|
|
"step": 1635,
|
|
"valid_targets_mean": 6987.4,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 2.0297152290548905,
|
|
"grad_norm": 0.2785746111741437,
|
|
"learning_rate": 2.9653845276880675e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13437555730342865,
|
|
"step": 1640,
|
|
"valid_targets_mean": 7609.8,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 2.0359059017746595,
|
|
"grad_norm": 0.2568164310805484,
|
|
"learning_rate": 2.9578085031307374e-05,
|
|
"loss": 0.3906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12476755678653717,
|
|
"step": 1645,
|
|
"valid_targets_mean": 7067.3,
|
|
"valid_targets_min": 2449
|
|
},
|
|
{
|
|
"epoch": 2.0420965744944284,
|
|
"grad_norm": 0.24875455176167166,
|
|
"learning_rate": 2.9502146025869175e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11679118871688843,
|
|
"step": 1650,
|
|
"valid_targets_mean": 7091.4,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 2.0482872472141973,
|
|
"grad_norm": 0.23062982614264357,
|
|
"learning_rate": 2.942602967784795e-05,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13672779500484467,
|
|
"step": 1655,
|
|
"valid_targets_mean": 7742.7,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 2.0544779199339662,
|
|
"grad_norm": 0.22185750418015865,
|
|
"learning_rate": 2.934973740783541e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14152377843856812,
|
|
"step": 1660,
|
|
"valid_targets_mean": 8509.0,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 2.060668592653735,
|
|
"grad_norm": 0.22450721483696912,
|
|
"learning_rate": 2.9273270639706547e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12072049081325531,
|
|
"step": 1665,
|
|
"valid_targets_mean": 6931.7,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 2.066859265373504,
|
|
"grad_norm": 0.2044611516321987,
|
|
"learning_rate": 2.9196630800593112e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11479555070400238,
|
|
"step": 1670,
|
|
"valid_targets_mean": 9712.0,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 2.073049938093273,
|
|
"grad_norm": 0.19450433483389243,
|
|
"learning_rate": 2.9119819320856938e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11819329857826233,
|
|
"step": 1675,
|
|
"valid_targets_mean": 9876.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 2.0792406108130415,
|
|
"grad_norm": 0.2147865613386832,
|
|
"learning_rate": 2.9042837634063254e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11040879786014557,
|
|
"step": 1680,
|
|
"valid_targets_mean": 9530.9,
|
|
"valid_targets_min": 5556
|
|
},
|
|
{
|
|
"epoch": 2.0854312835328104,
|
|
"grad_norm": 0.20612421834255124,
|
|
"learning_rate": 2.8965687176953952e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11788208037614822,
|
|
"step": 1685,
|
|
"valid_targets_mean": 9594.7,
|
|
"valid_targets_min": 4603
|
|
},
|
|
{
|
|
"epoch": 2.0916219562525793,
|
|
"grad_norm": 0.201738806366367,
|
|
"learning_rate": 2.888836938942074e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11270958185195923,
|
|
"step": 1690,
|
|
"valid_targets_mean": 9466.0,
|
|
"valid_targets_min": 6389
|
|
},
|
|
{
|
|
"epoch": 2.097812628972348,
|
|
"grad_norm": 0.21503497069664185,
|
|
"learning_rate": 2.8810885714478303e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10999438166618347,
|
|
"step": 1695,
|
|
"valid_targets_mean": 9956.4,
|
|
"valid_targets_min": 4744
|
|
},
|
|
{
|
|
"epoch": 2.104003301692117,
|
|
"grad_norm": 0.2523956802296016,
|
|
"learning_rate": 2.8733237598237337e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11635546386241913,
|
|
"step": 1700,
|
|
"valid_targets_mean": 9960.5,
|
|
"valid_targets_min": 6172
|
|
},
|
|
{
|
|
"epoch": 2.110193974411886,
|
|
"grad_norm": 0.22082769962282334,
|
|
"learning_rate": 2.865542648987758e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11087852716445923,
|
|
"step": 1705,
|
|
"valid_targets_mean": 9848.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.116384647131655,
|
|
"grad_norm": 0.20837758551009614,
|
|
"learning_rate": 2.8577453841620763e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10901837050914764,
|
|
"step": 1710,
|
|
"valid_targets_mean": 9407.0,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 2.122575319851424,
|
|
"grad_norm": 0.2215764595160881,
|
|
"learning_rate": 2.8499321108703503e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10778579115867615,
|
|
"step": 1715,
|
|
"valid_targets_mean": 9471.5,
|
|
"valid_targets_min": 4297
|
|
},
|
|
{
|
|
"epoch": 2.128765992571193,
|
|
"grad_norm": 0.225010366110783,
|
|
"learning_rate": 2.8421029749350154e-05,
|
|
"loss": 0.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10852929949760437,
|
|
"step": 1720,
|
|
"valid_targets_mean": 8577.6,
|
|
"valid_targets_min": 3933
|
|
},
|
|
{
|
|
"epoch": 2.1349566652909617,
|
|
"grad_norm": 0.21115395812737398,
|
|
"learning_rate": 2.8342581224745563e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11572936177253723,
|
|
"step": 1725,
|
|
"valid_targets_mean": 9960.7,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 2.1411473380107306,
|
|
"grad_norm": 0.19921825310170338,
|
|
"learning_rate": 2.826397699900784e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10550732910633087,
|
|
"step": 1730,
|
|
"valid_targets_mean": 9948.3,
|
|
"valid_targets_min": 3344
|
|
},
|
|
{
|
|
"epoch": 2.1473380107304996,
|
|
"grad_norm": 0.2094970808803699,
|
|
"learning_rate": 2.8185218539160996e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09928901493549347,
|
|
"step": 1735,
|
|
"valid_targets_mean": 9342.2,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 2.1535286834502685,
|
|
"grad_norm": 0.21606869914975577,
|
|
"learning_rate": 2.810630731510758e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11338921636343002,
|
|
"step": 1740,
|
|
"valid_targets_mean": 9508.4,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.159719356170037,
|
|
"grad_norm": 0.21773082265463714,
|
|
"learning_rate": 2.8027244799601256e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10537728667259216,
|
|
"step": 1745,
|
|
"valid_targets_mean": 9982.4,
|
|
"valid_targets_min": 3744
|
|
},
|
|
{
|
|
"epoch": 2.165910028889806,
|
|
"grad_norm": 0.22751120361919794,
|
|
"learning_rate": 2.7948032468219293e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10990798473358154,
|
|
"step": 1750,
|
|
"valid_targets_mean": 9832.3,
|
|
"valid_targets_min": 5100
|
|
},
|
|
{
|
|
"epoch": 2.172100701609575,
|
|
"grad_norm": 0.20099878234806784,
|
|
"learning_rate": 2.7868671799335052e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11180532723665237,
|
|
"step": 1755,
|
|
"valid_targets_mean": 10835.8,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 2.1782913743293437,
|
|
"grad_norm": 0.21658337429716357,
|
|
"learning_rate": 2.7789164274090348e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10473163425922394,
|
|
"step": 1760,
|
|
"valid_targets_mean": 10256.2,
|
|
"valid_targets_min": 5379
|
|
},
|
|
{
|
|
"epoch": 2.1844820470491126,
|
|
"grad_norm": 0.1889646169904501,
|
|
"learning_rate": 2.7709511376367882e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09448504447937012,
|
|
"step": 1765,
|
|
"valid_targets_mean": 9251.9,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 2.1906727197688816,
|
|
"grad_norm": 0.17877778311995035,
|
|
"learning_rate": 2.762971459276346e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11410965770483017,
|
|
"step": 1770,
|
|
"valid_targets_mean": 11007.1,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 2.1968633924886505,
|
|
"grad_norm": 0.20244733872513224,
|
|
"learning_rate": 2.754977541255833e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10839540511369705,
|
|
"step": 1775,
|
|
"valid_targets_mean": 9657.8,
|
|
"valid_targets_min": 2733
|
|
},
|
|
{
|
|
"epoch": 2.2030540652084194,
|
|
"grad_norm": 0.18273722898256076,
|
|
"learning_rate": 2.7469695327691316e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12985725700855255,
|
|
"step": 1780,
|
|
"valid_targets_mean": 10753.2,
|
|
"valid_targets_min": 5581
|
|
},
|
|
{
|
|
"epoch": 2.2092447379281883,
|
|
"grad_norm": 0.21748090746598014,
|
|
"learning_rate": 2.7389475832731034e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10689324140548706,
|
|
"step": 1785,
|
|
"valid_targets_mean": 9586.8,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 2.2154354106479572,
|
|
"grad_norm": 0.19805089993275524,
|
|
"learning_rate": 2.730911842484794e-05,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09780093282461166,
|
|
"step": 1790,
|
|
"valid_targets_mean": 9004.5,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.221626083367726,
|
|
"grad_norm": 0.2159025331372916,
|
|
"learning_rate": 2.7228624603786445e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09618871659040451,
|
|
"step": 1795,
|
|
"valid_targets_mean": 9294.9,
|
|
"valid_targets_min": 4130
|
|
},
|
|
{
|
|
"epoch": 2.2278167560874946,
|
|
"grad_norm": 0.19546554937222332,
|
|
"learning_rate": 2.714799587183688e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10691098868846893,
|
|
"step": 1800,
|
|
"valid_targets_mean": 10179.0,
|
|
"valid_targets_min": 4267
|
|
},
|
|
{
|
|
"epoch": 2.2340074288072636,
|
|
"grad_norm": 0.20477056290951823,
|
|
"learning_rate": 2.7067233733807472e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09465555846691132,
|
|
"step": 1805,
|
|
"valid_targets_mean": 8532.7,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 2.2401981015270325,
|
|
"grad_norm": 0.2403686728516108,
|
|
"learning_rate": 2.6986339696996283e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12363459914922714,
|
|
"step": 1810,
|
|
"valid_targets_mean": 6747.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 2.2463887742468014,
|
|
"grad_norm": 0.27647464182725556,
|
|
"learning_rate": 2.6905315271163042e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10924101620912552,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5423.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 2.2525794469665703,
|
|
"grad_norm": 2.4730823582341332,
|
|
"learning_rate": 2.6824161968500982e-05,
|
|
"loss": 0.4392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848860025405884,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3791.8,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 2.2587701196863392,
|
|
"grad_norm": 0.6160247089609974,
|
|
"learning_rate": 2.6742881303608625e-05,
|
|
"loss": 0.6954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20853915810585022,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4159.7,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 2.264960792406108,
|
|
"grad_norm": 0.5363290771699545,
|
|
"learning_rate": 2.666147479346152e-05,
|
|
"loss": 0.6221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1877930611371994,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3654.0,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 2.271151465125877,
|
|
"grad_norm": 0.3763451133882638,
|
|
"learning_rate": 2.657994395738388e-05,
|
|
"loss": 0.6164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17502644658088684,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2945.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 2.277342137845646,
|
|
"grad_norm": 0.32349875095528546,
|
|
"learning_rate": 2.6498290317020318e-05,
|
|
"loss": 0.5848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2072514295578003,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4179.1,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 2.283532810565415,
|
|
"grad_norm": 0.27969032627740975,
|
|
"learning_rate": 2.6416515396307354e-05,
|
|
"loss": 0.5515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17987778782844543,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4048.5,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 2.289723483285184,
|
|
"grad_norm": 0.2708616035238242,
|
|
"learning_rate": 2.633462072144504e-05,
|
|
"loss": 0.5454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17672090232372284,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4272.8,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 2.2959141560049527,
|
|
"grad_norm": 0.25916499220597367,
|
|
"learning_rate": 2.625260782086843e-05,
|
|
"loss": 0.5607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673288881778717,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4842.6,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 2.3021048287247217,
|
|
"grad_norm": 0.28502218034369065,
|
|
"learning_rate": 2.6170478225219097e-05,
|
|
"loss": 0.5523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21681833267211914,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4255.8,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 2.30829550144449,
|
|
"grad_norm": 0.25937949622699397,
|
|
"learning_rate": 2.6088233467316523e-05,
|
|
"loss": 0.5479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17168188095092773,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4050.4,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 2.314486174164259,
|
|
"grad_norm": 0.28775702425285576,
|
|
"learning_rate": 2.6005875082129527e-05,
|
|
"loss": 0.5269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16889512538909912,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3251.1,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 2.320676846884028,
|
|
"grad_norm": 0.26676217007174596,
|
|
"learning_rate": 2.592340460674759e-05,
|
|
"loss": 0.5375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18335913121700287,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3961.1,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 2.326867519603797,
|
|
"grad_norm": 0.2526345753364653,
|
|
"learning_rate": 2.5840823580352194e-05,
|
|
"loss": 0.5359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19697967171669006,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5018.0,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 2.333058192323566,
|
|
"grad_norm": 0.26342849490912634,
|
|
"learning_rate": 2.575813354418806e-05,
|
|
"loss": 0.5407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1968528777360916,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4231.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 2.3392488650433347,
|
|
"grad_norm": 0.28010674555026877,
|
|
"learning_rate": 2.567533604153444e-05,
|
|
"loss": 0.5365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16634684801101685,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3704.2,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 2.3454395377631037,
|
|
"grad_norm": 0.28665532498406054,
|
|
"learning_rate": 2.5592432617676232e-05,
|
|
"loss": 0.5545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718532145023346,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3797.5,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 2.3516302104828726,
|
|
"grad_norm": 0.2794734482582125,
|
|
"learning_rate": 2.550942481987523e-05,
|
|
"loss": 0.5503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18096503615379333,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3499.0,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.3578208832026415,
|
|
"grad_norm": 0.2442120337527459,
|
|
"learning_rate": 2.542631419734118e-05,
|
|
"loss": 0.5391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19101189076900482,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5092.9,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 2.3640115559224104,
|
|
"grad_norm": 0.27188689202309085,
|
|
"learning_rate": 2.534310230120289e-05,
|
|
"loss": 0.55,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22167204320430756,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4786.8,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 2.370202228642179,
|
|
"grad_norm": 0.2700401716333207,
|
|
"learning_rate": 2.5259790684479286e-05,
|
|
"loss": 0.5487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18425676226615906,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4219.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 2.376392901361948,
|
|
"grad_norm": 0.3958937926895449,
|
|
"learning_rate": 2.5176380902050418e-05,
|
|
"loss": 0.4187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06445576250553131,
|
|
"step": 1920,
|
|
"valid_targets_mean": 5702.6,
|
|
"valid_targets_min": 2936
|
|
},
|
|
{
|
|
"epoch": 2.3825835740817167,
|
|
"grad_norm": 0.31439941674234595,
|
|
"learning_rate": 2.5092874510628446e-05,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05536726489663124,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5383.9,
|
|
"valid_targets_min": 3193
|
|
},
|
|
{
|
|
"epoch": 2.3887742468014856,
|
|
"grad_norm": 0.216010903285335,
|
|
"learning_rate": 2.5009273068728593e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05073722451925278,
|
|
"step": 1930,
|
|
"valid_targets_mean": 5404.5,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 2.3949649195212546,
|
|
"grad_norm": 0.20988467972967847,
|
|
"learning_rate": 2.4925578136640032e-05,
|
|
"loss": 0.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051566556096076965,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5669.6,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 2.4011555922410235,
|
|
"grad_norm": 0.18824490976976443,
|
|
"learning_rate": 2.484179127639679e-05,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051841527223587036,
|
|
"step": 1940,
|
|
"valid_targets_mean": 5712.7,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 2.4073462649607924,
|
|
"grad_norm": 0.18821391118370176,
|
|
"learning_rate": 2.47579140517486e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046889111399650574,
|
|
"step": 1945,
|
|
"valid_targets_mean": 5044.9,
|
|
"valid_targets_min": 2759
|
|
},
|
|
{
|
|
"epoch": 2.4135369376805613,
|
|
"grad_norm": 0.1870637005286098,
|
|
"learning_rate": 2.4673948028131688e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04546132683753967,
|
|
"step": 1950,
|
|
"valid_targets_mean": 5219.8,
|
|
"valid_targets_min": 2654
|
|
},
|
|
{
|
|
"epoch": 2.4197276104003302,
|
|
"grad_norm": 0.18693550565148853,
|
|
"learning_rate": 2.4589894772639585e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04823080822825432,
|
|
"step": 1955,
|
|
"valid_targets_mean": 5413.7,
|
|
"valid_targets_min": 3427
|
|
},
|
|
{
|
|
"epoch": 2.425918283120099,
|
|
"grad_norm": 0.19250772177370612,
|
|
"learning_rate": 2.4505755853993852e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04659372568130493,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5462.3,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 2.432108955839868,
|
|
"grad_norm": 0.21168125088921474,
|
|
"learning_rate": 2.442153284251484e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052339181303977966,
|
|
"step": 1965,
|
|
"valid_targets_mean": 5651.2,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 2.438299628559637,
|
|
"grad_norm": 0.17649667734279523,
|
|
"learning_rate": 2.4337227310092333e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04489101469516754,
|
|
"step": 1970,
|
|
"valid_targets_mean": 5431.2,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 2.444490301279406,
|
|
"grad_norm": 0.2067323814552953,
|
|
"learning_rate": 2.4252840830156265e-05,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045895278453826904,
|
|
"step": 1975,
|
|
"valid_targets_mean": 5013.1,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 2.4506809739991744,
|
|
"grad_norm": 0.19610429034984053,
|
|
"learning_rate": 2.4168374977647314e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04308094084262848,
|
|
"step": 1980,
|
|
"valid_targets_mean": 5014.3,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 2.4568716467189433,
|
|
"grad_norm": 0.2021202275930502,
|
|
"learning_rate": 2.4083831328987525e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04532980918884277,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5006.2,
|
|
"valid_targets_min": 2670
|
|
},
|
|
{
|
|
"epoch": 2.4630623194387122,
|
|
"grad_norm": 0.18997073841115034,
|
|
"learning_rate": 2.3999211462050878e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050782494246959686,
|
|
"step": 1990,
|
|
"valid_targets_mean": 6038.5,
|
|
"valid_targets_min": 3631
|
|
},
|
|
{
|
|
"epoch": 2.469252992158481,
|
|
"grad_norm": 0.25361399295001685,
|
|
"learning_rate": 2.391451695613386e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051717113703489304,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4937.4,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 2.47544366487825,
|
|
"grad_norm": 0.1832109315762058,
|
|
"learning_rate": 2.382974939192597e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04698061943054199,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5467.6,
|
|
"valid_targets_min": 2990
|
|
},
|
|
{
|
|
"epoch": 2.481634337598019,
|
|
"grad_norm": 0.18978103653214698,
|
|
"learning_rate": 2.3744910351480222e-05,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049003757536411285,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5111.8,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 2.487825010317788,
|
|
"grad_norm": 0.16795161885536908,
|
|
"learning_rate": 2.3660001418183632e-05,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05067973583936691,
|
|
"step": 2010,
|
|
"valid_targets_mean": 5763.9,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 2.494015683037557,
|
|
"grad_norm": 0.18070188811132068,
|
|
"learning_rate": 2.3575024176727634e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04466181620955467,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4922.0,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 2.5002063557573257,
|
|
"grad_norm": 0.19459280372167223,
|
|
"learning_rate": 2.348998021307856e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04511520266532898,
|
|
"step": 2020,
|
|
"valid_targets_mean": 5118.9,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 2.5063970284770947,
|
|
"grad_norm": 0.3308093957006726,
|
|
"learning_rate": 2.3404871114447976e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09127230942249298,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2572.3,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 2.512587701196863,
|
|
"grad_norm": 0.29446412761453195,
|
|
"learning_rate": 2.33196984692631e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.082707479596138,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2910.9,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 2.518778373916632,
|
|
"grad_norm": 0.3448619160566435,
|
|
"learning_rate": 2.3234463867137157e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09904389828443527,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3065.4,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 2.524969046636401,
|
|
"grad_norm": 0.3191846458284289,
|
|
"learning_rate": 2.3149168898839682e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07122047245502472,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2485.7,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 2.53115971935617,
|
|
"grad_norm": 0.32490419921509084,
|
|
"learning_rate": 2.306381515626686e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0754295140504837,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3224.3,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 2.537350392075939,
|
|
"grad_norm": 0.333268575264297,
|
|
"learning_rate": 2.297840423241181e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08463700860738754,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2620.0,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 2.5435410647957077,
|
|
"grad_norm": 0.33110551321352766,
|
|
"learning_rate": 2.2892937721334844e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0754794031381607,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2364.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 2.5497317375154767,
|
|
"grad_norm": 0.3247923627654641,
|
|
"learning_rate": 2.280741721813371e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0882129818201065,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2866.9,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 2.5559224102352456,
|
|
"grad_norm": 0.3275767642045292,
|
|
"learning_rate": 2.2721844318913858e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09351696074008942,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2796.1,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.5621130829550145,
|
|
"grad_norm": 0.29790024922824526,
|
|
"learning_rate": 2.2636220620758605e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07411504536867142,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2757.5,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 2.5683037556747834,
|
|
"grad_norm": 0.29738966199235956,
|
|
"learning_rate": 2.2550547721699368e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07395999133586884,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2478.7,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 2.5744944283945523,
|
|
"grad_norm": 0.3156251386777051,
|
|
"learning_rate": 2.246482722068581e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08775871247053146,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3050.7,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 2.5806851011143213,
|
|
"grad_norm": 0.33687949615120394,
|
|
"learning_rate": 2.237906071755601e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09910603612661362,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3265.0,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 2.58687577383409,
|
|
"grad_norm": 0.31154363121832845,
|
|
"learning_rate": 2.2293249813006606e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09251091629266739,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3089.6,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 2.593066446553859,
|
|
"grad_norm": 0.356924406194488,
|
|
"learning_rate": 2.220739610856292e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08979831635951996,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3113.7,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 2.5992571192736276,
|
|
"grad_norm": 0.33011383284022083,
|
|
"learning_rate": 2.212150120654907e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0826551765203476,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2633.3,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 2.6054477919933965,
|
|
"grad_norm": 0.3170383608088678,
|
|
"learning_rate": 2.2035566710058053e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07782086730003357,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2690.3,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 2.6116384647131654,
|
|
"grad_norm": 0.3791952540358925,
|
|
"learning_rate": 2.194959422292184e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08845369517803192,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2393.4,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 2.6178291374329343,
|
|
"grad_norm": 0.323355564553435,
|
|
"learning_rate": 2.1863585349681436e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08920931071043015,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3139.9,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 2.6240198101527032,
|
|
"grad_norm": 0.5522208742747446,
|
|
"learning_rate": 2.1777541695556936e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18075402081012726,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3908.5,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 2.630210482872472,
|
|
"grad_norm": 0.3043530931084567,
|
|
"learning_rate": 2.169146486641756e-05,
|
|
"loss": 0.4567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1775311678647995,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4539.9,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 2.636401155592241,
|
|
"grad_norm": 0.2204006134816767,
|
|
"learning_rate": 2.1605356468751704e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13055014610290527,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4636.7,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 2.64259182831201,
|
|
"grad_norm": 0.2845811541305584,
|
|
"learning_rate": 2.1519218109636917e-05,
|
|
"loss": 0.4609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14510205388069153,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3688.9,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 2.648782501031779,
|
|
"grad_norm": 0.27228237277428324,
|
|
"learning_rate": 2.1433051396709953e-05,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1411634385585785,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3569.2,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.6549731737515474,
|
|
"grad_norm": 0.27510849900057355,
|
|
"learning_rate": 2.134685793813673e-05,
|
|
"loss": 0.4578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718500256538391,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3664.9,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 2.6611638464713163,
|
|
"grad_norm": 0.2588827982880247,
|
|
"learning_rate": 2.1260639342582338e-05,
|
|
"loss": 0.4249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137798473238945,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4255.6,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 2.6673545191910852,
|
|
"grad_norm": 0.2616380399095123,
|
|
"learning_rate": 2.117439721918101e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13038744032382965,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3859.6,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 2.673545191910854,
|
|
"grad_norm": 0.2767689483525373,
|
|
"learning_rate": 2.1088133177506087e-05,
|
|
"loss": 0.4449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569627821445465,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4090.2,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 2.679735864630623,
|
|
"grad_norm": 0.27744731947840506,
|
|
"learning_rate": 2.1001848827539975e-05,
|
|
"loss": 0.4355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15183711051940918,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3309.9,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 2.685926537350392,
|
|
"grad_norm": 0.25206789817075675,
|
|
"learning_rate": 2.091554577964412e-05,
|
|
"loss": 0.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10424351692199707,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2271.8,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 2.692117210070161,
|
|
"grad_norm": 0.24895867891508436,
|
|
"learning_rate": 2.0829225644528913e-05,
|
|
"loss": 0.4372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13071134686470032,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4197.5,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 2.69830788278993,
|
|
"grad_norm": 0.24873546176559738,
|
|
"learning_rate": 2.0742890033223658e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16955408453941345,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5003.2,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 2.7044985555096988,
|
|
"grad_norm": 0.2689264710555221,
|
|
"learning_rate": 2.0656540557046512e-05,
|
|
"loss": 0.4393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15500536561012268,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4006.6,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 2.7106892282294677,
|
|
"grad_norm": 0.26859816896397914,
|
|
"learning_rate": 2.057017882757438e-05,
|
|
"loss": 0.4287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1307629495859146,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3161.7,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 2.7168799009492366,
|
|
"grad_norm": 0.25468612807364926,
|
|
"learning_rate": 2.048380645661286e-05,
|
|
"loss": 0.4327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15774625539779663,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4580.7,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 2.7230705736690055,
|
|
"grad_norm": 0.2914963806394626,
|
|
"learning_rate": 2.0397425056166162e-05,
|
|
"loss": 0.4425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15585854649543762,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3757.0,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 2.7292612463887744,
|
|
"grad_norm": 0.28215526202486446,
|
|
"learning_rate": 2.0311036238407023e-05,
|
|
"loss": 0.4352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12329836934804916,
|
|
"step": 2205,
|
|
"valid_targets_mean": 2430.5,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.7354519191085434,
|
|
"grad_norm": 0.32178930719330096,
|
|
"learning_rate": 2.0224641615646584e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09574954211711884,
|
|
"step": 2210,
|
|
"valid_targets_mean": 981.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 2.7416425918283123,
|
|
"grad_norm": 0.25500095885593865,
|
|
"learning_rate": 2.0138242800304363e-05,
|
|
"loss": 0.4402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15458162128925323,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4691.9,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 2.7478332645480807,
|
|
"grad_norm": 0.2420878999190932,
|
|
"learning_rate": 2.0051841404878102e-05,
|
|
"loss": 0.4381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14765343070030212,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4924.3,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 2.7540239372678497,
|
|
"grad_norm": 0.3391001467947181,
|
|
"learning_rate": 1.996543904191371e-05,
|
|
"loss": 0.3821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11113733053207397,
|
|
"step": 2225,
|
|
"valid_targets_mean": 6950.2,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 2.7602146099876186,
|
|
"grad_norm": 0.21871175962320583,
|
|
"learning_rate": 1.9879037323975135e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09046187251806259,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6872.7,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 2.7664052827073875,
|
|
"grad_norm": 0.21092917986423992,
|
|
"learning_rate": 1.979263786361431e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04919278249144554,
|
|
"step": 2235,
|
|
"valid_targets_mean": 1881.6,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 2.7725959554271564,
|
|
"grad_norm": 0.1882502263758761,
|
|
"learning_rate": 1.9706242273341025e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08854604512453079,
|
|
"step": 2240,
|
|
"valid_targets_mean": 5967.8,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 2.7787866281469253,
|
|
"grad_norm": 0.18755567250877106,
|
|
"learning_rate": 1.961985216559283e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09173551946878433,
|
|
"step": 2245,
|
|
"valid_targets_mean": 7220.3,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 2.7849773008666943,
|
|
"grad_norm": 0.3487526326000516,
|
|
"learning_rate": 1.9533469152704957e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07703309506177902,
|
|
"step": 2250,
|
|
"valid_targets_mean": 6546.8,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 2.791167973586463,
|
|
"grad_norm": 0.18937263174116817,
|
|
"learning_rate": 1.9447094846880243e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0715169608592987,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5979.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 2.797358646306232,
|
|
"grad_norm": 0.16532781323960613,
|
|
"learning_rate": 1.9360730860159e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09339802712202072,
|
|
"step": 2260,
|
|
"valid_targets_mean": 6519.6,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 2.8035493190260006,
|
|
"grad_norm": 0.17580556768702843,
|
|
"learning_rate": 1.9274378804388954e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10122714191675186,
|
|
"step": 2265,
|
|
"valid_targets_mean": 6011.3,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 2.8097399917457695,
|
|
"grad_norm": 0.18296152686489844,
|
|
"learning_rate": 1.9188040291195154e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1199965700507164,
|
|
"step": 2270,
|
|
"valid_targets_mean": 6978.6,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 2.8159306644655384,
|
|
"grad_norm": 0.16563423969655394,
|
|
"learning_rate": 1.9101716931949915e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07235737890005112,
|
|
"step": 2275,
|
|
"valid_targets_mean": 7419.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 2.8221213371853073,
|
|
"grad_norm": 0.18743630158247912,
|
|
"learning_rate": 1.9015410337742717e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07758144289255142,
|
|
"step": 2280,
|
|
"valid_targets_mean": 6317.3,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 2.8283120099050763,
|
|
"grad_norm": 0.1946918294010466,
|
|
"learning_rate": 1.8929122119350146e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10401487350463867,
|
|
"step": 2285,
|
|
"valid_targets_mean": 6492.8,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 2.834502682624845,
|
|
"grad_norm": 0.1779802957837411,
|
|
"learning_rate": 1.8842853887205833e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08227240294218063,
|
|
"step": 2290,
|
|
"valid_targets_mean": 6018.3,
|
|
"valid_targets_min": 176
|
|
},
|
|
{
|
|
"epoch": 2.840693355344614,
|
|
"grad_norm": 0.1990639802685102,
|
|
"learning_rate": 1.875660725137039e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06621192395687103,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2794.3,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 2.846884028064383,
|
|
"grad_norm": 0.20909452497550943,
|
|
"learning_rate": 1.867038382150139e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07610773295164108,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5186.0,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 2.853074700784152,
|
|
"grad_norm": 0.20376157191278518,
|
|
"learning_rate": 1.8584185206823284e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07136265933513641,
|
|
"step": 2305,
|
|
"valid_targets_mean": 5268.3,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 2.859265373503921,
|
|
"grad_norm": 0.1866474625417802,
|
|
"learning_rate": 1.849801301609739e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09811045229434967,
|
|
"step": 2310,
|
|
"valid_targets_mean": 7726.2,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 2.8654560462236898,
|
|
"grad_norm": 0.1835043443557471,
|
|
"learning_rate": 1.8411868857591858e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08636979758739471,
|
|
"step": 2315,
|
|
"valid_targets_mean": 6938.9,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 2.8716467189434587,
|
|
"grad_norm": 0.38028533010905563,
|
|
"learning_rate": 1.8325754339051684e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07348066568374634,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4534.9,
|
|
"valid_targets_min": 218
|
|
},
|
|
{
|
|
"epoch": 2.8778373916632276,
|
|
"grad_norm": 0.22137318069036283,
|
|
"learning_rate": 1.8239671067668664e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06525801122188568,
|
|
"step": 2325,
|
|
"valid_targets_mean": 6093.9,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.8840280643829965,
|
|
"grad_norm": 0.21784067806511861,
|
|
"learning_rate": 1.8153620650051403e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06337665021419525,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5790.1,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 2.890218737102765,
|
|
"grad_norm": 0.217920395964421,
|
|
"learning_rate": 1.8067604692195366e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05745193362236023,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5209.1,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 2.896409409822534,
|
|
"grad_norm": 0.2077406979550345,
|
|
"learning_rate": 1.798162479945287e-05,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051630161702632904,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4721.6,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 2.902600082542303,
|
|
"grad_norm": 0.18627948554337911,
|
|
"learning_rate": 1.7895682576503136e-05,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052973873913288116,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5201.5,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 2.9087907552620718,
|
|
"grad_norm": 0.1952875026298226,
|
|
"learning_rate": 1.7809779627322338e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0581645742058754,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5476.3,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 2.9149814279818407,
|
|
"grad_norm": 0.19336947004243718,
|
|
"learning_rate": 1.7723917555153652e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05744573473930359,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4997.8,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 2.9211721007016096,
|
|
"grad_norm": 0.18853580499460681,
|
|
"learning_rate": 1.7638097962477377e-05,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05075407028198242,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4857.9,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 2.9273627734213785,
|
|
"grad_norm": 0.19231085109158563,
|
|
"learning_rate": 1.7552322450980976e-05,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05073636770248413,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4449.8,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 2.9335534461411474,
|
|
"grad_norm": 0.20854600555940264,
|
|
"learning_rate": 1.746659262152922e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05210212618112564,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4912.2,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 2.9397441188609164,
|
|
"grad_norm": 0.21599886052692355,
|
|
"learning_rate": 1.7380910074134284e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05672221630811691,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5112.4,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 2.945934791580685,
|
|
"grad_norm": 0.21213399990645138,
|
|
"learning_rate": 1.729527640792591e-05,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04278998821973801,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4016.1,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 2.9521254643004537,
|
|
"grad_norm": 0.1865072085772809,
|
|
"learning_rate": 1.7209693221121542e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05916375294327736,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5291.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 2.9583161370202227,
|
|
"grad_norm": 0.20960434802046582,
|
|
"learning_rate": 1.7124162110996513e-05,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0606967955827713,
|
|
"step": 2390,
|
|
"valid_targets_mean": 5184.2,
|
|
"valid_targets_min": 186
|
|
},
|
|
{
|
|
"epoch": 2.9645068097399916,
|
|
"grad_norm": 0.20033365133548395,
|
|
"learning_rate": 1.703868467385421e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05832201987504959,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5581.1,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 2.9706974824597605,
|
|
"grad_norm": 0.19755171126683943,
|
|
"learning_rate": 1.6953262504996325e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05633174255490303,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5395.4,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 2.9768881551795294,
|
|
"grad_norm": 0.17529225627974818,
|
|
"learning_rate": 1.686789719869303e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04811451956629753,
|
|
"step": 2405,
|
|
"valid_targets_mean": 5007.3,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 2.9830788278992983,
|
|
"grad_norm": 0.1885784429302209,
|
|
"learning_rate": 1.678259034815327e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.063509501516819,
|
|
"step": 2410,
|
|
"valid_targets_mean": 6297.9,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.9892695006190673,
|
|
"grad_norm": 0.2017345087506049,
|
|
"learning_rate": 1.6697343545494976e-05,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040830567479133606,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4157.3,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 2.995460173338836,
|
|
"grad_norm": 0.19417563845034627,
|
|
"learning_rate": 1.6612158381715393e-05,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04415581375360489,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4138.9,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 3.0012381345439536,
|
|
"grad_norm": 2.6377626669041287,
|
|
"learning_rate": 1.6527036446661396e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22807632386684418,
|
|
"step": 2425,
|
|
"valid_targets_mean": 7264.4,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 3.0074288072637225,
|
|
"grad_norm": 0.9088793325737297,
|
|
"learning_rate": 1.644197932899976e-05,
|
|
"loss": 0.5568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2018498331308365,
|
|
"step": 2430,
|
|
"valid_targets_mean": 9100.2,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 3.0136194799834914,
|
|
"grad_norm": 0.7001538901133555,
|
|
"learning_rate": 1.635698861618758e-05,
|
|
"loss": 0.4545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13684338331222534,
|
|
"step": 2435,
|
|
"valid_targets_mean": 6959.6,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 3.0198101527032604,
|
|
"grad_norm": 0.5085791791433556,
|
|
"learning_rate": 1.6272065894442564e-05,
|
|
"loss": 0.4119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13675005733966827,
|
|
"step": 2440,
|
|
"valid_targets_mean": 7188.0,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 3.0260008254230293,
|
|
"grad_norm": 0.34170869054641617,
|
|
"learning_rate": 1.618721274871352e-05,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386517435312271,
|
|
"step": 2445,
|
|
"valid_targets_mean": 6959.9,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 3.032191498142798,
|
|
"grad_norm": 0.2513172185887825,
|
|
"learning_rate": 1.6102430762650697e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13630551099777222,
|
|
"step": 2450,
|
|
"valid_targets_mean": 7818.4,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 3.038382170862567,
|
|
"grad_norm": 0.23816353711979832,
|
|
"learning_rate": 1.6017721518576296e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12291576713323593,
|
|
"step": 2455,
|
|
"valid_targets_mean": 7926.4,
|
|
"valid_targets_min": 3527
|
|
},
|
|
{
|
|
"epoch": 3.044572843582336,
|
|
"grad_norm": 0.2216826406524909,
|
|
"learning_rate": 1.5933086597454852e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10611715167760849,
|
|
"step": 2460,
|
|
"valid_targets_mean": 6605.4,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 3.050763516302105,
|
|
"grad_norm": 0.22297507015606374,
|
|
"learning_rate": 1.5848527578863827e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.114805668592453,
|
|
"step": 2465,
|
|
"valid_targets_mean": 6947.1,
|
|
"valid_targets_min": 2244
|
|
},
|
|
{
|
|
"epoch": 3.056954189021874,
|
|
"grad_norm": 0.23048283250162235,
|
|
"learning_rate": 1.5764046040964054e-05,
|
|
"loss": 0.3492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11917024850845337,
|
|
"step": 2470,
|
|
"valid_targets_mean": 7027.9,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 3.063144861741643,
|
|
"grad_norm": 0.20749698499494104,
|
|
"learning_rate": 1.5679643560470336e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13566739857196808,
|
|
"step": 2475,
|
|
"valid_targets_mean": 8959.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 3.0693355344614113,
|
|
"grad_norm": 0.20123388004730128,
|
|
"learning_rate": 1.5595321712621955e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10353903472423553,
|
|
"step": 2480,
|
|
"valid_targets_mean": 9482.4,
|
|
"valid_targets_min": 5095
|
|
},
|
|
{
|
|
"epoch": 3.07552620718118,
|
|
"grad_norm": 0.18654349174765578,
|
|
"learning_rate": 1.5511082071153355e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0999169573187828,
|
|
"step": 2485,
|
|
"valid_targets_mean": 8913.7,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.081716879900949,
|
|
"grad_norm": 0.18609250637392374,
|
|
"learning_rate": 1.542692620826469e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10604382306337357,
|
|
"step": 2490,
|
|
"valid_targets_mean": 8780.3,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 3.087907552620718,
|
|
"grad_norm": 0.18173544980191644,
|
|
"learning_rate": 1.534285569459255e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10367858409881592,
|
|
"step": 2495,
|
|
"valid_targets_mean": 9303.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 3.094098225340487,
|
|
"grad_norm": 0.1778496869215686,
|
|
"learning_rate": 1.5258872099180603e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10192026942968369,
|
|
"step": 2500,
|
|
"valid_targets_mean": 9819.0,
|
|
"valid_targets_min": 5584
|
|
},
|
|
{
|
|
"epoch": 3.100288898060256,
|
|
"grad_norm": 0.17705617038502525,
|
|
"learning_rate": 1.5174976989450315e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10536333918571472,
|
|
"step": 2505,
|
|
"valid_targets_mean": 9100.7,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.106479570780025,
|
|
"grad_norm": 0.23752473467223828,
|
|
"learning_rate": 1.509117193117172e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10404828190803528,
|
|
"step": 2510,
|
|
"valid_targets_mean": 9212.3,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 3.1126702434997937,
|
|
"grad_norm": 0.19183470090707747,
|
|
"learning_rate": 1.5007458488434173e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0919274389743805,
|
|
"step": 2515,
|
|
"valid_targets_mean": 8648.9,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 3.1188609162195626,
|
|
"grad_norm": 0.21168628117032956,
|
|
"learning_rate": 1.4923838223617191e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11724919080734253,
|
|
"step": 2520,
|
|
"valid_targets_mean": 10183.4,
|
|
"valid_targets_min": 3886
|
|
},
|
|
{
|
|
"epoch": 3.1250515889393315,
|
|
"grad_norm": 0.18177367772673336,
|
|
"learning_rate": 1.4840312697361242e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09794522821903229,
|
|
"step": 2525,
|
|
"valid_targets_mean": 9594.6,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 3.1312422616591005,
|
|
"grad_norm": 0.1830159726800109,
|
|
"learning_rate": 1.4756883468538665e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09517817199230194,
|
|
"step": 2530,
|
|
"valid_targets_mean": 8928.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 3.137432934378869,
|
|
"grad_norm": 0.20416503139043451,
|
|
"learning_rate": 1.4673552094224553e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10387183725833893,
|
|
"step": 2535,
|
|
"valid_targets_mean": 9772.7,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 3.143623607098638,
|
|
"grad_norm": 0.19466066756188768,
|
|
"learning_rate": 1.4590320129667715e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10351294279098511,
|
|
"step": 2540,
|
|
"valid_targets_mean": 10406.6,
|
|
"valid_targets_min": 5877
|
|
},
|
|
{
|
|
"epoch": 3.149814279818407,
|
|
"grad_norm": 0.18162984431523294,
|
|
"learning_rate": 1.4507189128261609e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10327140241861343,
|
|
"step": 2545,
|
|
"valid_targets_mean": 9535.4,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 3.1560049525381757,
|
|
"grad_norm": 0.18902572978656057,
|
|
"learning_rate": 1.442416064151539e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09635287523269653,
|
|
"step": 2550,
|
|
"valid_targets_mean": 9136.9,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 3.1621956252579446,
|
|
"grad_norm": 0.18560262747864736,
|
|
"learning_rate": 1.434123621902493e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08781301230192184,
|
|
"step": 2555,
|
|
"valid_targets_mean": 9388.7,
|
|
"valid_targets_min": 3282
|
|
},
|
|
{
|
|
"epoch": 3.1683862979777135,
|
|
"grad_norm": 0.18365513682140477,
|
|
"learning_rate": 1.4258417408443928e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09914126992225647,
|
|
"step": 2560,
|
|
"valid_targets_mean": 9729.2,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 3.1745769706974825,
|
|
"grad_norm": 0.1846516488003656,
|
|
"learning_rate": 1.4175705755454963e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11342475563287735,
|
|
"step": 2565,
|
|
"valid_targets_mean": 10332.3,
|
|
"valid_targets_min": 4344
|
|
},
|
|
{
|
|
"epoch": 3.1807676434172514,
|
|
"grad_norm": 0.17548218700822557,
|
|
"learning_rate": 1.4093102803740722e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10971412062644958,
|
|
"step": 2570,
|
|
"valid_targets_mean": 11408.0,
|
|
"valid_targets_min": 5726
|
|
},
|
|
{
|
|
"epoch": 3.1869583161370203,
|
|
"grad_norm": 0.1858509878232249,
|
|
"learning_rate": 1.4010610094955133e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10787683725357056,
|
|
"step": 2575,
|
|
"valid_targets_mean": 10250.5,
|
|
"valid_targets_min": 2607
|
|
},
|
|
{
|
|
"epoch": 3.193148988856789,
|
|
"grad_norm": 0.20281660735024015,
|
|
"learning_rate": 1.3928229168694632e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09862907975912094,
|
|
"step": 2580,
|
|
"valid_targets_mean": 9131.8,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 3.199339661576558,
|
|
"grad_norm": 0.1995157472300076,
|
|
"learning_rate": 1.3845961562469397e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10315236449241638,
|
|
"step": 2585,
|
|
"valid_targets_mean": 10329.2,
|
|
"valid_targets_min": 5291
|
|
},
|
|
{
|
|
"epoch": 3.205530334296327,
|
|
"grad_norm": 0.19410071372321638,
|
|
"learning_rate": 1.3763808811674665e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0971391499042511,
|
|
"step": 2590,
|
|
"valid_targets_mean": 8999.2,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 3.211721007016096,
|
|
"grad_norm": 0.19760141134508002,
|
|
"learning_rate": 1.3681772449562078e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10752210766077042,
|
|
"step": 2595,
|
|
"valid_targets_mean": 9227.1,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 3.2179116797358644,
|
|
"grad_norm": 0.1924470029833156,
|
|
"learning_rate": 1.359985400721108e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09535451233386993,
|
|
"step": 2600,
|
|
"valid_targets_mean": 9084.0,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 3.2241023524556334,
|
|
"grad_norm": 0.19253603479729658,
|
|
"learning_rate": 1.3518055013500318e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08641564846038818,
|
|
"step": 2605,
|
|
"valid_targets_mean": 9274.7,
|
|
"valid_targets_min": 4259
|
|
},
|
|
{
|
|
"epoch": 3.2302930251754023,
|
|
"grad_norm": 0.18911788339962052,
|
|
"learning_rate": 1.3436376995079107e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09178595244884491,
|
|
"step": 2610,
|
|
"valid_targets_mean": 8931.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 3.236483697895171,
|
|
"grad_norm": 0.1955734583861401,
|
|
"learning_rate": 1.335482147633897e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12398092448711395,
|
|
"step": 2615,
|
|
"valid_targets_mean": 11522.8,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 3.24267437061494,
|
|
"grad_norm": 0.26232586746959535,
|
|
"learning_rate": 1.3273389979385151e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10446576774120331,
|
|
"step": 2620,
|
|
"valid_targets_mean": 6154.2,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 3.248865043334709,
|
|
"grad_norm": 0.2565350551540809,
|
|
"learning_rate": 1.319208402400824e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10455872118473053,
|
|
"step": 2625,
|
|
"valid_targets_mean": 6070.9,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 3.255055716054478,
|
|
"grad_norm": 1.808209735209328,
|
|
"learning_rate": 1.3110905127655768e-05,
|
|
"loss": 0.5708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20408064126968384,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3383.0,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 3.261246388774247,
|
|
"grad_norm": 0.6703444088435634,
|
|
"learning_rate": 1.302985480540393e-05,
|
|
"loss": 0.6666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1613503396511078,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2937.2,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 3.267437061494016,
|
|
"grad_norm": 0.650212108697498,
|
|
"learning_rate": 1.2948934569929268e-05,
|
|
"loss": 0.5865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767566740512848,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4071.6,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 3.2736277342137847,
|
|
"grad_norm": 0.3586790872518766,
|
|
"learning_rate": 1.2868145931480485e-05,
|
|
"loss": 0.5662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18741057813167572,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5239.0,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 3.279818406933553,
|
|
"grad_norm": 0.3145694807156944,
|
|
"learning_rate": 1.2787490397850215e-05,
|
|
"loss": 0.547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16440746188163757,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4039.2,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 3.286009079653322,
|
|
"grad_norm": 0.2787311189664209,
|
|
"learning_rate": 1.27069694743469e-05,
|
|
"loss": 0.518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1486622393131256,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3473.9,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 3.292199752373091,
|
|
"grad_norm": 0.2823033827256175,
|
|
"learning_rate": 1.2626584663766698e-05,
|
|
"loss": 0.5258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15642622113227844,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3420.7,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 3.29839042509286,
|
|
"grad_norm": 0.3028555432762388,
|
|
"learning_rate": 1.2546337466365443e-05,
|
|
"loss": 0.5084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22593529522418976,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3772.6,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 3.304581097812629,
|
|
"grad_norm": 0.2457502370986697,
|
|
"learning_rate": 1.2466229379830633e-05,
|
|
"loss": 0.5043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1613931953907013,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4168.5,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 3.310771770532398,
|
|
"grad_norm": 0.25133441617858354,
|
|
"learning_rate": 1.2386261899253462e-05,
|
|
"loss": 0.5054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.117974191904068,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3252.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.3169624432521667,
|
|
"grad_norm": 0.2611571302850951,
|
|
"learning_rate": 1.2306436517100954e-05,
|
|
"loss": 0.496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1949538290500641,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4318.8,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 3.3231531159719356,
|
|
"grad_norm": 0.2555231967105449,
|
|
"learning_rate": 1.2226754723188096e-05,
|
|
"loss": 0.5102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1484585404396057,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3735.0,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 3.3293437886917046,
|
|
"grad_norm": 0.26373422284839143,
|
|
"learning_rate": 1.2147218004650015e-05,
|
|
"loss": 0.485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2103593349456787,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4098.8,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 3.3355344614114735,
|
|
"grad_norm": 0.2579651388913525,
|
|
"learning_rate": 1.2067827845914224e-05,
|
|
"loss": 0.4967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15302711725234985,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4343.0,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 3.3417251341312424,
|
|
"grad_norm": 0.2620779485829393,
|
|
"learning_rate": 1.1988585728672945e-05,
|
|
"loss": 0.5102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15016847848892212,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3758.9,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 3.3479158068510113,
|
|
"grad_norm": 0.2646836047519502,
|
|
"learning_rate": 1.1909493131855443e-05,
|
|
"loss": 0.5152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14472386240959167,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3194.6,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 3.3541064795707802,
|
|
"grad_norm": 0.2616841800521915,
|
|
"learning_rate": 1.1830551531600408e-05,
|
|
"loss": 0.5172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15361610054969788,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3790.3,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 3.360297152290549,
|
|
"grad_norm": 0.26660617558488164,
|
|
"learning_rate": 1.1751762401228415e-05,
|
|
"loss": 0.5019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2095314860343933,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4685.3,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.3664878250103176,
|
|
"grad_norm": 0.23512777404115162,
|
|
"learning_rate": 1.1673127211214434e-05,
|
|
"loss": 0.5033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134054496884346,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3750.0,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 3.3726784977300865,
|
|
"grad_norm": 0.2606228675821491,
|
|
"learning_rate": 1.1594647429160384e-05,
|
|
"loss": 0.5086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16086050868034363,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4435.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 3.3788691704498555,
|
|
"grad_norm": 0.2516547250956556,
|
|
"learning_rate": 1.1516324519767734e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05032425746321678,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5312.0,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 3.3850598431696244,
|
|
"grad_norm": 0.24753556080329858,
|
|
"learning_rate": 1.1438159944810185e-05,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05226186662912369,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5549.4,
|
|
"valid_targets_min": 3000
|
|
},
|
|
{
|
|
"epoch": 3.3912505158893933,
|
|
"grad_norm": 0.196490288782892,
|
|
"learning_rate": 1.1360155163106357e-05,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04352680966258049,
|
|
"step": 2740,
|
|
"valid_targets_mean": 5357.1,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 3.397441188609162,
|
|
"grad_norm": 0.17888882986498272,
|
|
"learning_rate": 1.128231163049258e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05421479046344757,
|
|
"step": 2745,
|
|
"valid_targets_mean": 5853.2,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 3.403631861328931,
|
|
"grad_norm": 0.19104839802959508,
|
|
"learning_rate": 1.1204630799795737e-05,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0430365726351738,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5323.5,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 3.4098225340487,
|
|
"grad_norm": 0.18291678810699047,
|
|
"learning_rate": 1.1127114120806142e-05,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04416714608669281,
|
|
"step": 2755,
|
|
"valid_targets_mean": 5320.9,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 3.416013206768469,
|
|
"grad_norm": 0.18296051940850683,
|
|
"learning_rate": 1.1049763040250458e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04050867259502411,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5294.3,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 3.422203879488238,
|
|
"grad_norm": 0.17256567764742534,
|
|
"learning_rate": 1.097257900176471e-05,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0461638867855072,
|
|
"step": 2765,
|
|
"valid_targets_mean": 5738.7,
|
|
"valid_targets_min": 3508
|
|
},
|
|
{
|
|
"epoch": 3.4283945522080064,
|
|
"grad_norm": 0.17633911451548007,
|
|
"learning_rate": 1.0895563445867367e-05,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04075031727552414,
|
|
"step": 2770,
|
|
"valid_targets_mean": 5052.5,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 3.4345852249277753,
|
|
"grad_norm": 0.1762346962978501,
|
|
"learning_rate": 1.0818717809932435e-05,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0424673967063427,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5108.5,
|
|
"valid_targets_min": 3236
|
|
},
|
|
{
|
|
"epoch": 3.440775897647544,
|
|
"grad_norm": 0.1806690408205645,
|
|
"learning_rate": 1.0742043528162616e-05,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041259389370679855,
|
|
"step": 2780,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 3.446966570367313,
|
|
"grad_norm": 0.18400973064925788,
|
|
"learning_rate": 1.066554203156255e-05,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04019046202301979,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5513.7,
|
|
"valid_targets_min": 2867
|
|
},
|
|
{
|
|
"epoch": 3.453157243087082,
|
|
"grad_norm": 0.1898308964321092,
|
|
"learning_rate": 1.0589214747912142e-05,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04493900388479233,
|
|
"step": 2790,
|
|
"valid_targets_mean": 5217.7,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 3.459347915806851,
|
|
"grad_norm": 0.1765216333417586,
|
|
"learning_rate": 1.0513063101739875e-05,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040192194283008575,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5169.1,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 3.46553858852662,
|
|
"grad_norm": 0.17050704218419852,
|
|
"learning_rate": 1.043708851429623e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04200092330574989,
|
|
"step": 2800,
|
|
"valid_targets_mean": 5809.1,
|
|
"valid_targets_min": 2841
|
|
},
|
|
{
|
|
"epoch": 3.471729261246389,
|
|
"grad_norm": 0.17389636648862228,
|
|
"learning_rate": 1.0361292403527155e-05,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043897852301597595,
|
|
"step": 2805,
|
|
"valid_targets_mean": 5502.1,
|
|
"valid_targets_min": 2911
|
|
},
|
|
{
|
|
"epoch": 3.4779199339661577,
|
|
"grad_norm": 0.21021567946740766,
|
|
"learning_rate": 1.0285676184047635e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04397951066493988,
|
|
"step": 2810,
|
|
"valid_targets_mean": 5274.9,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 3.4841106066859266,
|
|
"grad_norm": 0.18170350578170802,
|
|
"learning_rate": 1.0210241267115268e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04176317900419235,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4966.8,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 3.4903012794056956,
|
|
"grad_norm": 0.18501571248718257,
|
|
"learning_rate": 1.0134989060603907e-05,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046220626682043076,
|
|
"step": 2820,
|
|
"valid_targets_mean": 5898.0,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 3.4964919521254645,
|
|
"grad_norm": 0.20563208316647416,
|
|
"learning_rate": 1.0059920968977397e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045016516000032425,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 3.5026826248452334,
|
|
"grad_norm": 0.19752156970180976,
|
|
"learning_rate": 9.985038393263402e-06,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055424097925424576,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3787.2,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 3.5088732975650023,
|
|
"grad_norm": 0.36972949071948,
|
|
"learning_rate": 9.910342731027207e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0840710997581482,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2707.7,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 3.515063970284771,
|
|
"grad_norm": 0.3748201293327838,
|
|
"learning_rate": 9.835835376345641e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0875723585486412,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3315.3,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 3.5212546430045397,
|
|
"grad_norm": 0.35889868316758455,
|
|
"learning_rate": 9.761517719781073e-06,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07586594671010971,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2977.7,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 3.5274453157243086,
|
|
"grad_norm": 0.402717588920429,
|
|
"learning_rate": 9.687391148355475e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07470747083425522,
|
|
"step": 2850,
|
|
"valid_targets_mean": 2580.0,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 3.5336359884440776,
|
|
"grad_norm": 0.3451691727129318,
|
|
"learning_rate": 9.613457045524481e-06,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06438814103603363,
|
|
"step": 2855,
|
|
"valid_targets_mean": 2145.7,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 3.5398266611638465,
|
|
"grad_norm": 0.3451917280601573,
|
|
"learning_rate": 9.539716791151646e-06,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07749766856431961,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3349.0,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 3.5460173338836154,
|
|
"grad_norm": 0.405121196888742,
|
|
"learning_rate": 9.466171761482603e-06,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06382091343402863,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2337.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.5522080066033843,
|
|
"grad_norm": 0.38362052378621964,
|
|
"learning_rate": 9.392823329119463e-06,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07750914245843887,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2693.7,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 3.5583986793231532,
|
|
"grad_norm": 0.32507866580816974,
|
|
"learning_rate": 9.319672862995126e-06,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07666446268558502,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3172.9,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 3.564589352042922,
|
|
"grad_norm": 0.356242975493215,
|
|
"learning_rate": 9.246721728347787e-06,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06241380423307419,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2427.7,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 3.5707800247626906,
|
|
"grad_norm": 0.3380638978496436,
|
|
"learning_rate": 9.173971286695433e-06,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07215704768896103,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3079.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 3.5769706974824595,
|
|
"grad_norm": 0.37740031543946767,
|
|
"learning_rate": 9.101422895810414e-06,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07300017029047012,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2843.0,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 3.5831613702022285,
|
|
"grad_norm": 0.35965876357050924,
|
|
"learning_rate": 9.029077909694129e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05546549707651138,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2625.9,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 3.5893520429219974,
|
|
"grad_norm": 0.3535975711181502,
|
|
"learning_rate": 8.95693767855175e-06,
|
|
"loss": 0.2138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06681904196739197,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2795.7,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 3.5955427156417663,
|
|
"grad_norm": 0.35831286458384964,
|
|
"learning_rate": 8.885003548767031e-06,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08127890527248383,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2658.8,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 3.6017333883615352,
|
|
"grad_norm": 0.3255245001879369,
|
|
"learning_rate": 8.813276862877151e-06,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08868478238582611,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3810.7,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 3.607924061081304,
|
|
"grad_norm": 0.3775860328972047,
|
|
"learning_rate": 8.741758959547674e-06,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09441325813531876,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3041.2,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 3.614114733801073,
|
|
"grad_norm": 0.34929562566676037,
|
|
"learning_rate": 8.670451173547583e-06,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0680951178073883,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3060.2,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 3.620305406520842,
|
|
"grad_norm": 0.35044278320690253,
|
|
"learning_rate": 8.599354835724348e-06,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07023820281028748,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2596.9,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 3.626496079240611,
|
|
"grad_norm": 0.5787773513968048,
|
|
"learning_rate": 8.528471272979083e-06,
|
|
"loss": 0.4251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1488938182592392,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4241.0,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 3.63268675196038,
|
|
"grad_norm": 0.27860635151014107,
|
|
"learning_rate": 8.45780180824179e-06,
|
|
"loss": 0.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13841289281845093,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4306.0,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 3.6388774246801487,
|
|
"grad_norm": 0.31660335971346804,
|
|
"learning_rate": 8.387347760446678e-06,
|
|
"loss": 0.4173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286192238330841,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3592.4,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 3.6450680973999177,
|
|
"grad_norm": 0.30390386310268513,
|
|
"learning_rate": 8.317110444507544e-06,
|
|
"loss": 0.4075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10869715362787247,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2727.4,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.6512587701196866,
|
|
"grad_norm": 0.27322998384768876,
|
|
"learning_rate": 8.247091171293206e-06,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15685974061489105,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4547.5,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 3.657449442839455,
|
|
"grad_norm": 0.2903755653093551,
|
|
"learning_rate": 8.177291247603068e-06,
|
|
"loss": 0.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1123238354921341,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3017.2,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 3.663640115559224,
|
|
"grad_norm": 0.28795743536270985,
|
|
"learning_rate": 8.107711976142722e-06,
|
|
"loss": 0.3995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13243310153484344,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2964.7,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 3.669830788278993,
|
|
"grad_norm": 0.26745095265053703,
|
|
"learning_rate": 8.038354655499645e-06,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07177959382534027,
|
|
"step": 2965,
|
|
"valid_targets_mean": 940.8,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 3.676021460998762,
|
|
"grad_norm": 0.23294354536691292,
|
|
"learning_rate": 7.969220580118935e-06,
|
|
"loss": 0.4237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17236873507499695,
|
|
"step": 2970,
|
|
"valid_targets_mean": 5461.8,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 3.6822121337185307,
|
|
"grad_norm": 0.23820899598106976,
|
|
"learning_rate": 7.900311040279162e-06,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17907428741455078,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5619.4,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.6884028064382997,
|
|
"grad_norm": 0.26049534368856725,
|
|
"learning_rate": 7.831627322068319e-06,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11706390976905823,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3569.9,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 3.6945934791580686,
|
|
"grad_norm": 0.26875591138254984,
|
|
"learning_rate": 7.763170707359783e-06,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15870535373687744,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3463.0,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 3.7007841518778375,
|
|
"grad_norm": 0.25381766085554536,
|
|
"learning_rate": 7.694942473788394e-06,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149326890707016,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4937.0,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 3.7069748245976064,
|
|
"grad_norm": 0.260810286798833,
|
|
"learning_rate": 7.6269438947266196e-06,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13493020832538605,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3598.3,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 3.713165497317375,
|
|
"grad_norm": 0.24986081846470876,
|
|
"learning_rate": 7.559176239260799e-06,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11331235617399216,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3548.3,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 3.719356170037144,
|
|
"grad_norm": 0.3546648416988216,
|
|
"learning_rate": 7.491640772167448e-06,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14008715748786926,
|
|
"step": 3005,
|
|
"valid_targets_mean": 1681.6,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 3.7255468427569127,
|
|
"grad_norm": 0.22901301367821644,
|
|
"learning_rate": 7.4243387538896324e-06,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09780922532081604,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3609.7,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 3.7317375154766816,
|
|
"grad_norm": 0.23358979591699058,
|
|
"learning_rate": 7.3572714405134735e-06,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15065903961658478,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4945.1,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 3.7379281881964506,
|
|
"grad_norm": 0.26116568016951025,
|
|
"learning_rate": 7.290440083744703e-06,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13248634338378906,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3720.4,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 3.7441188609162195,
|
|
"grad_norm": 0.26928236690588164,
|
|
"learning_rate": 7.223845930885296e-06,
|
|
"loss": 0.4143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16243666410446167,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4374.7,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.7503095336359884,
|
|
"grad_norm": 0.24054806311524868,
|
|
"learning_rate": 7.1574902248101665e-06,
|
|
"loss": 0.3964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11514532566070557,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 3.7565002063557573,
|
|
"grad_norm": 0.27980013752670346,
|
|
"learning_rate": 7.091374203944026e-06,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07349371910095215,
|
|
"step": 3035,
|
|
"valid_targets_mean": 6295.9,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 3.7626908790755262,
|
|
"grad_norm": 0.20457280873890113,
|
|
"learning_rate": 7.025499102238214e-06,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08301842212677002,
|
|
"step": 3040,
|
|
"valid_targets_mean": 6297.3,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 3.768881551795295,
|
|
"grad_norm": 0.2146597328164818,
|
|
"learning_rate": 6.959866149147716e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07045641541481018,
|
|
"step": 3045,
|
|
"valid_targets_mean": 5215.5,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 3.775072224515064,
|
|
"grad_norm": 0.18200689929375732,
|
|
"learning_rate": 6.89447656960817e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0889335572719574,
|
|
"step": 3050,
|
|
"valid_targets_mean": 6649.1,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 3.781262897234833,
|
|
"grad_norm": 0.22860543124714944,
|
|
"learning_rate": 6.829331584013057e-06,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04138035327196121,
|
|
"step": 3055,
|
|
"valid_targets_mean": 1256.4,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 3.787453569954602,
|
|
"grad_norm": 0.166809758272612,
|
|
"learning_rate": 6.764432408190871e-06,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08066590130329132,
|
|
"step": 3060,
|
|
"valid_targets_mean": 6641.7,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 3.793644242674371,
|
|
"grad_norm": 0.17163137343467202,
|
|
"learning_rate": 6.6997802533824795e-06,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07697588205337524,
|
|
"step": 3065,
|
|
"valid_targets_mean": 6290.7,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 3.7998349153941398,
|
|
"grad_norm": 0.16832064299798719,
|
|
"learning_rate": 6.635376326218466e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08682464063167572,
|
|
"step": 3070,
|
|
"valid_targets_mean": 7629.2,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 3.8060255881139082,
|
|
"grad_norm": 0.1676858843405696,
|
|
"learning_rate": 6.571221828696663e-06,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08969520032405853,
|
|
"step": 3075,
|
|
"valid_targets_mean": 7520.3,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.812216260833677,
|
|
"grad_norm": 0.17227411552850067,
|
|
"learning_rate": 6.507317958159669e-06,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10327152907848358,
|
|
"step": 3080,
|
|
"valid_targets_mean": 7877.2,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 3.818406933553446,
|
|
"grad_norm": 0.16907625676088583,
|
|
"learning_rate": 6.44366590727254e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07436846196651459,
|
|
"step": 3085,
|
|
"valid_targets_mean": 6407.7,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 3.824597606273215,
|
|
"grad_norm": 0.16221856592289713,
|
|
"learning_rate": 6.380266864000504e-06,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12140980362892151,
|
|
"step": 3090,
|
|
"valid_targets_mean": 8744.6,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 3.830788278992984,
|
|
"grad_norm": 0.16352978743143964,
|
|
"learning_rate": 6.3171220115868045e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06712472438812256,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5903.2,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 3.836978951712753,
|
|
"grad_norm": 0.1620060862205527,
|
|
"learning_rate": 6.2542325285306285e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07059751451015472,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5620.8,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 3.8431696244325217,
|
|
"grad_norm": 0.1717876760158624,
|
|
"learning_rate": 6.1915995885650675e-06,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06778329610824585,
|
|
"step": 3105,
|
|
"valid_targets_mean": 6053.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 3.8493602971522907,
|
|
"grad_norm": 0.1724498163364375,
|
|
"learning_rate": 6.129224360635255e-06,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0723428875207901,
|
|
"step": 3110,
|
|
"valid_targets_mean": 5808.6,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 3.8555509698720596,
|
|
"grad_norm": 0.2924342510388217,
|
|
"learning_rate": 6.067108008876539e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05951783061027527,
|
|
"step": 3115,
|
|
"valid_targets_mean": 1165.8,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 3.861741642591828,
|
|
"grad_norm": 0.16538063625276245,
|
|
"learning_rate": 6.0052516925927575e-06,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06697045266628265,
|
|
"step": 3120,
|
|
"valid_targets_mean": 6545.5,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 3.867932315311597,
|
|
"grad_norm": 0.17735097847195555,
|
|
"learning_rate": 5.943656566234577e-06,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07908181846141815,
|
|
"step": 3125,
|
|
"valid_targets_mean": 7082.3,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 3.874122988031366,
|
|
"grad_norm": 0.2795496426225536,
|
|
"learning_rate": 5.882323779377969e-06,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05319851636886597,
|
|
"step": 3130,
|
|
"valid_targets_mean": 5055.3,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 3.880313660751135,
|
|
"grad_norm": 0.21480035161977398,
|
|
"learning_rate": 5.821254476702766e-06,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05196044594049454,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5057.8,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 3.8865043334709037,
|
|
"grad_norm": 0.19980532865976003,
|
|
"learning_rate": 5.7604497979712885e-06,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05519983172416687,
|
|
"step": 3140,
|
|
"valid_targets_mean": 4976.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 3.8926950061906727,
|
|
"grad_norm": 0.18562163276138666,
|
|
"learning_rate": 5.699910878007029e-06,
|
|
"loss": 0.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05100224167108536,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4991.0,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 3.8988856789104416,
|
|
"grad_norm": 0.2004233551974947,
|
|
"learning_rate": 5.6396388466735494e-06,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054582323879003525,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4929.2,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 3.9050763516302105,
|
|
"grad_norm": 0.1930621366436326,
|
|
"learning_rate": 5.579634828853346e-06,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05246791988611221,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4816.0,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.9112670243499794,
|
|
"grad_norm": 0.1940344389340398,
|
|
"learning_rate": 5.519899944426872e-06,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050756849348545074,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5106.1,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 3.9174576970697483,
|
|
"grad_norm": 0.18266570640280186,
|
|
"learning_rate": 5.460435308251597e-06,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04534699022769928,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5717.9,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 3.9236483697895173,
|
|
"grad_norm": 0.2019228989698077,
|
|
"learning_rate": 5.401242030141272e-06,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045035433024168015,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4223.5,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 3.929839042509286,
|
|
"grad_norm": 0.19496159345323943,
|
|
"learning_rate": 5.3423212148451565e-06,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05356280505657196,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5358.9,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 3.936029715229055,
|
|
"grad_norm": 0.20723721043870766,
|
|
"learning_rate": 5.283673962027438e-06,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055323224514722824,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4907.8,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 3.942220387948824,
|
|
"grad_norm": 0.18822413104299468,
|
|
"learning_rate": 5.2253013662466685e-06,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053791530430316925,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5368.9,
|
|
"valid_targets_min": 207
|
|
},
|
|
{
|
|
"epoch": 3.948411060668593,
|
|
"grad_norm": 0.2009252470006424,
|
|
"learning_rate": 5.167204516935369e-06,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04316543787717819,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5002.8,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 3.9546017333883614,
|
|
"grad_norm": 0.22138478739704973,
|
|
"learning_rate": 5.1093844983796995e-06,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051566220819950104,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4689.6,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 3.9607924061081303,
|
|
"grad_norm": 0.19760139974628216,
|
|
"learning_rate": 5.051842389699187e-06,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04717274755239487,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4091.0,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.9669830788278992,
|
|
"grad_norm": 0.19253008373468292,
|
|
"learning_rate": 4.9945792648266285e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045230429619550705,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4954.5,
|
|
"valid_targets_min": 174
|
|
},
|
|
{
|
|
"epoch": 3.973173751547668,
|
|
"grad_norm": 0.18074751705158218,
|
|
"learning_rate": 4.937596192488014e-06,
|
|
"loss": 0.1529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047024406492710114,
|
|
"step": 3210,
|
|
"valid_targets_mean": 6080.0,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 3.979364424267437,
|
|
"grad_norm": 0.1863030479067299,
|
|
"learning_rate": 4.880894236182612e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046461321413517,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5143.1,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.985555096987206,
|
|
"grad_norm": 0.1840113853429413,
|
|
"learning_rate": 4.824474454163075e-06,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047562018036842346,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4991.2,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 3.991745769706975,
|
|
"grad_norm": 0.18208586104370034,
|
|
"learning_rate": 4.768337899415749e-06,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051083069294691086,
|
|
"step": 3225,
|
|
"valid_targets_mean": 5415.2,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 3.997936442426744,
|
|
"grad_norm": 0.2591302202514017,
|
|
"learning_rate": 4.712485619640961e-06,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04865710437297821,
|
|
"step": 3230,
|
|
"valid_targets_mean": 4573.3,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 4.003714403631862,
|
|
"grad_norm": 2.4935469046351515,
|
|
"learning_rate": 4.656918657233518e-06,
|
|
"loss": 0.4446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21996045112609863,
|
|
"step": 3235,
|
|
"valid_targets_mean": 7854.0,
|
|
"valid_targets_min": 2676
|
|
},
|
|
{
|
|
"epoch": 4.009905076351631,
|
|
"grad_norm": 1.548263519711514,
|
|
"learning_rate": 4.6016380492632066e-06,
|
|
"loss": 0.5494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15648065507411957,
|
|
"step": 3240,
|
|
"valid_targets_mean": 6740.2,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 4.016095749071399,
|
|
"grad_norm": 0.9066247805656071,
|
|
"learning_rate": 4.546644827455473e-06,
|
|
"loss": 0.4759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13795755803585052,
|
|
"step": 3245,
|
|
"valid_targets_mean": 7294.2,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 4.022286421791168,
|
|
"grad_norm": 0.6406180080809663,
|
|
"learning_rate": 4.491940018172154e-06,
|
|
"loss": 0.4189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14580833911895752,
|
|
"step": 3250,
|
|
"valid_targets_mean": 7212.2,
|
|
"valid_targets_min": 2554
|
|
},
|
|
{
|
|
"epoch": 4.0284770945109365,
|
|
"grad_norm": 0.4748148577468879,
|
|
"learning_rate": 4.437524642392312e-06,
|
|
"loss": 0.382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12739720940589905,
|
|
"step": 3255,
|
|
"valid_targets_mean": 7016.0,
|
|
"valid_targets_min": 2429
|
|
},
|
|
{
|
|
"epoch": 4.034667767230705,
|
|
"grad_norm": 0.31992606949991964,
|
|
"learning_rate": 4.383399715693191e-06,
|
|
"loss": 0.3659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11324085295200348,
|
|
"step": 3260,
|
|
"valid_targets_mean": 7897.7,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 4.040858439950474,
|
|
"grad_norm": 0.26910883047827056,
|
|
"learning_rate": 4.329566248231261e-06,
|
|
"loss": 0.3546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12365679442882538,
|
|
"step": 3265,
|
|
"valid_targets_mean": 7362.4,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 4.047049112670243,
|
|
"grad_norm": 0.24616052772626323,
|
|
"learning_rate": 4.276025244723379e-06,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11187300086021423,
|
|
"step": 3270,
|
|
"valid_targets_mean": 6896.6,
|
|
"valid_targets_min": 2522
|
|
},
|
|
{
|
|
"epoch": 4.053239785390012,
|
|
"grad_norm": 0.21061670174766794,
|
|
"learning_rate": 4.222777704428002e-06,
|
|
"loss": 0.3379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10639345645904541,
|
|
"step": 3275,
|
|
"valid_targets_mean": 7470.7,
|
|
"valid_targets_min": 2790
|
|
},
|
|
{
|
|
"epoch": 4.059430458109781,
|
|
"grad_norm": 0.18753401679570936,
|
|
"learning_rate": 4.169824621126563e-06,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11539415270090103,
|
|
"step": 3280,
|
|
"valid_targets_mean": 8266.8,
|
|
"valid_targets_min": 3307
|
|
},
|
|
{
|
|
"epoch": 4.06562113082955,
|
|
"grad_norm": 0.20663140315054465,
|
|
"learning_rate": 4.11716698310493e-06,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09084329009056091,
|
|
"step": 3285,
|
|
"valid_targets_mean": 8286.0,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 4.071811803549319,
|
|
"grad_norm": 0.18738793959646965,
|
|
"learning_rate": 4.064805773134945e-06,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10851626098155975,
|
|
"step": 3290,
|
|
"valid_targets_mean": 9267.8,
|
|
"valid_targets_min": 5215
|
|
},
|
|
{
|
|
"epoch": 4.078002476269088,
|
|
"grad_norm": 0.18106063041038864,
|
|
"learning_rate": 4.012741968456086e-06,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10159880667924881,
|
|
"step": 3295,
|
|
"valid_targets_mean": 9206.4,
|
|
"valid_targets_min": 3552
|
|
},
|
|
{
|
|
"epoch": 4.084193148988857,
|
|
"grad_norm": 0.1612396302551816,
|
|
"learning_rate": 3.9609765407572245e-06,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10520200431346893,
|
|
"step": 3300,
|
|
"valid_targets_mean": 9805.0,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 4.090383821708626,
|
|
"grad_norm": 0.17460194220391953,
|
|
"learning_rate": 3.9095104561585055e-06,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10614360868930817,
|
|
"step": 3305,
|
|
"valid_targets_mean": 9971.8,
|
|
"valid_targets_min": 3896
|
|
},
|
|
{
|
|
"epoch": 4.096574494428395,
|
|
"grad_norm": 0.1856842944175527,
|
|
"learning_rate": 3.858344675193306e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10031236708164215,
|
|
"step": 3310,
|
|
"valid_targets_mean": 9100.1,
|
|
"valid_targets_min": 3598
|
|
},
|
|
{
|
|
"epoch": 4.1027651671481635,
|
|
"grad_norm": 0.172715444753337,
|
|
"learning_rate": 3.8074801527903016e-06,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09845046699047089,
|
|
"step": 3315,
|
|
"valid_targets_mean": 9095.8,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 4.1089558398679324,
|
|
"grad_norm": 0.17032228924718737,
|
|
"learning_rate": 3.7569178382556537e-06,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09536390006542206,
|
|
"step": 3320,
|
|
"valid_targets_mean": 8719.1,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 4.115146512587701,
|
|
"grad_norm": 0.16064717786441732,
|
|
"learning_rate": 3.7066586752552946e-06,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10273170471191406,
|
|
"step": 3325,
|
|
"valid_targets_mean": 9952.8,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 4.12133718530747,
|
|
"grad_norm": 0.16401198062265868,
|
|
"learning_rate": 3.6567036017973133e-06,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09828859567642212,
|
|
"step": 3330,
|
|
"valid_targets_mean": 9790.2,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 4.127527858027239,
|
|
"grad_norm": 0.19055481385735915,
|
|
"learning_rate": 3.6070535502144344e-06,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09713335335254669,
|
|
"step": 3335,
|
|
"valid_targets_mean": 9145.0,
|
|
"valid_targets_min": 3036
|
|
},
|
|
{
|
|
"epoch": 4.133718530747008,
|
|
"grad_norm": 0.1554175099448603,
|
|
"learning_rate": 3.557709447146638e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08989289402961731,
|
|
"step": 3340,
|
|
"valid_targets_mean": 9393.3,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 4.139909203466777,
|
|
"grad_norm": 0.1628115714451333,
|
|
"learning_rate": 3.5086722135238537e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08836889266967773,
|
|
"step": 3345,
|
|
"valid_targets_mean": 8753.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.146099876186546,
|
|
"grad_norm": 0.15827242408513267,
|
|
"learning_rate": 3.4599427645487895e-06,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09848266839981079,
|
|
"step": 3350,
|
|
"valid_targets_mean": 9578.5,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 4.152290548906315,
|
|
"grad_norm": 0.16365570965569637,
|
|
"learning_rate": 3.411522009679822e-06,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09809684008359909,
|
|
"step": 3355,
|
|
"valid_targets_mean": 9520.8,
|
|
"valid_targets_min": 4270
|
|
},
|
|
{
|
|
"epoch": 4.158481221626083,
|
|
"grad_norm": 0.18247983138866508,
|
|
"learning_rate": 3.363410852614035e-06,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10381290316581726,
|
|
"step": 3360,
|
|
"valid_targets_mean": 10044.1,
|
|
"valid_targets_min": 5057
|
|
},
|
|
{
|
|
"epoch": 4.164671894345852,
|
|
"grad_norm": 0.16113266646759858,
|
|
"learning_rate": 3.3156101912703774e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09295910596847534,
|
|
"step": 3365,
|
|
"valid_targets_mean": 9430.1,
|
|
"valid_targets_min": 4478
|
|
},
|
|
{
|
|
"epoch": 4.170862567065621,
|
|
"grad_norm": 0.16092409672549826,
|
|
"learning_rate": 3.2681209177728723e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09645083546638489,
|
|
"step": 3370,
|
|
"valid_targets_mean": 9906.8,
|
|
"valid_targets_min": 4602
|
|
},
|
|
{
|
|
"epoch": 4.17705323978539,
|
|
"grad_norm": 0.15724223274860794,
|
|
"learning_rate": 3.220943918433981e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09849556535482407,
|
|
"step": 3375,
|
|
"valid_targets_mean": 9816.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.183243912505159,
|
|
"grad_norm": 0.1616786302951148,
|
|
"learning_rate": 3.1740800737380506e-06,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09693750739097595,
|
|
"step": 3380,
|
|
"valid_targets_mean": 9615.7,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 4.1894345852249275,
|
|
"grad_norm": 0.16066765321492638,
|
|
"learning_rate": 3.1275302583249045e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09067849814891815,
|
|
"step": 3385,
|
|
"valid_targets_mean": 9669.4,
|
|
"valid_targets_min": 4373
|
|
},
|
|
{
|
|
"epoch": 4.195625257944696,
|
|
"grad_norm": 0.31316007472157337,
|
|
"learning_rate": 3.0812953409735048e-06,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10240060091018677,
|
|
"step": 3390,
|
|
"valid_targets_mean": 10030.3,
|
|
"valid_targets_min": 4760
|
|
},
|
|
{
|
|
"epoch": 4.201815930664465,
|
|
"grad_norm": 0.16279083945315906,
|
|
"learning_rate": 3.035376184585723e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09688413143157959,
|
|
"step": 3395,
|
|
"valid_targets_mean": 9881.2,
|
|
"valid_targets_min": 4785
|
|
},
|
|
{
|
|
"epoch": 4.208006603384234,
|
|
"grad_norm": 0.16037632943473892,
|
|
"learning_rate": 2.9897736461702643e-06,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09556827694177628,
|
|
"step": 3400,
|
|
"valid_targets_mean": 9669.7,
|
|
"valid_targets_min": 3932
|
|
},
|
|
{
|
|
"epoch": 4.214197276104003,
|
|
"grad_norm": 0.16241404801285436,
|
|
"learning_rate": 2.9444885768266427e-06,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09576229751110077,
|
|
"step": 3405,
|
|
"valid_targets_mean": 9425.6,
|
|
"valid_targets_min": 3063
|
|
},
|
|
{
|
|
"epoch": 4.220387948823772,
|
|
"grad_norm": 0.1673330997580944,
|
|
"learning_rate": 2.899521821729334e-06,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10415822267532349,
|
|
"step": 3410,
|
|
"valid_targets_mean": 9942.2,
|
|
"valid_targets_min": 3468
|
|
},
|
|
{
|
|
"epoch": 4.226578621543541,
|
|
"grad_norm": 0.16255022199043886,
|
|
"learning_rate": 2.8548742201119583e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0922980010509491,
|
|
"step": 3415,
|
|
"valid_targets_mean": 9693.8,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 4.23276929426331,
|
|
"grad_norm": 0.16435612389843432,
|
|
"learning_rate": 2.810546605251656e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09935683012008667,
|
|
"step": 3420,
|
|
"valid_targets_mean": 10011.0,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 4.238959966983079,
|
|
"grad_norm": 0.18062149774213904,
|
|
"learning_rate": 2.7665398044535032e-06,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06484482437372208,
|
|
"step": 3425,
|
|
"valid_targets_mean": 5209.4,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 4.245150639702848,
|
|
"grad_norm": 0.2322773652047266,
|
|
"learning_rate": 2.722854639035104e-06,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09720931947231293,
|
|
"step": 3430,
|
|
"valid_targets_mean": 6386.3,
|
|
"valid_targets_min": 3796
|
|
},
|
|
{
|
|
"epoch": 4.251341312422617,
|
|
"grad_norm": 0.23286252553352585,
|
|
"learning_rate": 2.679491924311226e-06,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1092301532626152,
|
|
"step": 3435,
|
|
"valid_targets_mean": 6407.3,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 4.257531985142386,
|
|
"grad_norm": 1.6176027659267946,
|
|
"learning_rate": 2.6364524695786255e-06,
|
|
"loss": 0.6845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2172609567642212,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4497.1,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 4.2637226578621545,
|
|
"grad_norm": 1.2257944299406256,
|
|
"learning_rate": 2.593737078100893e-06,
|
|
"loss": 0.6324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17631544172763824,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4428.4,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 4.2699133305819235,
|
|
"grad_norm": 0.9074332717102703,
|
|
"learning_rate": 2.5513465470935163e-06,
|
|
"loss": 0.6197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2198156714439392,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3730.1,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 4.276104003301692,
|
|
"grad_norm": 0.5571859317447886,
|
|
"learning_rate": 2.509281667708954e-06,
|
|
"loss": 0.557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20994672179222107,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4497.8,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.282294676021461,
|
|
"grad_norm": 0.4631080764559286,
|
|
"learning_rate": 2.4675432250219e-06,
|
|
"loss": 0.5234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14621654152870178,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2116.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 4.28848534874123,
|
|
"grad_norm": 0.33911840221263606,
|
|
"learning_rate": 2.4261319980146293e-06,
|
|
"loss": 0.5088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1579686999320984,
|
|
"step": 3465,
|
|
"valid_targets_mean": 4756.1,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 4.294676021460999,
|
|
"grad_norm": 0.2659908271673546,
|
|
"learning_rate": 2.3850487595624227e-06,
|
|
"loss": 0.4976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15896087884902954,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4760.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.300866694180768,
|
|
"grad_norm": 0.2777227992961965,
|
|
"learning_rate": 2.3442942764192056e-06,
|
|
"loss": 0.5032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16999685764312744,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3936.6,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 4.307057366900537,
|
|
"grad_norm": 0.2607026301058802,
|
|
"learning_rate": 2.3038693092031816e-06,
|
|
"loss": 0.4917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1763077676296234,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4049.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 4.313248039620305,
|
|
"grad_norm": 0.29319422740769124,
|
|
"learning_rate": 2.263774612382681e-06,
|
|
"loss": 0.4689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17623548209667206,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3266.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.319438712340074,
|
|
"grad_norm": 0.24469780824069706,
|
|
"learning_rate": 2.2240109342620198e-06,
|
|
"loss": 0.4817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16458621621131897,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4822.6,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 4.325629385059843,
|
|
"grad_norm": 0.24506580324285554,
|
|
"learning_rate": 2.184579016967607e-06,
|
|
"loss": 0.4825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14452794194221497,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3932.0,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 4.331820057779612,
|
|
"grad_norm": 0.2655744157137475,
|
|
"learning_rate": 2.1454795964340457e-06,
|
|
"loss": 0.4626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16227002441883087,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3117.2,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 4.338010730499381,
|
|
"grad_norm": 0.25960900291161476,
|
|
"learning_rate": 2.106713402390419e-06,
|
|
"loss": 0.4991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13666290044784546,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3402.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 4.34420140321915,
|
|
"grad_norm": 0.3676616771811431,
|
|
"learning_rate": 2.0682811583466366e-06,
|
|
"loss": 0.4832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20904065668582916,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3427.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.3503920759389185,
|
|
"grad_norm": 0.22127092817292562,
|
|
"learning_rate": 2.030183581579985e-06,
|
|
"loss": 0.4847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14982908964157104,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4640.0,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 4.356582748658687,
|
|
"grad_norm": 0.23545868917276372,
|
|
"learning_rate": 1.9924213831217033e-06,
|
|
"loss": 0.4939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16136851906776428,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3996.0,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 4.362773421378456,
|
|
"grad_norm": 0.2377512478004086,
|
|
"learning_rate": 1.954995267743736e-06,
|
|
"loss": 0.4766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149477019906044,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3636.5,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 4.368964094098225,
|
|
"grad_norm": 0.2469450738174877,
|
|
"learning_rate": 1.917905933945532e-06,
|
|
"loss": 0.5041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16657090187072754,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3391.8,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 4.375154766817994,
|
|
"grad_norm": 0.41672460547581003,
|
|
"learning_rate": 1.881154073941076e-06,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09508286416530609,
|
|
"step": 3535,
|
|
"valid_targets_mean": 5537.1,
|
|
"valid_targets_min": 2564
|
|
},
|
|
{
|
|
"epoch": 4.381345439537763,
|
|
"grad_norm": 0.4006873109282658,
|
|
"learning_rate": 1.84474037364593e-06,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05314338952302933,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5478.3,
|
|
"valid_targets_min": 2226
|
|
},
|
|
{
|
|
"epoch": 4.387536112257532,
|
|
"grad_norm": 0.22625721368966836,
|
|
"learning_rate": 1.8086655126644226e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04012366384267807,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4935.5,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 4.393726784977301,
|
|
"grad_norm": 0.21352839700877574,
|
|
"learning_rate": 1.7729301642770003e-06,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04501489922404289,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5529.9,
|
|
"valid_targets_min": 3140
|
|
},
|
|
{
|
|
"epoch": 4.39991745769707,
|
|
"grad_norm": 0.18797831164194703,
|
|
"learning_rate": 1.7375349954276232e-06,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03921738266944885,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4935.2,
|
|
"valid_targets_min": 2984
|
|
},
|
|
{
|
|
"epoch": 4.406108130416839,
|
|
"grad_norm": 0.17676120458891217,
|
|
"learning_rate": 1.7024806667113569e-06,
|
|
"loss": 0.1259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038862355053424835,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4934.8,
|
|
"valid_targets_min": 2648
|
|
},
|
|
{
|
|
"epoch": 4.412298803136608,
|
|
"grad_norm": 0.1760366235689323,
|
|
"learning_rate": 1.6677678323619994e-06,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039879750460386276,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5136.8,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 4.418489475856377,
|
|
"grad_norm": 0.17900894732754008,
|
|
"learning_rate": 1.6333971402399163e-06,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03854871168732643,
|
|
"step": 3570,
|
|
"valid_targets_mean": 5029.4,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 4.4246801485761456,
|
|
"grad_norm": 0.1661220263153419,
|
|
"learning_rate": 1.5993692318199116e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04149442911148071,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5930.7,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 4.4308708212959145,
|
|
"grad_norm": 0.16406922547932465,
|
|
"learning_rate": 1.565684742179283e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03713925927877426,
|
|
"step": 3580,
|
|
"valid_targets_mean": 5439.3,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 4.437061494015683,
|
|
"grad_norm": 0.16626921219826282,
|
|
"learning_rate": 1.5323442999859506e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03770250454545021,
|
|
"step": 3585,
|
|
"valid_targets_mean": 5153.3,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 4.443252166735452,
|
|
"grad_norm": 0.17130899939305422,
|
|
"learning_rate": 1.4993485274867347e-06,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04467492550611496,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5870.0,
|
|
"valid_targets_min": 3065
|
|
},
|
|
{
|
|
"epoch": 4.449442839455221,
|
|
"grad_norm": 0.1678875921222296,
|
|
"learning_rate": 1.4666980404957332e-06,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040957070887088776,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5658.6,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 4.455633512174989,
|
|
"grad_norm": 0.17669363671336374,
|
|
"learning_rate": 1.4343934483828448e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036771852523088455,
|
|
"step": 3600,
|
|
"valid_targets_mean": 5224.7,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 4.461824184894758,
|
|
"grad_norm": 0.16916418216307372,
|
|
"learning_rate": 1.4024353540623726e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04043577238917351,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5735.1,
|
|
"valid_targets_min": 3533
|
|
},
|
|
{
|
|
"epoch": 4.468014857614527,
|
|
"grad_norm": 0.1757936368796329,
|
|
"learning_rate": 1.3708243539817923e-06,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03609887510538101,
|
|
"step": 3610,
|
|
"valid_targets_mean": 5265.8,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 4.474205530334296,
|
|
"grad_norm": 0.16981820738791287,
|
|
"learning_rate": 1.3395610381106172e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041492559015750885,
|
|
"step": 3615,
|
|
"valid_targets_mean": 5609.0,
|
|
"valid_targets_min": 3609
|
|
},
|
|
{
|
|
"epoch": 4.480396203054065,
|
|
"grad_norm": 0.17678193730470568,
|
|
"learning_rate": 1.3086459899293691e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038396596908569336,
|
|
"step": 3620,
|
|
"valid_targets_mean": 5324.1,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 4.486586875773834,
|
|
"grad_norm": 0.1761940788739463,
|
|
"learning_rate": 1.278079786418711e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04002201557159424,
|
|
"step": 3625,
|
|
"valid_targets_mean": 4990.3,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 4.492777548493603,
|
|
"grad_norm": 0.17003181685530802,
|
|
"learning_rate": 1.2478629980486677e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03946438431739807,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5599.9,
|
|
"valid_targets_min": 2657
|
|
},
|
|
{
|
|
"epoch": 4.498968221213372,
|
|
"grad_norm": 0.17012499718245974,
|
|
"learning_rate": 1.2179961887679848e-06,
|
|
"loss": 0.1157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040597882121801376,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5342.1,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 4.505158893933141,
|
|
"grad_norm": 0.3977156147073853,
|
|
"learning_rate": 1.1884799159935968e-06,
|
|
"loss": 0.173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07253716886043549,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2753.4,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 4.5113495666529095,
|
|
"grad_norm": 0.39563832284123934,
|
|
"learning_rate": 1.1593147306002183e-06,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07273897528648376,
|
|
"step": 3645,
|
|
"valid_targets_mean": 2784.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.5175402393726785,
|
|
"grad_norm": 0.3854593056655363,
|
|
"learning_rate": 1.1305011769100838e-06,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07413459569215775,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2908.0,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 4.523730912092447,
|
|
"grad_norm": 0.40591178276996187,
|
|
"learning_rate": 1.1020397926827765e-06,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05975446105003357,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2257.8,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 4.529921584812216,
|
|
"grad_norm": 0.33462702080907136,
|
|
"learning_rate": 1.0739311091051819e-06,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05820050835609436,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2673.1,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 4.536112257531985,
|
|
"grad_norm": 0.2996201327086261,
|
|
"learning_rate": 1.0461756507815823e-06,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061028555035591125,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3598.9,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 4.542302930251754,
|
|
"grad_norm": 0.34458164984332446,
|
|
"learning_rate": 1.0187739357238802e-06,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07405778765678406,
|
|
"step": 3670,
|
|
"valid_targets_mean": 2927.8,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.548493602971523,
|
|
"grad_norm": 0.3256230028317915,
|
|
"learning_rate": 9.91726475341912e-07,
|
|
"loss": 0.2024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08748337626457214,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3389.4,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 4.554684275691292,
|
|
"grad_norm": 0.3264855498741302,
|
|
"learning_rate": 9.65033774433901e-07,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05760114639997482,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2652.3,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.560874948411061,
|
|
"grad_norm": 0.3349167653128066,
|
|
"learning_rate": 9.386963311770514e-07,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07405319809913635,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3397.5,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 4.56706562113083,
|
|
"grad_norm": 0.32841699756509185,
|
|
"learning_rate": 9.127146371182438e-07,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07492075860500336,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3299.4,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 4.573256293850599,
|
|
"grad_norm": 0.35354954385696197,
|
|
"learning_rate": 8.870891771648571e-07,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058701496571302414,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2646.8,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 4.579446966570368,
|
|
"grad_norm": 0.34444433080284126,
|
|
"learning_rate": 8.61820429575726e-07,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06625692546367645,
|
|
"step": 3700,
|
|
"valid_targets_mean": 2510.5,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 4.585637639290137,
|
|
"grad_norm": 0.32955325726015455,
|
|
"learning_rate": 8.369088659522018e-07,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06729099154472351,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3139.3,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 4.5918283120099055,
|
|
"grad_norm": 0.3215373851903429,
|
|
"learning_rate": 8.123549512293683e-07,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061443883925676346,
|
|
"step": 3710,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 4.5980189847296735,
|
|
"grad_norm": 0.3270165956229651,
|
|
"learning_rate": 7.881591436673619e-07,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06545057147741318,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3203.1,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 4.604209657449443,
|
|
"grad_norm": 0.3301081663283038,
|
|
"learning_rate": 7.643218948427967e-07,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05714607983827591,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2343.8,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.610400330169211,
|
|
"grad_norm": 0.34539313959899226,
|
|
"learning_rate": 7.408436496403615e-07,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06193718686699867,
|
|
"step": 3725,
|
|
"valid_targets_mean": 2488.7,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 4.61659100288898,
|
|
"grad_norm": 0.34062337861455966,
|
|
"learning_rate": 7.177248462445141e-07,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06846728920936584,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3664.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 4.622781675608749,
|
|
"grad_norm": 1.0380648904298762,
|
|
"learning_rate": 6.949659161312872e-07,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16352170705795288,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3916.4,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 4.628972348328518,
|
|
"grad_norm": 0.772998670984014,
|
|
"learning_rate": 6.725672840602549e-07,
|
|
"loss": 0.4232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12537476420402527,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3287.2,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 4.635163021048287,
|
|
"grad_norm": 0.649754438545515,
|
|
"learning_rate": 6.505293680665836e-07,
|
|
"loss": 0.415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11781395971775055,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4194.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.641353693768056,
|
|
"grad_norm": 0.5100549842149741,
|
|
"learning_rate": 6.288525794532541e-07,
|
|
"loss": 0.4126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12699177861213684,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3712.2,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 4.647544366487825,
|
|
"grad_norm": 0.44826316082941803,
|
|
"learning_rate": 6.075373227833714e-07,
|
|
"loss": 0.4052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11495552211999893,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3538.5,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 4.653735039207594,
|
|
"grad_norm": 0.566979193531166,
|
|
"learning_rate": 5.865839958726116e-07,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16701677441596985,
|
|
"step": 3760,
|
|
"valid_targets_mean": 1189.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 4.659925711927363,
|
|
"grad_norm": 0.3128015711369774,
|
|
"learning_rate": 5.659929897818095e-07,
|
|
"loss": 0.3935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12888266146183014,
|
|
"step": 3765,
|
|
"valid_targets_mean": 4119.4,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 4.666116384647132,
|
|
"grad_norm": 0.25944646115927844,
|
|
"learning_rate": 5.457646888096446e-07,
|
|
"loss": 0.3678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1221608966588974,
|
|
"step": 3770,
|
|
"valid_targets_mean": 4418.8,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 4.6723070573669006,
|
|
"grad_norm": 0.2840848051945603,
|
|
"learning_rate": 5.258994704854825e-07,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10952674597501755,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3462.2,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 4.6784977300866695,
|
|
"grad_norm": 0.2720417180082686,
|
|
"learning_rate": 5.063977055623181e-07,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15169087052345276,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3550.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 4.684688402806438,
|
|
"grad_norm": 0.244202965457083,
|
|
"learning_rate": 4.872597580098726e-07,
|
|
"loss": 0.3776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12155601382255554,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4635.3,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 4.690879075526207,
|
|
"grad_norm": 0.25655736411005736,
|
|
"learning_rate": 4.6848598500777166e-07,
|
|
"loss": 0.3836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13189350068569183,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3635.9,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 4.697069748245976,
|
|
"grad_norm": 0.26779474625088545,
|
|
"learning_rate": 4.5007673693891583e-07,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09058993309736252,
|
|
"step": 3795,
|
|
"valid_targets_mean": 2792.4,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 4.703260420965745,
|
|
"grad_norm": 0.39055931281402584,
|
|
"learning_rate": 4.320323573829055e-07,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2163667380809784,
|
|
"step": 3800,
|
|
"valid_targets_mean": 4429.5,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 4.709451093685514,
|
|
"grad_norm": 0.2234994762212477,
|
|
"learning_rate": 4.143531831096548e-07,
|
|
"loss": 0.3909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12510719895362854,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4782.1,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 4.715641766405283,
|
|
"grad_norm": 0.2308215036017246,
|
|
"learning_rate": 3.9703954407309677e-07,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12211146950721741,
|
|
"step": 3810,
|
|
"valid_targets_mean": 4143.1,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 4.721832439125052,
|
|
"grad_norm": 0.23596241705628537,
|
|
"learning_rate": 3.800917634050194e-07,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12609779834747314,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4065.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 4.728023111844821,
|
|
"grad_norm": 0.2427739054506343,
|
|
"learning_rate": 3.6351015740904607e-07,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12428630888462067,
|
|
"step": 3820,
|
|
"valid_targets_mean": 4074.2,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 4.73421378456459,
|
|
"grad_norm": 0.21888991785787223,
|
|
"learning_rate": 3.472950355547267e-07,
|
|
"loss": 0.3732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11329171806573868,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4832.0,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 4.740404457284358,
|
|
"grad_norm": 0.27643411966993786,
|
|
"learning_rate": 3.314467004717581e-07,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15358087420463562,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3554.3,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 4.746595130004128,
|
|
"grad_norm": 0.249779073672379,
|
|
"learning_rate": 3.1596544794434194e-07,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1364826112985611,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4176.4,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 4.752785802723896,
|
|
"grad_norm": 0.3894624626013973,
|
|
"learning_rate": 3.008515669056622e-07,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11509427428245544,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5585.3,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 4.7589764754436645,
|
|
"grad_norm": 0.37466729027560286,
|
|
"learning_rate": 2.861053394324964e-07,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07529841363430023,
|
|
"step": 3845,
|
|
"valid_targets_mean": 6182.6,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 4.7651671481634335,
|
|
"grad_norm": 0.35583653574578333,
|
|
"learning_rate": 2.717270407399442e-07,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10174112021923065,
|
|
"step": 3850,
|
|
"valid_targets_mean": 7839.3,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 4.771357820883202,
|
|
"grad_norm": 0.3206630684316962,
|
|
"learning_rate": 2.5771693917629346e-07,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08910118043422699,
|
|
"step": 3855,
|
|
"valid_targets_mean": 6174.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 4.777548493602971,
|
|
"grad_norm": 0.2991076999061963,
|
|
"learning_rate": 2.440752962180137e-07,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09649590402841568,
|
|
"step": 3860,
|
|
"valid_targets_mean": 7711.5,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 4.78373916632274,
|
|
"grad_norm": 0.25418381663794515,
|
|
"learning_rate": 2.308023664648773e-07,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06561288237571716,
|
|
"step": 3865,
|
|
"valid_targets_mean": 5962.4,
|
|
"valid_targets_min": 158
|
|
},
|
|
{
|
|
"epoch": 4.789929839042509,
|
|
"grad_norm": 0.2596798389021729,
|
|
"learning_rate": 2.1789839763520337e-07,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06666063517332077,
|
|
"step": 3870,
|
|
"valid_targets_mean": 5733.2,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.796120511762278,
|
|
"grad_norm": 0.2651010831834104,
|
|
"learning_rate": 2.0536363056123497e-07,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04882320016622543,
|
|
"step": 3875,
|
|
"valid_targets_mean": 1183.0,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 4.802311184482047,
|
|
"grad_norm": 0.23120874553488835,
|
|
"learning_rate": 1.9319829918464927e-07,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05706536024808884,
|
|
"step": 3880,
|
|
"valid_targets_mean": 5966.8,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.808501857201816,
|
|
"grad_norm": 0.22992554929397055,
|
|
"learning_rate": 1.814026305521921e-07,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056153081357479095,
|
|
"step": 3885,
|
|
"valid_targets_mean": 4898.7,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 4.814692529921585,
|
|
"grad_norm": 0.21334916307232607,
|
|
"learning_rate": 1.6997684481142805e-07,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07072760164737701,
|
|
"step": 3890,
|
|
"valid_targets_mean": 6202.8,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 4.820883202641354,
|
|
"grad_norm": 0.20415002126202875,
|
|
"learning_rate": 1.5892115520664386e-07,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07864207029342651,
|
|
"step": 3895,
|
|
"valid_targets_mean": 7420.6,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 4.827073875361123,
|
|
"grad_norm": 0.20650339950988192,
|
|
"learning_rate": 1.4823576807486473e-07,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657179892063141,
|
|
"step": 3900,
|
|
"valid_targets_mean": 7645.6,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 4.833264548080892,
|
|
"grad_norm": 0.20087388813644044,
|
|
"learning_rate": 1.3792088284200867e-07,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08677313476800919,
|
|
"step": 3905,
|
|
"valid_targets_mean": 6437.9,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 4.8394552208006605,
|
|
"grad_norm": 0.21729607481256077,
|
|
"learning_rate": 1.2797669201915163e-07,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07909562438726425,
|
|
"step": 3910,
|
|
"valid_targets_mean": 6452.5,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 4.845645893520429,
|
|
"grad_norm": 0.20302913670764228,
|
|
"learning_rate": 1.1840338119894824e-07,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07035000622272491,
|
|
"step": 3915,
|
|
"valid_targets_mean": 6317.1,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 4.851836566240198,
|
|
"grad_norm": 0.20249311618635615,
|
|
"learning_rate": 1.0920112905215885e-07,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07681111991405487,
|
|
"step": 3920,
|
|
"valid_targets_mean": 6657.9,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 4.858027238959967,
|
|
"grad_norm": 0.18359633347909893,
|
|
"learning_rate": 1.0037010732432351e-07,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06914827972650528,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5992.4,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 4.864217911679736,
|
|
"grad_norm": 0.1827232897661306,
|
|
"learning_rate": 9.191048083254217e-08,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05853189155459404,
|
|
"step": 3930,
|
|
"valid_targets_mean": 6126.7,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 4.870408584399505,
|
|
"grad_norm": 0.26247459064707296,
|
|
"learning_rate": 8.3822407462415e-08,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04868985712528229,
|
|
"step": 3935,
|
|
"valid_targets_mean": 1509.7,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 4.876599257119274,
|
|
"grad_norm": 0.29359205593459364,
|
|
"learning_rate": 7.61060381650891e-08,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05938276648521423,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5922.6,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 4.882789929839042,
|
|
"grad_norm": 0.2692733772344759,
|
|
"learning_rate": 6.876151695443867e-08,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052184488624334335,
|
|
"step": 3945,
|
|
"valid_targets_mean": 5310.9,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.888980602558812,
|
|
"grad_norm": 0.2627650334601582,
|
|
"learning_rate": 6.17889809043759e-08,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04820384830236435,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4497.7,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 4.89517127527858,
|
|
"grad_norm": 0.24522166978663007,
|
|
"learning_rate": 5.5188560146304246e-08,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05095156654715538,
|
|
"step": 3955,
|
|
"valid_targets_mean": 5293.4,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.901361947998349,
|
|
"grad_norm": 0.23272212164588268,
|
|
"learning_rate": 4.8960377866673624e-08,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05176714062690735,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5189.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 4.907552620718118,
|
|
"grad_norm": 0.24920700144725588,
|
|
"learning_rate": 4.3104550304693405e-08,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053080108016729355,
|
|
"step": 3965,
|
|
"valid_targets_mean": 5132.1,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 4.913743293437887,
|
|
"grad_norm": 0.24152580179745753,
|
|
"learning_rate": 3.762118675015858e-08,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043190836906433105,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4858.0,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 4.9199339661576555,
|
|
"grad_norm": 0.23363527124397515,
|
|
"learning_rate": 3.251038954140917e-08,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050882648676633835,
|
|
"step": 3975,
|
|
"valid_targets_mean": 5312.7,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 4.9261246388774245,
|
|
"grad_norm": 0.22414714179397416,
|
|
"learning_rate": 2.7772254063420656e-08,
|
|
"loss": 0.1428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04192829877138138,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4486.5,
|
|
"valid_targets_min": 216
|
|
},
|
|
{
|
|
"epoch": 4.932315311597193,
|
|
"grad_norm": 0.21124014438798855,
|
|
"learning_rate": 2.340686874602982e-08,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04128830134868622,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4421.3,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 4.938505984316962,
|
|
"grad_norm": 0.2207213122028314,
|
|
"learning_rate": 1.9414315062269427e-08,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052753038704395294,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5382.3,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 4.944696657036731,
|
|
"grad_norm": 0.2212290520334536,
|
|
"learning_rate": 1.5794667526867202e-08,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044371940195560455,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4716.2,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 4.9508873297565,
|
|
"grad_norm": 0.2093945686839604,
|
|
"learning_rate": 1.2547993694840276e-08,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05010461062192917,
|
|
"step": 4000,
|
|
"valid_targets_mean": 5423.3,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 4.957078002476269,
|
|
"grad_norm": 0.22712447832690252,
|
|
"learning_rate": 9.67435416023843e-09,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05417510122060776,
|
|
"step": 4005,
|
|
"valid_targets_mean": 5819.8,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 4.963268675196038,
|
|
"grad_norm": 0.2185120322719519,
|
|
"learning_rate": 7.173802555016096e-09,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05051335319876671,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4621.3,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 4.969459347915807,
|
|
"grad_norm": 0.21172026697441632,
|
|
"learning_rate": 5.046385548030941e-09,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05741196125745773,
|
|
"step": 4015,
|
|
"valid_targets_mean": 6490.0,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 4.975650020635576,
|
|
"grad_norm": 0.21650470236568767,
|
|
"learning_rate": 3.2921428441667904e-09,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04022546857595444,
|
|
"step": 4020,
|
|
"valid_targets_mean": 4670.9,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 4.981840693355345,
|
|
"grad_norm": 0.2104125599264264,
|
|
"learning_rate": 1.9111071835986595e-09,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050080083310604095,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5857.8,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 4.988031366075114,
|
|
"grad_norm": 0.25420987310396814,
|
|
"learning_rate": 9.033043411843523e-10,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04626571759581566,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4251.2,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 4.994222038794883,
|
|
"grad_norm": 0.28318873009106305,
|
|
"learning_rate": 2.687531259737419e-10,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03849755600094795,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3827.2,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.46704601482245883,
|
|
"learning_rate": 7.465380864601913e-12,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15064625442028046,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4476.7,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15064625442028046,
|
|
"step": 4040,
|
|
"total_flos": 1.157917389896024e+19,
|
|
"train_loss": 0.1381187802789235,
|
|
"train_runtime": 40381.802,
|
|
"train_samples_per_second": 9.6,
|
|
"train_steps_per_second": 0.1,
|
|
"valid_targets_mean": 4476.7,
|
|
"valid_targets_min": 258
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4040,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.157917389896024e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|