5130 lines
143 KiB
JSON
5130 lines
143 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 2310,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.015182186234817813,
|
|
"grad_norm": 9.920685676604402,
|
|
"learning_rate": 6.926406926406927e-07,
|
|
"loss": 0.4984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1637176275253296,
|
|
"step": 5,
|
|
"valid_targets_mean": 8828.2,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 0.030364372469635626,
|
|
"grad_norm": 8.583283954549747,
|
|
"learning_rate": 1.5584415584415584e-06,
|
|
"loss": 0.486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16003212332725525,
|
|
"step": 10,
|
|
"valid_targets_mean": 8116.5,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 0.04554655870445344,
|
|
"grad_norm": 5.834184081474569,
|
|
"learning_rate": 2.4242424242424244e-06,
|
|
"loss": 0.4667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15182234346866608,
|
|
"step": 15,
|
|
"valid_targets_mean": 8655.5,
|
|
"valid_targets_min": 2121
|
|
},
|
|
{
|
|
"epoch": 0.06072874493927125,
|
|
"grad_norm": 3.108293566001373,
|
|
"learning_rate": 3.2900432900432905e-06,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378338634967804,
|
|
"step": 20,
|
|
"valid_targets_mean": 8205.9,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 0.07591093117408906,
|
|
"grad_norm": 1.7248266696316037,
|
|
"learning_rate": 4.155844155844157e-06,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13982835412025452,
|
|
"step": 25,
|
|
"valid_targets_mean": 9102.8,
|
|
"valid_targets_min": 2247
|
|
},
|
|
{
|
|
"epoch": 0.09109311740890688,
|
|
"grad_norm": 1.3298044683252872,
|
|
"learning_rate": 5.021645021645022e-06,
|
|
"loss": 0.3838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12339320778846741,
|
|
"step": 30,
|
|
"valid_targets_mean": 8613.6,
|
|
"valid_targets_min": 2329
|
|
},
|
|
{
|
|
"epoch": 0.1062753036437247,
|
|
"grad_norm": 0.8016851887689874,
|
|
"learning_rate": 5.887445887445888e-06,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11066941171884537,
|
|
"step": 35,
|
|
"valid_targets_mean": 8026.8,
|
|
"valid_targets_min": 3644
|
|
},
|
|
{
|
|
"epoch": 0.1214574898785425,
|
|
"grad_norm": 0.5842806573149811,
|
|
"learning_rate": 6.753246753246754e-06,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10672516375780106,
|
|
"step": 40,
|
|
"valid_targets_mean": 8478.8,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 0.13663967611336034,
|
|
"grad_norm": 0.4855203996040171,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11012927442789078,
|
|
"step": 45,
|
|
"valid_targets_mean": 8863.4,
|
|
"valid_targets_min": 3657
|
|
},
|
|
{
|
|
"epoch": 0.15182186234817813,
|
|
"grad_norm": 0.4177503326138919,
|
|
"learning_rate": 8.484848484848486e-06,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09129321575164795,
|
|
"step": 50,
|
|
"valid_targets_mean": 8296.4,
|
|
"valid_targets_min": 3093
|
|
},
|
|
{
|
|
"epoch": 0.16700404858299595,
|
|
"grad_norm": 0.33041887771839845,
|
|
"learning_rate": 9.350649350649352e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0893252044916153,
|
|
"step": 55,
|
|
"valid_targets_mean": 9049.6,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 0.18218623481781376,
|
|
"grad_norm": 0.3038750384332255,
|
|
"learning_rate": 1.0216450216450218e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08403687924146652,
|
|
"step": 60,
|
|
"valid_targets_mean": 7953.8,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 0.19736842105263158,
|
|
"grad_norm": 0.2474198191273425,
|
|
"learning_rate": 1.1082251082251083e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07787541300058365,
|
|
"step": 65,
|
|
"valid_targets_mean": 8447.6,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 0.2125506072874494,
|
|
"grad_norm": 0.2308885792853868,
|
|
"learning_rate": 1.1948051948051949e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07520733773708344,
|
|
"step": 70,
|
|
"valid_targets_mean": 7695.7,
|
|
"valid_targets_min": 2372
|
|
},
|
|
{
|
|
"epoch": 0.22773279352226722,
|
|
"grad_norm": 0.1950044857367663,
|
|
"learning_rate": 1.2813852813852813e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0756479948759079,
|
|
"step": 75,
|
|
"valid_targets_mean": 8764.5,
|
|
"valid_targets_min": 3045
|
|
},
|
|
{
|
|
"epoch": 0.242914979757085,
|
|
"grad_norm": 0.19058830703102458,
|
|
"learning_rate": 1.3679653679653682e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07331258058547974,
|
|
"step": 80,
|
|
"valid_targets_mean": 8550.6,
|
|
"valid_targets_min": 3183
|
|
},
|
|
{
|
|
"epoch": 0.25809716599190285,
|
|
"grad_norm": 0.17036253644146848,
|
|
"learning_rate": 1.4545454545454546e-05,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06963507831096649,
|
|
"step": 85,
|
|
"valid_targets_mean": 9359.7,
|
|
"valid_targets_min": 2372
|
|
},
|
|
{
|
|
"epoch": 0.2732793522267207,
|
|
"grad_norm": 0.16885887988347678,
|
|
"learning_rate": 1.5411255411255414e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07278674840927124,
|
|
"step": 90,
|
|
"valid_targets_mean": 9334.6,
|
|
"valid_targets_min": 4322
|
|
},
|
|
{
|
|
"epoch": 0.28846153846153844,
|
|
"grad_norm": 0.18962012206116807,
|
|
"learning_rate": 1.6277056277056278e-05,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06861764192581177,
|
|
"step": 95,
|
|
"valid_targets_mean": 8474.4,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 0.30364372469635625,
|
|
"grad_norm": 0.1703305790920396,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07181292772293091,
|
|
"step": 100,
|
|
"valid_targets_mean": 8987.0,
|
|
"valid_targets_min": 3077
|
|
},
|
|
{
|
|
"epoch": 0.3188259109311741,
|
|
"grad_norm": 0.17353967429286005,
|
|
"learning_rate": 1.800865800865801e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059678640216588974,
|
|
"step": 105,
|
|
"valid_targets_mean": 8825.8,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 0.3340080971659919,
|
|
"grad_norm": 0.17454211701359135,
|
|
"learning_rate": 1.8874458874458877e-05,
|
|
"loss": 0.1969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06872868537902832,
|
|
"step": 110,
|
|
"valid_targets_mean": 9457.8,
|
|
"valid_targets_min": 2369
|
|
},
|
|
{
|
|
"epoch": 0.3491902834008097,
|
|
"grad_norm": 0.1658884238108275,
|
|
"learning_rate": 1.974025974025974e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06428892910480499,
|
|
"step": 115,
|
|
"valid_targets_mean": 8780.1,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 0.3643724696356275,
|
|
"grad_norm": 0.16632968608098295,
|
|
"learning_rate": 2.0606060606060608e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06445731222629547,
|
|
"step": 120,
|
|
"valid_targets_mean": 8754.1,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 0.37955465587044535,
|
|
"grad_norm": 0.192205741450083,
|
|
"learning_rate": 2.1471861471861476e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07370175421237946,
|
|
"step": 125,
|
|
"valid_targets_mean": 9624.0,
|
|
"valid_targets_min": 5281
|
|
},
|
|
{
|
|
"epoch": 0.39473684210526316,
|
|
"grad_norm": 0.18070406906511108,
|
|
"learning_rate": 2.233766233766234e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05860336124897003,
|
|
"step": 130,
|
|
"valid_targets_mean": 8650.3,
|
|
"valid_targets_min": 2707
|
|
},
|
|
{
|
|
"epoch": 0.409919028340081,
|
|
"grad_norm": 0.1575667031827334,
|
|
"learning_rate": 2.3203463203463207e-05,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058790430426597595,
|
|
"step": 135,
|
|
"valid_targets_mean": 8942.3,
|
|
"valid_targets_min": 4505
|
|
},
|
|
{
|
|
"epoch": 0.4251012145748988,
|
|
"grad_norm": 0.1608844261850157,
|
|
"learning_rate": 2.4069264069264074e-05,
|
|
"loss": 0.1793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053758904337882996,
|
|
"step": 140,
|
|
"valid_targets_mean": 8703.2,
|
|
"valid_targets_min": 2783
|
|
},
|
|
{
|
|
"epoch": 0.4402834008097166,
|
|
"grad_norm": 0.17737092097319154,
|
|
"learning_rate": 2.4935064935064935e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06288285553455353,
|
|
"step": 145,
|
|
"valid_targets_mean": 8819.5,
|
|
"valid_targets_min": 3279
|
|
},
|
|
{
|
|
"epoch": 0.45546558704453444,
|
|
"grad_norm": 0.1797532765316918,
|
|
"learning_rate": 2.5800865800865803e-05,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059265997260808945,
|
|
"step": 150,
|
|
"valid_targets_mean": 8359.1,
|
|
"valid_targets_min": 2624
|
|
},
|
|
{
|
|
"epoch": 0.4706477732793522,
|
|
"grad_norm": 0.1761190339904338,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058463435620069504,
|
|
"step": 155,
|
|
"valid_targets_mean": 8089.2,
|
|
"valid_targets_min": 2462
|
|
},
|
|
{
|
|
"epoch": 0.48582995951417,
|
|
"grad_norm": 0.1657972415480439,
|
|
"learning_rate": 2.7532467532467534e-05,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056585799902677536,
|
|
"step": 160,
|
|
"valid_targets_mean": 8475.0,
|
|
"valid_targets_min": 2841
|
|
},
|
|
{
|
|
"epoch": 0.5010121457489879,
|
|
"grad_norm": 0.16776154339515179,
|
|
"learning_rate": 2.83982683982684e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054723113775253296,
|
|
"step": 165,
|
|
"valid_targets_mean": 8380.9,
|
|
"valid_targets_min": 3546
|
|
},
|
|
{
|
|
"epoch": 0.5161943319838057,
|
|
"grad_norm": 0.1889666246453731,
|
|
"learning_rate": 2.9264069264069265e-05,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052413374185562134,
|
|
"step": 170,
|
|
"valid_targets_mean": 8671.6,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 0.5313765182186235,
|
|
"grad_norm": 0.1902254701019613,
|
|
"learning_rate": 3.0129870129870133e-05,
|
|
"loss": 0.1676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05615796521306038,
|
|
"step": 175,
|
|
"valid_targets_mean": 8028.8,
|
|
"valid_targets_min": 3903
|
|
},
|
|
{
|
|
"epoch": 0.5465587044534413,
|
|
"grad_norm": 0.19777906577799276,
|
|
"learning_rate": 3.0995670995671e-05,
|
|
"loss": 0.1711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06585124135017395,
|
|
"step": 180,
|
|
"valid_targets_mean": 8567.8,
|
|
"valid_targets_min": 3896
|
|
},
|
|
{
|
|
"epoch": 0.5617408906882592,
|
|
"grad_norm": 0.18310430587977772,
|
|
"learning_rate": 3.1861471861471864e-05,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05651712790131569,
|
|
"step": 185,
|
|
"valid_targets_mean": 8706.2,
|
|
"valid_targets_min": 4483
|
|
},
|
|
{
|
|
"epoch": 0.5769230769230769,
|
|
"grad_norm": 0.17213950731686126,
|
|
"learning_rate": 3.272727272727273e-05,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05835697054862976,
|
|
"step": 190,
|
|
"valid_targets_mean": 9589.9,
|
|
"valid_targets_min": 3039
|
|
},
|
|
{
|
|
"epoch": 0.5921052631578947,
|
|
"grad_norm": 0.1777306192210503,
|
|
"learning_rate": 3.359307359307359e-05,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052583418786525726,
|
|
"step": 195,
|
|
"valid_targets_mean": 8595.4,
|
|
"valid_targets_min": 3420
|
|
},
|
|
{
|
|
"epoch": 0.6072874493927125,
|
|
"grad_norm": 0.19445858091664137,
|
|
"learning_rate": 3.445887445887446e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05798531323671341,
|
|
"step": 200,
|
|
"valid_targets_mean": 8717.7,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 0.6224696356275303,
|
|
"grad_norm": 0.20097557933773977,
|
|
"learning_rate": 3.532467532467533e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05606549233198166,
|
|
"step": 205,
|
|
"valid_targets_mean": 8518.2,
|
|
"valid_targets_min": 2136
|
|
},
|
|
{
|
|
"epoch": 0.6376518218623481,
|
|
"grad_norm": 0.20359547052580665,
|
|
"learning_rate": 3.6190476190476195e-05,
|
|
"loss": 0.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05161520093679428,
|
|
"step": 210,
|
|
"valid_targets_mean": 8564.2,
|
|
"valid_targets_min": 4150
|
|
},
|
|
{
|
|
"epoch": 0.652834008097166,
|
|
"grad_norm": 0.18896725147903395,
|
|
"learning_rate": 3.705627705627706e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05968295782804489,
|
|
"step": 215,
|
|
"valid_targets_mean": 9460.3,
|
|
"valid_targets_min": 3977
|
|
},
|
|
{
|
|
"epoch": 0.6680161943319838,
|
|
"grad_norm": 0.18726944140576257,
|
|
"learning_rate": 3.792207792207792e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05979807674884796,
|
|
"step": 220,
|
|
"valid_targets_mean": 8020.6,
|
|
"valid_targets_min": 1960
|
|
},
|
|
{
|
|
"epoch": 0.6831983805668016,
|
|
"grad_norm": 0.17017695415367765,
|
|
"learning_rate": 3.878787878787879e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05501836538314819,
|
|
"step": 225,
|
|
"valid_targets_mean": 9014.6,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 0.6983805668016194,
|
|
"grad_norm": 0.17274503224329754,
|
|
"learning_rate": 3.965367965367966e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046624116599559784,
|
|
"step": 230,
|
|
"valid_targets_mean": 7745.8,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 0.7135627530364372,
|
|
"grad_norm": 0.17393375363238767,
|
|
"learning_rate": 3.999979449020199e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0551910363137722,
|
|
"step": 235,
|
|
"valid_targets_mean": 8852.4,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 0.728744939271255,
|
|
"grad_norm": 0.17703744480422867,
|
|
"learning_rate": 3.99985386122866e-05,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061388783156871796,
|
|
"step": 240,
|
|
"valid_targets_mean": 9674.4,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 0.7439271255060729,
|
|
"grad_norm": 0.1755490065256388,
|
|
"learning_rate": 3.999614110017182e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05222570151090622,
|
|
"step": 245,
|
|
"valid_targets_mean": 8254.4,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 0.7591093117408907,
|
|
"grad_norm": 0.17854075713526074,
|
|
"learning_rate": 3.999260209072175e-05,
|
|
"loss": 0.1591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05122017860412598,
|
|
"step": 250,
|
|
"valid_targets_mean": 8235.9,
|
|
"valid_targets_min": 2508
|
|
},
|
|
{
|
|
"epoch": 0.7742914979757085,
|
|
"grad_norm": 0.17482517778760448,
|
|
"learning_rate": 3.998792178596384e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05237601324915886,
|
|
"step": 255,
|
|
"valid_targets_mean": 8814.3,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 0.7894736842105263,
|
|
"grad_norm": 0.16915432528477337,
|
|
"learning_rate": 3.998210045307744e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05112871527671814,
|
|
"step": 260,
|
|
"valid_targets_mean": 8595.0,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 0.8046558704453441,
|
|
"grad_norm": 0.1766712774202426,
|
|
"learning_rate": 3.997513842437845e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057970188558101654,
|
|
"step": 265,
|
|
"valid_targets_mean": 9996.3,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 0.819838056680162,
|
|
"grad_norm": 0.18835227228990123,
|
|
"learning_rate": 3.996703609730042e-05,
|
|
"loss": 0.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06339921057224274,
|
|
"step": 270,
|
|
"valid_targets_mean": 8771.0,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 0.8350202429149798,
|
|
"grad_norm": 0.1748806770778304,
|
|
"learning_rate": 3.995779393437183e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05962568521499634,
|
|
"step": 275,
|
|
"valid_targets_mean": 9703.9,
|
|
"valid_targets_min": 5437
|
|
},
|
|
{
|
|
"epoch": 0.8502024291497976,
|
|
"grad_norm": 0.172131978256736,
|
|
"learning_rate": 3.99474124631897e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05455712974071503,
|
|
"step": 280,
|
|
"valid_targets_mean": 8803.2,
|
|
"valid_targets_min": 3976
|
|
},
|
|
{
|
|
"epoch": 0.8653846153846154,
|
|
"grad_norm": 0.17571512376247586,
|
|
"learning_rate": 3.993589227638944e-05,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04495442286133766,
|
|
"step": 285,
|
|
"valid_targets_mean": 7695.8,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 0.8805668016194332,
|
|
"grad_norm": 0.20401510750824134,
|
|
"learning_rate": 3.992323403161109e-05,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05426368862390518,
|
|
"step": 290,
|
|
"valid_targets_mean": 8560.0,
|
|
"valid_targets_min": 3349
|
|
},
|
|
{
|
|
"epoch": 0.895748987854251,
|
|
"grad_norm": 0.17603980186833093,
|
|
"learning_rate": 3.9909438451461695e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05377856642007828,
|
|
"step": 295,
|
|
"valid_targets_mean": 8933.8,
|
|
"valid_targets_min": 4202
|
|
},
|
|
{
|
|
"epoch": 0.9109311740890689,
|
|
"grad_norm": 0.1610920602617969,
|
|
"learning_rate": 3.989450632347411e-05,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05308445543050766,
|
|
"step": 300,
|
|
"valid_targets_mean": 9392.7,
|
|
"valid_targets_min": 4608
|
|
},
|
|
{
|
|
"epoch": 0.9261133603238867,
|
|
"grad_norm": 0.16162690284191394,
|
|
"learning_rate": 3.987843850006201e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05333444103598595,
|
|
"step": 305,
|
|
"valid_targets_mean": 9501.2,
|
|
"valid_targets_min": 3889
|
|
},
|
|
{
|
|
"epoch": 0.9412955465587044,
|
|
"grad_norm": 0.16540047817471357,
|
|
"learning_rate": 3.9861235898471265e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05259306728839874,
|
|
"step": 310,
|
|
"valid_targets_mean": 8548.6,
|
|
"valid_targets_min": 2683
|
|
},
|
|
{
|
|
"epoch": 0.9564777327935222,
|
|
"grad_norm": 0.17622661530132494,
|
|
"learning_rate": 3.984289950072754e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05074026808142662,
|
|
"step": 315,
|
|
"valid_targets_mean": 8206.3,
|
|
"valid_targets_min": 3121
|
|
},
|
|
{
|
|
"epoch": 0.97165991902834,
|
|
"grad_norm": 0.15494480016013407,
|
|
"learning_rate": 3.982343035358026e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057811424136161804,
|
|
"step": 320,
|
|
"valid_targets_mean": 9368.8,
|
|
"valid_targets_min": 2512
|
|
},
|
|
{
|
|
"epoch": 0.9868421052631579,
|
|
"grad_norm": 0.17282421814742482,
|
|
"learning_rate": 3.980282956844284e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05163079500198364,
|
|
"step": 325,
|
|
"valid_targets_mean": 8683.1,
|
|
"valid_targets_min": 2766
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.26067604829558066,
|
|
"learning_rate": 3.9781098321329266e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1509656012058258,
|
|
"step": 330,
|
|
"valid_targets_mean": 8881.3,
|
|
"valid_targets_min": 4218
|
|
},
|
|
{
|
|
"epoch": 1.0151821862348178,
|
|
"grad_norm": 0.16457525883664004,
|
|
"learning_rate": 3.975823785278691e-05,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051272958517074585,
|
|
"step": 335,
|
|
"valid_targets_mean": 8870.9,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 1.0303643724696356,
|
|
"grad_norm": 0.176806463005662,
|
|
"learning_rate": 3.973424946782578e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048344798386096954,
|
|
"step": 340,
|
|
"valid_targets_mean": 8422.7,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 1.0455465587044535,
|
|
"grad_norm": 0.1892490713029691,
|
|
"learning_rate": 3.970913453584397e-05,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04805346950888634,
|
|
"step": 345,
|
|
"valid_targets_mean": 9017.8,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 1.0607287449392713,
|
|
"grad_norm": 0.18829313617941154,
|
|
"learning_rate": 3.9682894490549485e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04878433048725128,
|
|
"step": 350,
|
|
"valid_targets_mean": 8512.6,
|
|
"valid_targets_min": 2878
|
|
},
|
|
{
|
|
"epoch": 1.075910931174089,
|
|
"grad_norm": 0.16995872600965442,
|
|
"learning_rate": 3.965553082987846e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048037558794021606,
|
|
"step": 355,
|
|
"valid_targets_mean": 8480.0,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 1.091093117408907,
|
|
"grad_norm": 0.15922633166556377,
|
|
"learning_rate": 3.9627045115909556e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04166801646351814,
|
|
"step": 360,
|
|
"valid_targets_mean": 7983.2,
|
|
"valid_targets_min": 2485
|
|
},
|
|
{
|
|
"epoch": 1.1062753036437247,
|
|
"grad_norm": 0.16629569645149142,
|
|
"learning_rate": 3.959743897477486e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04984872788190842,
|
|
"step": 365,
|
|
"valid_targets_mean": 8933.7,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 1.1214574898785425,
|
|
"grad_norm": 0.1733164363419671,
|
|
"learning_rate": 3.9566714096567035e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050832800567150116,
|
|
"step": 370,
|
|
"valid_targets_mean": 9297.2,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 1.1366396761133604,
|
|
"grad_norm": 0.18052967645256035,
|
|
"learning_rate": 3.953487223524283e-05,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05133286118507385,
|
|
"step": 375,
|
|
"valid_targets_mean": 9175.1,
|
|
"valid_targets_min": 2563
|
|
},
|
|
{
|
|
"epoch": 1.1518218623481782,
|
|
"grad_norm": 0.18278921031603537,
|
|
"learning_rate": 3.950191520852294e-05,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05259602516889572,
|
|
"step": 380,
|
|
"valid_targets_mean": 9650.2,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 1.167004048582996,
|
|
"grad_norm": 0.16683773652096157,
|
|
"learning_rate": 3.94678448977883e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04788624867796898,
|
|
"step": 385,
|
|
"valid_targets_mean": 8738.1,
|
|
"valid_targets_min": 4652
|
|
},
|
|
{
|
|
"epoch": 1.1821862348178138,
|
|
"grad_norm": 0.17453057017357662,
|
|
"learning_rate": 3.9432663247972614e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054769936949014664,
|
|
"step": 390,
|
|
"valid_targets_mean": 8981.8,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 1.1973684210526316,
|
|
"grad_norm": 0.16378111115456734,
|
|
"learning_rate": 3.9396372267451356e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0511467382311821,
|
|
"step": 395,
|
|
"valid_targets_mean": 8501.0,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 1.2125506072874495,
|
|
"grad_norm": 0.2298428914758325,
|
|
"learning_rate": 3.935897402792713e-05,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05145154148340225,
|
|
"step": 400,
|
|
"valid_targets_mean": 8704.5,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 1.2277327935222673,
|
|
"grad_norm": 0.1678460651708729,
|
|
"learning_rate": 3.93204706643114e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04993787035346031,
|
|
"step": 405,
|
|
"valid_targets_mean": 8705.5,
|
|
"valid_targets_min": 3010
|
|
},
|
|
{
|
|
"epoch": 1.242914979757085,
|
|
"grad_norm": 0.16958422967620565,
|
|
"learning_rate": 3.92808643746026e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04393620043992996,
|
|
"step": 410,
|
|
"valid_targets_mean": 7899.5,
|
|
"valid_targets_min": 2349
|
|
},
|
|
{
|
|
"epoch": 1.258097165991903,
|
|
"grad_norm": 0.165821077982339,
|
|
"learning_rate": 3.924015741976069e-05,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048875562846660614,
|
|
"step": 415,
|
|
"valid_targets_mean": 8937.2,
|
|
"valid_targets_min": 2877
|
|
},
|
|
{
|
|
"epoch": 1.2732793522267207,
|
|
"grad_norm": 0.15481467006434907,
|
|
"learning_rate": 3.919835212357805e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05062958225607872,
|
|
"step": 420,
|
|
"valid_targets_mean": 9205.8,
|
|
"valid_targets_min": 4899
|
|
},
|
|
{
|
|
"epoch": 1.2884615384615383,
|
|
"grad_norm": 0.18144911688030052,
|
|
"learning_rate": 3.915545087254685e-05,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04798769950866699,
|
|
"step": 425,
|
|
"valid_targets_mean": 8680.7,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 1.3036437246963564,
|
|
"grad_norm": 0.17734087112473015,
|
|
"learning_rate": 3.911145611572282e-05,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047824546694755554,
|
|
"step": 430,
|
|
"valid_targets_mean": 8542.9,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 1.318825910931174,
|
|
"grad_norm": 0.1585178683752177,
|
|
"learning_rate": 3.906637036458541e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04692993313074112,
|
|
"step": 435,
|
|
"valid_targets_mean": 9228.2,
|
|
"valid_targets_min": 4201
|
|
},
|
|
{
|
|
"epoch": 1.334008097165992,
|
|
"grad_norm": 0.15954946535191572,
|
|
"learning_rate": 3.902019619289446e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04486957937479019,
|
|
"step": 440,
|
|
"valid_targets_mean": 8962.3,
|
|
"valid_targets_min": 2116
|
|
},
|
|
{
|
|
"epoch": 1.3491902834008096,
|
|
"grad_norm": 0.15908230205294083,
|
|
"learning_rate": 3.897293623654325e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04471299424767494,
|
|
"step": 445,
|
|
"valid_targets_mean": 8539.6,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 1.3643724696356276,
|
|
"grad_norm": 0.15793195397380033,
|
|
"learning_rate": 3.892459319340802e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04737589880824089,
|
|
"step": 450,
|
|
"valid_targets_mean": 8882.3,
|
|
"valid_targets_min": 5064
|
|
},
|
|
{
|
|
"epoch": 1.3795546558704452,
|
|
"grad_norm": 0.15956622848015198,
|
|
"learning_rate": 3.8875169823194e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04860437661409378,
|
|
"step": 455,
|
|
"valid_targets_mean": 9428.7,
|
|
"valid_targets_min": 5854
|
|
},
|
|
{
|
|
"epoch": 1.3947368421052633,
|
|
"grad_norm": 0.15573836285488737,
|
|
"learning_rate": 3.882466894727784e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05208943411707878,
|
|
"step": 460,
|
|
"valid_targets_mean": 8659.9,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 1.4099190283400809,
|
|
"grad_norm": 0.15306590186384678,
|
|
"learning_rate": 3.8773093448546525e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04370715096592903,
|
|
"step": 465,
|
|
"valid_targets_mean": 9370.1,
|
|
"valid_targets_min": 3050
|
|
},
|
|
{
|
|
"epoch": 1.425101214574899,
|
|
"grad_norm": 0.14768316620577182,
|
|
"learning_rate": 3.872044627123285e-05,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04435171186923981,
|
|
"step": 470,
|
|
"valid_targets_mean": 8776.6,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 1.4402834008097165,
|
|
"grad_norm": 0.1477310502704514,
|
|
"learning_rate": 3.8666730420747336e-05,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04756402224302292,
|
|
"step": 475,
|
|
"valid_targets_mean": 9003.5,
|
|
"valid_targets_min": 2337
|
|
},
|
|
{
|
|
"epoch": 1.4554655870445345,
|
|
"grad_norm": 0.16265226879400024,
|
|
"learning_rate": 3.861194896350664e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047036025673151016,
|
|
"step": 480,
|
|
"valid_targets_mean": 8930.4,
|
|
"valid_targets_min": 2960
|
|
},
|
|
{
|
|
"epoch": 1.4706477732793521,
|
|
"grad_norm": 0.1626146982915966,
|
|
"learning_rate": 3.855610502675851e-05,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047673143446445465,
|
|
"step": 485,
|
|
"valid_targets_mean": 8979.6,
|
|
"valid_targets_min": 3143
|
|
},
|
|
{
|
|
"epoch": 1.48582995951417,
|
|
"grad_norm": 0.17063474447899107,
|
|
"learning_rate": 3.8499201798403303e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05014685541391373,
|
|
"step": 490,
|
|
"valid_targets_mean": 9393.0,
|
|
"valid_targets_min": 4461
|
|
},
|
|
{
|
|
"epoch": 1.5010121457489878,
|
|
"grad_norm": 0.16364909202102335,
|
|
"learning_rate": 3.8441242526811934e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05002780631184578,
|
|
"step": 495,
|
|
"valid_targets_mean": 8865.5,
|
|
"valid_targets_min": 4513
|
|
},
|
|
{
|
|
"epoch": 1.5161943319838058,
|
|
"grad_norm": 0.15549959150616477,
|
|
"learning_rate": 3.8382230520640506e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046682409942150116,
|
|
"step": 500,
|
|
"valid_targets_mean": 8912.3,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 1.5313765182186234,
|
|
"grad_norm": 0.15952209274632462,
|
|
"learning_rate": 3.832216914864139e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05121661350131035,
|
|
"step": 505,
|
|
"valid_targets_mean": 9450.8,
|
|
"valid_targets_min": 5214
|
|
},
|
|
{
|
|
"epoch": 1.5465587044534415,
|
|
"grad_norm": 0.17884802766526564,
|
|
"learning_rate": 3.826106183947095e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04542143642902374,
|
|
"step": 510,
|
|
"valid_targets_mean": 8891.8,
|
|
"valid_targets_min": 2906
|
|
},
|
|
{
|
|
"epoch": 1.561740890688259,
|
|
"grad_norm": 0.16623924885791555,
|
|
"learning_rate": 3.8198912081493735e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04548874497413635,
|
|
"step": 515,
|
|
"valid_targets_mean": 8452.1,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 1.5769230769230769,
|
|
"grad_norm": 0.15889217659374227,
|
|
"learning_rate": 3.813572342258349e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045159127563238144,
|
|
"step": 520,
|
|
"valid_targets_mean": 8567.8,
|
|
"valid_targets_min": 3361
|
|
},
|
|
{
|
|
"epoch": 1.5921052631578947,
|
|
"grad_norm": 0.15033968156911973,
|
|
"learning_rate": 3.807149946992047e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04633079469203949,
|
|
"step": 525,
|
|
"valid_targets_mean": 8937.7,
|
|
"valid_targets_min": 2136
|
|
},
|
|
{
|
|
"epoch": 1.6072874493927125,
|
|
"grad_norm": 0.1747330273132041,
|
|
"learning_rate": 3.800624388978561e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05133660137653351,
|
|
"step": 530,
|
|
"valid_targets_mean": 9161.1,
|
|
"valid_targets_min": 3081
|
|
},
|
|
{
|
|
"epoch": 1.6224696356275303,
|
|
"grad_norm": 0.1650274855667034,
|
|
"learning_rate": 3.793996040735119e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05362251028418541,
|
|
"step": 535,
|
|
"valid_targets_mean": 9512.2,
|
|
"valid_targets_min": 4618
|
|
},
|
|
{
|
|
"epoch": 1.6376518218623481,
|
|
"grad_norm": 0.1653175025766043,
|
|
"learning_rate": 3.787265280646825e-05,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04587847739458084,
|
|
"step": 540,
|
|
"valid_targets_mean": 9317.7,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 1.652834008097166,
|
|
"grad_norm": 0.15427307033023013,
|
|
"learning_rate": 3.7804324929450465e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04268182814121246,
|
|
"step": 545,
|
|
"valid_targets_mean": 8158.3,
|
|
"valid_targets_min": 5338
|
|
},
|
|
{
|
|
"epoch": 1.6680161943319838,
|
|
"grad_norm": 0.15705494327581446,
|
|
"learning_rate": 3.7734980676854916e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04580182209610939,
|
|
"step": 550,
|
|
"valid_targets_mean": 8900.9,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 1.6831983805668016,
|
|
"grad_norm": 0.16209637909911334,
|
|
"learning_rate": 3.7664624007259375e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048418380320072174,
|
|
"step": 555,
|
|
"valid_targets_mean": 8476.7,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 1.6983805668016194,
|
|
"grad_norm": 0.15308690346469137,
|
|
"learning_rate": 3.759325893703631e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04391501098871231,
|
|
"step": 560,
|
|
"valid_targets_mean": 9046.7,
|
|
"valid_targets_min": 3708
|
|
},
|
|
{
|
|
"epoch": 1.7135627530364372,
|
|
"grad_norm": 0.1460247077316771,
|
|
"learning_rate": 3.752088954012366e-05,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04991579428315163,
|
|
"step": 565,
|
|
"valid_targets_mean": 8931.7,
|
|
"valid_targets_min": 2676
|
|
},
|
|
{
|
|
"epoch": 1.728744939271255,
|
|
"grad_norm": 0.16491501841380135,
|
|
"learning_rate": 3.74475199477922e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05305258929729462,
|
|
"step": 570,
|
|
"valid_targets_mean": 9082.5,
|
|
"valid_targets_min": 3889
|
|
},
|
|
{
|
|
"epoch": 1.7439271255060729,
|
|
"grad_norm": 0.15671106559921882,
|
|
"learning_rate": 3.7373154348409775e-05,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04614695906639099,
|
|
"step": 575,
|
|
"valid_targets_mean": 8367.6,
|
|
"valid_targets_min": 4370
|
|
},
|
|
{
|
|
"epoch": 1.7591093117408907,
|
|
"grad_norm": 0.16137733366704612,
|
|
"learning_rate": 3.729779698720215e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04977944493293762,
|
|
"step": 580,
|
|
"valid_targets_mean": 8765.1,
|
|
"valid_targets_min": 3202
|
|
},
|
|
{
|
|
"epoch": 1.7742914979757085,
|
|
"grad_norm": 0.1804575669775133,
|
|
"learning_rate": 3.7221452166010704e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043894704431295395,
|
|
"step": 585,
|
|
"valid_targets_mean": 8582.4,
|
|
"valid_targets_min": 3706
|
|
},
|
|
{
|
|
"epoch": 1.7894736842105263,
|
|
"grad_norm": 0.16049456861921607,
|
|
"learning_rate": 3.7144124243046815e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0525498241186142,
|
|
"step": 590,
|
|
"valid_targets_mean": 8860.8,
|
|
"valid_targets_min": 3828
|
|
},
|
|
{
|
|
"epoch": 1.8046558704453441,
|
|
"grad_norm": 0.16468743418518106,
|
|
"learning_rate": 3.7065817632643115e-05,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044600050896406174,
|
|
"step": 595,
|
|
"valid_targets_mean": 8531.2,
|
|
"valid_targets_min": 3881
|
|
},
|
|
{
|
|
"epoch": 1.819838056680162,
|
|
"grad_norm": 0.16644621279969488,
|
|
"learning_rate": 3.6986536805001466e-05,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04936189204454422,
|
|
"step": 600,
|
|
"valid_targets_mean": 8512.3,
|
|
"valid_targets_min": 2760
|
|
},
|
|
{
|
|
"epoch": 1.8350202429149798,
|
|
"grad_norm": 0.15965285813326344,
|
|
"learning_rate": 3.690628628593777e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048563506454229355,
|
|
"step": 605,
|
|
"valid_targets_mean": 8799.1,
|
|
"valid_targets_min": 3515
|
|
},
|
|
{
|
|
"epoch": 1.8502024291497976,
|
|
"grad_norm": 0.14861834303664817,
|
|
"learning_rate": 3.6825070656623626e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04463230445981026,
|
|
"step": 610,
|
|
"valid_targets_mean": 9052.3,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 1.8653846153846154,
|
|
"grad_norm": 0.15366428521133582,
|
|
"learning_rate": 3.67428945533248e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04852573573589325,
|
|
"step": 615,
|
|
"valid_targets_mean": 9325.1,
|
|
"valid_targets_min": 4081
|
|
},
|
|
{
|
|
"epoch": 1.8805668016194332,
|
|
"grad_norm": 0.153953750776976,
|
|
"learning_rate": 3.6659762667136567e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052868276834487915,
|
|
"step": 620,
|
|
"valid_targets_mean": 9947.7,
|
|
"valid_targets_min": 4796
|
|
},
|
|
{
|
|
"epoch": 1.895748987854251,
|
|
"grad_norm": 0.1616608725715566,
|
|
"learning_rate": 3.65756797437159e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04202054440975189,
|
|
"step": 625,
|
|
"valid_targets_mean": 8547.9,
|
|
"valid_targets_min": 2143
|
|
},
|
|
{
|
|
"epoch": 1.9109311740890689,
|
|
"grad_norm": 0.1702457387008086,
|
|
"learning_rate": 3.6490650583010585e-05,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04351646080613136,
|
|
"step": 630,
|
|
"valid_targets_mean": 8220.0,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 1.9261133603238867,
|
|
"grad_norm": 0.1605698012641613,
|
|
"learning_rate": 3.640468003898518e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043886855244636536,
|
|
"step": 635,
|
|
"valid_targets_mean": 8190.2,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 1.9412955465587043,
|
|
"grad_norm": 0.1590681633258748,
|
|
"learning_rate": 3.631777301934395e-05,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04289356991648674,
|
|
"step": 640,
|
|
"valid_targets_mean": 8163.5,
|
|
"valid_targets_min": 3034
|
|
},
|
|
{
|
|
"epoch": 1.9564777327935223,
|
|
"grad_norm": 0.16228399632373447,
|
|
"learning_rate": 3.6229934485250684e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045245829969644547,
|
|
"step": 645,
|
|
"valid_targets_mean": 8741.6,
|
|
"valid_targets_min": 2680
|
|
},
|
|
{
|
|
"epoch": 1.97165991902834,
|
|
"grad_norm": 0.2008639428797295,
|
|
"learning_rate": 3.6141169451045526e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04775421321392059,
|
|
"step": 650,
|
|
"valid_targets_mean": 9020.0,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 1.986842105263158,
|
|
"grad_norm": 0.14757871074233309,
|
|
"learning_rate": 3.605148298395865e-05,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04407476633787155,
|
|
"step": 655,
|
|
"valid_targets_mean": 8646.0,
|
|
"valid_targets_min": 2358
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.2558999576476579,
|
|
"learning_rate": 3.5960880203821086e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13767139613628387,
|
|
"step": 660,
|
|
"valid_targets_mean": 9397.5,
|
|
"valid_targets_min": 2582
|
|
},
|
|
{
|
|
"epoch": 2.0151821862348176,
|
|
"grad_norm": 0.15935679170001496,
|
|
"learning_rate": 3.5869366282772354e-05,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04452720284461975,
|
|
"step": 665,
|
|
"valid_targets_mean": 8997.2,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 2.0303643724696356,
|
|
"grad_norm": 0.1536696386706909,
|
|
"learning_rate": 3.577694644496529e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042959071695804596,
|
|
"step": 670,
|
|
"valid_targets_mean": 8815.9,
|
|
"valid_targets_min": 4417
|
|
},
|
|
{
|
|
"epoch": 2.0455465587044532,
|
|
"grad_norm": 0.15147850871284985,
|
|
"learning_rate": 3.568362596626778e-05,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04049764573574066,
|
|
"step": 675,
|
|
"valid_targets_mean": 8330.5,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 2.0607287449392713,
|
|
"grad_norm": 0.17695260979045982,
|
|
"learning_rate": 3.558941017396158e-05,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044541601091623306,
|
|
"step": 680,
|
|
"valid_targets_mean": 8493.7,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 2.075910931174089,
|
|
"grad_norm": 0.1658627315457558,
|
|
"learning_rate": 3.549430444643821e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048384398221969604,
|
|
"step": 685,
|
|
"valid_targets_mean": 9160.6,
|
|
"valid_targets_min": 3120
|
|
},
|
|
{
|
|
"epoch": 2.091093117408907,
|
|
"grad_norm": 0.14890699972314517,
|
|
"learning_rate": 3.539831421289195e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04115760326385498,
|
|
"step": 690,
|
|
"valid_targets_mean": 8539.5,
|
|
"valid_targets_min": 3485
|
|
},
|
|
{
|
|
"epoch": 2.1062753036437245,
|
|
"grad_norm": 0.15401535178265185,
|
|
"learning_rate": 3.530144495300984e-05,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04603859782218933,
|
|
"step": 695,
|
|
"valid_targets_mean": 9422.6,
|
|
"valid_targets_min": 2913
|
|
},
|
|
{
|
|
"epoch": 2.1214574898785425,
|
|
"grad_norm": 0.17281123052347433,
|
|
"learning_rate": 3.520370219665897e-05,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04628307744860649,
|
|
"step": 700,
|
|
"valid_targets_mean": 8580.9,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 2.13663967611336,
|
|
"grad_norm": 0.15752381282715167,
|
|
"learning_rate": 3.51050915235707e-05,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04009954258799553,
|
|
"step": 705,
|
|
"valid_targets_mean": 8849.5,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 2.151821862348178,
|
|
"grad_norm": 0.1825476839094314,
|
|
"learning_rate": 3.500561856302221e-05,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04230467975139618,
|
|
"step": 710,
|
|
"valid_targets_mean": 8192.2,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 2.167004048582996,
|
|
"grad_norm": 0.185721843487652,
|
|
"learning_rate": 3.49052889935151e-05,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04545104503631592,
|
|
"step": 715,
|
|
"valid_targets_mean": 8755.0,
|
|
"valid_targets_min": 4146
|
|
},
|
|
{
|
|
"epoch": 2.182186234817814,
|
|
"grad_norm": 0.23301552756078972,
|
|
"learning_rate": 3.480410854245125e-05,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04512922465801239,
|
|
"step": 720,
|
|
"valid_targets_mean": 8630.2,
|
|
"valid_targets_min": 2440
|
|
},
|
|
{
|
|
"epoch": 2.1973684210526314,
|
|
"grad_norm": 0.156936700577527,
|
|
"learning_rate": 3.470208298580589e-05,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04238954186439514,
|
|
"step": 725,
|
|
"valid_targets_mean": 8521.6,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 2.2125506072874495,
|
|
"grad_norm": 0.15848710077041173,
|
|
"learning_rate": 3.459921814779781e-05,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0377664640545845,
|
|
"step": 730,
|
|
"valid_targets_mean": 8313.0,
|
|
"valid_targets_min": 2471
|
|
},
|
|
{
|
|
"epoch": 2.227732793522267,
|
|
"grad_norm": 0.14868949536979442,
|
|
"learning_rate": 3.449551990055694e-05,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040457479655742645,
|
|
"step": 735,
|
|
"valid_targets_mean": 8293.0,
|
|
"valid_targets_min": 169
|
|
},
|
|
{
|
|
"epoch": 2.242914979757085,
|
|
"grad_norm": 0.16327704261828033,
|
|
"learning_rate": 3.43909941637891e-05,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04823645204305649,
|
|
"step": 740,
|
|
"valid_targets_mean": 9331.3,
|
|
"valid_targets_min": 3646
|
|
},
|
|
{
|
|
"epoch": 2.2580971659919027,
|
|
"grad_norm": 0.16053069754910398,
|
|
"learning_rate": 3.428564690443807e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03866049647331238,
|
|
"step": 745,
|
|
"valid_targets_mean": 8606.2,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 2.2732793522267207,
|
|
"grad_norm": 0.16805675581563118,
|
|
"learning_rate": 3.4179484136345e-05,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04024505987763405,
|
|
"step": 750,
|
|
"valid_targets_mean": 8254.0,
|
|
"valid_targets_min": 1867
|
|
},
|
|
{
|
|
"epoch": 2.2884615384615383,
|
|
"grad_norm": 0.17159462960840738,
|
|
"learning_rate": 3.4072511919905055e-05,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050391484051942825,
|
|
"step": 755,
|
|
"valid_targets_mean": 8458.7,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 2.3036437246963564,
|
|
"grad_norm": 0.1689198648062733,
|
|
"learning_rate": 3.396473636172146e-05,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0432906337082386,
|
|
"step": 760,
|
|
"valid_targets_mean": 8106.2,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 2.318825910931174,
|
|
"grad_norm": 0.15935204107678358,
|
|
"learning_rate": 3.385616361425694e-05,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03819669038057327,
|
|
"step": 765,
|
|
"valid_targets_mean": 8321.8,
|
|
"valid_targets_min": 3086
|
|
},
|
|
{
|
|
"epoch": 2.334008097165992,
|
|
"grad_norm": 0.16063353203558794,
|
|
"learning_rate": 3.374679987548245e-05,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04191069304943085,
|
|
"step": 770,
|
|
"valid_targets_mean": 8667.0,
|
|
"valid_targets_min": 2675
|
|
},
|
|
{
|
|
"epoch": 2.3491902834008096,
|
|
"grad_norm": 0.15637531722821663,
|
|
"learning_rate": 3.363665138852339e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046588316559791565,
|
|
"step": 775,
|
|
"valid_targets_mean": 8686.8,
|
|
"valid_targets_min": 3903
|
|
},
|
|
{
|
|
"epoch": 2.3643724696356276,
|
|
"grad_norm": 0.15611899733739656,
|
|
"learning_rate": 3.3525724441303206e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045928120613098145,
|
|
"step": 780,
|
|
"valid_targets_mean": 9167.0,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 2.3795546558704452,
|
|
"grad_norm": 0.15404210675550017,
|
|
"learning_rate": 3.341402536618442e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0457453578710556,
|
|
"step": 785,
|
|
"valid_targets_mean": 8483.3,
|
|
"valid_targets_min": 3143
|
|
},
|
|
{
|
|
"epoch": 2.3947368421052633,
|
|
"grad_norm": 0.15822997720827653,
|
|
"learning_rate": 3.3301560539607174e-05,
|
|
"loss": 0.1314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0460953526198864,
|
|
"step": 790,
|
|
"valid_targets_mean": 9069.6,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 2.409919028340081,
|
|
"grad_norm": 0.16329241992620094,
|
|
"learning_rate": 3.318833638172519e-05,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04712095111608505,
|
|
"step": 795,
|
|
"valid_targets_mean": 8651.1,
|
|
"valid_targets_min": 2365
|
|
},
|
|
{
|
|
"epoch": 2.425101214574899,
|
|
"grad_norm": 0.1535215378198103,
|
|
"learning_rate": 3.3074359356039306e-05,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041144803166389465,
|
|
"step": 800,
|
|
"valid_targets_mean": 9126.9,
|
|
"valid_targets_min": 5194
|
|
},
|
|
{
|
|
"epoch": 2.4402834008097165,
|
|
"grad_norm": 0.15466488319288582,
|
|
"learning_rate": 3.295963596902846e-05,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041372884064912796,
|
|
"step": 805,
|
|
"valid_targets_mean": 8714.0,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 2.4554655870445345,
|
|
"grad_norm": 0.1789436585950343,
|
|
"learning_rate": 3.284417276977829e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0411355085670948,
|
|
"step": 810,
|
|
"valid_targets_mean": 8818.2,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 2.470647773279352,
|
|
"grad_norm": 0.15754087310337617,
|
|
"learning_rate": 3.2727976349607276e-05,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04501304402947426,
|
|
"step": 815,
|
|
"valid_targets_mean": 8838.6,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 2.48582995951417,
|
|
"grad_norm": 0.14921639988521102,
|
|
"learning_rate": 3.261105334169045e-05,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04635072499513626,
|
|
"step": 820,
|
|
"valid_targets_mean": 9581.3,
|
|
"valid_targets_min": 4055
|
|
},
|
|
{
|
|
"epoch": 2.501012145748988,
|
|
"grad_norm": 0.14250741228116526,
|
|
"learning_rate": 3.249341042068077e-05,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043387480080127716,
|
|
"step": 825,
|
|
"valid_targets_mean": 8986.5,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 2.516194331983806,
|
|
"grad_norm": 0.15741238584094508,
|
|
"learning_rate": 3.237505430232803e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04424908757209778,
|
|
"step": 830,
|
|
"valid_targets_mean": 8606.3,
|
|
"valid_targets_min": 4732
|
|
},
|
|
{
|
|
"epoch": 2.5313765182186234,
|
|
"grad_norm": 0.16497215809191318,
|
|
"learning_rate": 3.225599174309554e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04215235635638237,
|
|
"step": 835,
|
|
"valid_targets_mean": 9411.6,
|
|
"valid_targets_min": 2365
|
|
},
|
|
{
|
|
"epoch": 2.5465587044534415,
|
|
"grad_norm": 0.14907369869184323,
|
|
"learning_rate": 3.213622953977443e-05,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04386545717716217,
|
|
"step": 840,
|
|
"valid_targets_mean": 9131.6,
|
|
"valid_targets_min": 5686
|
|
},
|
|
{
|
|
"epoch": 2.561740890688259,
|
|
"grad_norm": 0.14806841435324464,
|
|
"learning_rate": 3.2015774529095595e-05,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04302568733692169,
|
|
"step": 845,
|
|
"valid_targets_mean": 9056.0,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 2.5769230769230766,
|
|
"grad_norm": 0.1525923575504136,
|
|
"learning_rate": 3.189463358733947e-05,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04825190454721451,
|
|
"step": 850,
|
|
"valid_targets_mean": 8029.7,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 2.5921052631578947,
|
|
"grad_norm": 0.15382208019852647,
|
|
"learning_rate": 3.1772813629943455e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04323762655258179,
|
|
"step": 855,
|
|
"valid_targets_mean": 8941.4,
|
|
"valid_targets_min": 3512
|
|
},
|
|
{
|
|
"epoch": 2.6072874493927127,
|
|
"grad_norm": 0.16995153234462348,
|
|
"learning_rate": 3.1650321611107195e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04226597398519516,
|
|
"step": 860,
|
|
"valid_targets_mean": 8734.8,
|
|
"valid_targets_min": 2738
|
|
},
|
|
{
|
|
"epoch": 2.6224696356275303,
|
|
"grad_norm": 0.15749590890685922,
|
|
"learning_rate": 3.152716452339549e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04876934364438057,
|
|
"step": 865,
|
|
"valid_targets_mean": 9120.7,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 2.637651821862348,
|
|
"grad_norm": 0.1601198917886538,
|
|
"learning_rate": 3.140334939733924e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04381168633699417,
|
|
"step": 870,
|
|
"valid_targets_mean": 9345.4,
|
|
"valid_targets_min": 3724
|
|
},
|
|
{
|
|
"epoch": 2.652834008097166,
|
|
"grad_norm": 0.14891638890470824,
|
|
"learning_rate": 3.127888330103401e-05,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04753772169351578,
|
|
"step": 875,
|
|
"valid_targets_mean": 9191.9,
|
|
"valid_targets_min": 2454
|
|
},
|
|
{
|
|
"epoch": 2.668016194331984,
|
|
"grad_norm": 0.15038010633613558,
|
|
"learning_rate": 3.115377333973659e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04456303268671036,
|
|
"step": 880,
|
|
"valid_targets_mean": 8895.0,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 2.6831983805668016,
|
|
"grad_norm": 0.15829418709553383,
|
|
"learning_rate": 3.1028026655459376e-05,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04604408144950867,
|
|
"step": 885,
|
|
"valid_targets_mean": 8898.2,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 2.698380566801619,
|
|
"grad_norm": 0.15379243184377864,
|
|
"learning_rate": 3.0901650426562634e-05,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04278646409511566,
|
|
"step": 890,
|
|
"valid_targets_mean": 8021.3,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 2.7135627530364372,
|
|
"grad_norm": 0.16217400334195092,
|
|
"learning_rate": 3.0774651867344765e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04596666991710663,
|
|
"step": 895,
|
|
"valid_targets_mean": 9190.2,
|
|
"valid_targets_min": 2618
|
|
},
|
|
{
|
|
"epoch": 2.7287449392712553,
|
|
"grad_norm": 0.1535023297117499,
|
|
"learning_rate": 3.064703822763043e-05,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042456094175577164,
|
|
"step": 900,
|
|
"valid_targets_mean": 8620.0,
|
|
"valid_targets_min": 3497
|
|
},
|
|
{
|
|
"epoch": 2.743927125506073,
|
|
"grad_norm": 0.15676191481014795,
|
|
"learning_rate": 3.05188167923567e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04132018983364105,
|
|
"step": 905,
|
|
"valid_targets_mean": 8797.9,
|
|
"valid_targets_min": 3961
|
|
},
|
|
{
|
|
"epoch": 2.7591093117408905,
|
|
"grad_norm": 0.15299501602871654,
|
|
"learning_rate": 3.0389994881157207e-05,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03875969350337982,
|
|
"step": 910,
|
|
"valid_targets_mean": 8671.8,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 2.7742914979757085,
|
|
"grad_norm": 0.14788803801176695,
|
|
"learning_rate": 3.0260579847944255e-05,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0463220551609993,
|
|
"step": 915,
|
|
"valid_targets_mean": 8983.8,
|
|
"valid_targets_min": 3997
|
|
},
|
|
{
|
|
"epoch": 2.7894736842105265,
|
|
"grad_norm": 0.14875353056773674,
|
|
"learning_rate": 3.0130579080489064e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04253089055418968,
|
|
"step": 920,
|
|
"valid_targets_mean": 8917.7,
|
|
"valid_targets_min": 2760
|
|
},
|
|
{
|
|
"epoch": 2.804655870445344,
|
|
"grad_norm": 0.15938573203384698,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044379495084285736,
|
|
"step": 925,
|
|
"valid_targets_mean": 8190.1,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 2.8198380566801617,
|
|
"grad_norm": 0.1734821066424687,
|
|
"learning_rate": 2.986885006069894e-05,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0416317880153656,
|
|
"step": 930,
|
|
"valid_targets_mean": 8339.9,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 2.83502024291498,
|
|
"grad_norm": 0.15222747704784714,
|
|
"learning_rate": 2.9737136749395743e-05,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03966110944747925,
|
|
"step": 935,
|
|
"valid_targets_mean": 8681.9,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 2.850202429149798,
|
|
"grad_norm": 0.15866652733654446,
|
|
"learning_rate": 2.9604867585060834e-05,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043144941329956055,
|
|
"step": 940,
|
|
"valid_targets_mean": 8277.4,
|
|
"valid_targets_min": 2753
|
|
},
|
|
{
|
|
"epoch": 2.8653846153846154,
|
|
"grad_norm": 0.151759478914717,
|
|
"learning_rate": 2.9472050118396034e-05,
|
|
"loss": 0.1287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04288342967629433,
|
|
"step": 945,
|
|
"valid_targets_mean": 8615.8,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 2.880566801619433,
|
|
"grad_norm": 0.1481546454649069,
|
|
"learning_rate": 2.933869193140346e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045005783438682556,
|
|
"step": 950,
|
|
"valid_targets_mean": 8879.0,
|
|
"valid_targets_min": 2933
|
|
},
|
|
{
|
|
"epoch": 2.895748987854251,
|
|
"grad_norm": 0.14158232705509818,
|
|
"learning_rate": 2.920480063695272e-05,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04272865876555443,
|
|
"step": 955,
|
|
"valid_targets_mean": 7664.7,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 2.910931174089069,
|
|
"grad_norm": 0.14861731456343164,
|
|
"learning_rate": 2.9070383878346358e-05,
|
|
"loss": 0.1287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0426495186984539,
|
|
"step": 960,
|
|
"valid_targets_mean": 8834.1,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 2.9261133603238867,
|
|
"grad_norm": 0.15799041768466898,
|
|
"learning_rate": 2.8935449328883478e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05004576966166496,
|
|
"step": 965,
|
|
"valid_targets_mean": 9780.4,
|
|
"valid_targets_min": 5182
|
|
},
|
|
{
|
|
"epoch": 2.9412955465587043,
|
|
"grad_norm": 0.15604595746423008,
|
|
"learning_rate": 2.8800004691421743e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03929636627435684,
|
|
"step": 970,
|
|
"valid_targets_mean": 8197.4,
|
|
"valid_targets_min": 2872
|
|
},
|
|
{
|
|
"epoch": 2.9564777327935223,
|
|
"grad_norm": 0.1482070849648165,
|
|
"learning_rate": 2.866405769793764e-05,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040082208812236786,
|
|
"step": 975,
|
|
"valid_targets_mean": 8498.5,
|
|
"valid_targets_min": 4977
|
|
},
|
|
{
|
|
"epoch": 2.97165991902834,
|
|
"grad_norm": 0.15175097097939422,
|
|
"learning_rate": 2.8527616109085082e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04318765178322792,
|
|
"step": 980,
|
|
"valid_targets_mean": 8163.7,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 2.986842105263158,
|
|
"grad_norm": 0.15381180647510456,
|
|
"learning_rate": 2.8390687713752405e-05,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044973067939281464,
|
|
"step": 985,
|
|
"valid_targets_mean": 8489.5,
|
|
"valid_targets_min": 4762
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.2567637471193949,
|
|
"learning_rate": 2.8253280328617712e-05,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12599436938762665,
|
|
"step": 990,
|
|
"valid_targets_mean": 7403.2,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 3.0151821862348176,
|
|
"grad_norm": 0.1549445114950489,
|
|
"learning_rate": 2.811540179770268e-05,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043905895203351974,
|
|
"step": 995,
|
|
"valid_targets_mean": 9343.4,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 3.0303643724696356,
|
|
"grad_norm": 0.16045623184517988,
|
|
"learning_rate": 2.7977059991924734e-05,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03658217191696167,
|
|
"step": 1000,
|
|
"valid_targets_mean": 7559.3,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 3.0455465587044532,
|
|
"grad_norm": 0.16134151780744238,
|
|
"learning_rate": 2.7838262808647766e-05,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04394662380218506,
|
|
"step": 1005,
|
|
"valid_targets_mean": 8851.3,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 3.0607287449392713,
|
|
"grad_norm": 0.14840848118527455,
|
|
"learning_rate": 2.7699018171231288e-05,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03717267885804176,
|
|
"step": 1010,
|
|
"valid_targets_mean": 8412.5,
|
|
"valid_targets_min": 2458
|
|
},
|
|
{
|
|
"epoch": 3.075910931174089,
|
|
"grad_norm": 0.1572626068158782,
|
|
"learning_rate": 2.7559334028578135e-05,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03613312914967537,
|
|
"step": 1015,
|
|
"valid_targets_mean": 7928.6,
|
|
"valid_targets_min": 3499
|
|
},
|
|
{
|
|
"epoch": 3.091093117408907,
|
|
"grad_norm": 0.16150382376363603,
|
|
"learning_rate": 2.741921835468068e-05,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04448505491018295,
|
|
"step": 1020,
|
|
"valid_targets_mean": 8502.7,
|
|
"valid_targets_min": 2539
|
|
},
|
|
{
|
|
"epoch": 3.1062753036437245,
|
|
"grad_norm": 0.158621271118148,
|
|
"learning_rate": 2.727867914816563e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03956116363406181,
|
|
"step": 1025,
|
|
"valid_targets_mean": 8719.2,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 3.1214574898785425,
|
|
"grad_norm": 0.15668666339866982,
|
|
"learning_rate": 2.713772443183744e-05,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04132881388068199,
|
|
"step": 1030,
|
|
"valid_targets_mean": 8657.9,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 3.13663967611336,
|
|
"grad_norm": 0.15425024672882404,
|
|
"learning_rate": 2.69963622522203e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04236484318971634,
|
|
"step": 1035,
|
|
"valid_targets_mean": 9244.4,
|
|
"valid_targets_min": 2863
|
|
},
|
|
{
|
|
"epoch": 3.151821862348178,
|
|
"grad_norm": 0.15435070497140232,
|
|
"learning_rate": 2.6854600679098798e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04837214946746826,
|
|
"step": 1040,
|
|
"valid_targets_mean": 9055.3,
|
|
"valid_targets_min": 3790
|
|
},
|
|
{
|
|
"epoch": 3.167004048582996,
|
|
"grad_norm": 0.16934482471930706,
|
|
"learning_rate": 2.6712447805057254e-05,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041339434683322906,
|
|
"step": 1045,
|
|
"valid_targets_mean": 7839.9,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 3.182186234817814,
|
|
"grad_norm": 0.16457580029302746,
|
|
"learning_rate": 2.656991174501775e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04229741543531418,
|
|
"step": 1050,
|
|
"valid_targets_mean": 8372.1,
|
|
"valid_targets_min": 2661
|
|
},
|
|
{
|
|
"epoch": 3.1973684210526314,
|
|
"grad_norm": 0.15947440347148026,
|
|
"learning_rate": 2.642700063577689e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044598013162612915,
|
|
"step": 1055,
|
|
"valid_targets_mean": 8906.9,
|
|
"valid_targets_min": 3625
|
|
},
|
|
{
|
|
"epoch": 3.2125506072874495,
|
|
"grad_norm": 0.15389395685369764,
|
|
"learning_rate": 2.6283722635541255e-05,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03959325700998306,
|
|
"step": 1060,
|
|
"valid_targets_mean": 8015.9,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 3.227732793522267,
|
|
"grad_norm": 0.15940435457093158,
|
|
"learning_rate": 2.6140085923461756e-05,
|
|
"loss": 0.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044374607503414154,
|
|
"step": 1065,
|
|
"valid_targets_mean": 9583.9,
|
|
"valid_targets_min": 4463
|
|
},
|
|
{
|
|
"epoch": 3.242914979757085,
|
|
"grad_norm": 0.15084549507740558,
|
|
"learning_rate": 2.5996098699166678e-05,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0436861589550972,
|
|
"step": 1070,
|
|
"valid_targets_mean": 8967.4,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 3.2580971659919027,
|
|
"grad_norm": 0.14500488460255373,
|
|
"learning_rate": 2.585176918229359e-05,
|
|
"loss": 0.1264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04020432382822037,
|
|
"step": 1075,
|
|
"valid_targets_mean": 9149.8,
|
|
"valid_targets_min": 2736
|
|
},
|
|
{
|
|
"epoch": 3.2732793522267207,
|
|
"grad_norm": 0.15047003517813728,
|
|
"learning_rate": 2.5707105612020163e-05,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03743348643183708,
|
|
"step": 1080,
|
|
"valid_targets_mean": 9101.6,
|
|
"valid_targets_min": 2473
|
|
},
|
|
{
|
|
"epoch": 3.2884615384615383,
|
|
"grad_norm": 0.16144707240123685,
|
|
"learning_rate": 2.5562116246593765e-05,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0393046997487545,
|
|
"step": 1085,
|
|
"valid_targets_mean": 7955.2,
|
|
"valid_targets_min": 2192
|
|
},
|
|
{
|
|
"epoch": 3.3036437246963564,
|
|
"grad_norm": 0.14704275285197455,
|
|
"learning_rate": 2.5416809362860107e-05,
|
|
"loss": 0.1264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03915318474173546,
|
|
"step": 1090,
|
|
"valid_targets_mean": 8742.9,
|
|
"valid_targets_min": 3075
|
|
},
|
|
{
|
|
"epoch": 3.318825910931174,
|
|
"grad_norm": 0.1585633548765312,
|
|
"learning_rate": 2.5271193255790672e-05,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04191460460424423,
|
|
"step": 1095,
|
|
"valid_targets_mean": 9302.1,
|
|
"valid_targets_min": 4611
|
|
},
|
|
{
|
|
"epoch": 3.334008097165992,
|
|
"grad_norm": 0.1706399621358052,
|
|
"learning_rate": 2.5125276238009254e-05,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03882264345884323,
|
|
"step": 1100,
|
|
"valid_targets_mean": 8410.0,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 3.3491902834008096,
|
|
"grad_norm": 0.1500306352899057,
|
|
"learning_rate": 2.4979066639317405e-05,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03827561438083649,
|
|
"step": 1105,
|
|
"valid_targets_mean": 8941.5,
|
|
"valid_targets_min": 6188
|
|
},
|
|
{
|
|
"epoch": 3.3643724696356276,
|
|
"grad_norm": 0.1892057054652365,
|
|
"learning_rate": 2.4832572806218892e-05,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04153843969106674,
|
|
"step": 1110,
|
|
"valid_targets_mean": 8349.2,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 3.3795546558704452,
|
|
"grad_norm": 0.163759441081374,
|
|
"learning_rate": 2.4685803101443276e-05,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04361855611205101,
|
|
"step": 1115,
|
|
"valid_targets_mean": 8993.6,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 3.3947368421052633,
|
|
"grad_norm": 0.15252978757683208,
|
|
"learning_rate": 2.4538765903468486e-05,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04113656282424927,
|
|
"step": 1120,
|
|
"valid_targets_mean": 8663.5,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 3.409919028340081,
|
|
"grad_norm": 0.14702887939268963,
|
|
"learning_rate": 2.4391469606042533e-05,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038167573511600494,
|
|
"step": 1125,
|
|
"valid_targets_mean": 8722.2,
|
|
"valid_targets_min": 1867
|
|
},
|
|
{
|
|
"epoch": 3.425101214574899,
|
|
"grad_norm": 0.14733721432591673,
|
|
"learning_rate": 2.4243922617704364e-05,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04190227389335632,
|
|
"step": 1130,
|
|
"valid_targets_mean": 8955.2,
|
|
"valid_targets_min": 4418
|
|
},
|
|
{
|
|
"epoch": 3.4402834008097165,
|
|
"grad_norm": 0.16466556764930732,
|
|
"learning_rate": 2.4096133361303816e-05,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0419926792383194,
|
|
"step": 1135,
|
|
"valid_targets_mean": 8360.8,
|
|
"valid_targets_min": 5281
|
|
},
|
|
{
|
|
"epoch": 3.4554655870445345,
|
|
"grad_norm": 0.1587067928649629,
|
|
"learning_rate": 2.394811027352083e-05,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04451531916856766,
|
|
"step": 1140,
|
|
"valid_targets_mean": 8621.5,
|
|
"valid_targets_min": 2575
|
|
},
|
|
{
|
|
"epoch": 3.470647773279352,
|
|
"grad_norm": 0.14824184299680462,
|
|
"learning_rate": 2.3799861804383807e-05,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047169029712677,
|
|
"step": 1145,
|
|
"valid_targets_mean": 9804.8,
|
|
"valid_targets_min": 2900
|
|
},
|
|
{
|
|
"epoch": 3.48582995951417,
|
|
"grad_norm": 0.1546324707351065,
|
|
"learning_rate": 2.365139641678724e-05,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03950756788253784,
|
|
"step": 1150,
|
|
"valid_targets_mean": 7724.9,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 3.501012145748988,
|
|
"grad_norm": 0.15806473238550206,
|
|
"learning_rate": 2.350272258600861e-05,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0438276007771492,
|
|
"step": 1155,
|
|
"valid_targets_mean": 8750.5,
|
|
"valid_targets_min": 2994
|
|
},
|
|
{
|
|
"epoch": 3.516194331983806,
|
|
"grad_norm": 0.15327824111263816,
|
|
"learning_rate": 2.335384879922456e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03901947662234306,
|
|
"step": 1160,
|
|
"valid_targets_mean": 7632.8,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 3.5313765182186234,
|
|
"grad_norm": 0.15227199025649257,
|
|
"learning_rate": 2.3204783555026394e-05,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041039686650037766,
|
|
"step": 1165,
|
|
"valid_targets_mean": 8952.3,
|
|
"valid_targets_min": 3359
|
|
},
|
|
{
|
|
"epoch": 3.5465587044534415,
|
|
"grad_norm": 0.1535381571149989,
|
|
"learning_rate": 2.3055535362934945e-05,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03722620755434036,
|
|
"step": 1170,
|
|
"valid_targets_mean": 8608.6,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 3.561740890688259,
|
|
"grad_norm": 0.1494179282174366,
|
|
"learning_rate": 2.2906112742914756e-05,
|
|
"loss": 0.1245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044452108442783356,
|
|
"step": 1175,
|
|
"valid_targets_mean": 9429.9,
|
|
"valid_targets_min": 3328
|
|
},
|
|
{
|
|
"epoch": 3.5769230769230766,
|
|
"grad_norm": 0.15838630357569386,
|
|
"learning_rate": 2.2756524224887776e-05,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04705264791846275,
|
|
"step": 1180,
|
|
"valid_targets_mean": 8732.2,
|
|
"valid_targets_min": 2608
|
|
},
|
|
{
|
|
"epoch": 3.5921052631578947,
|
|
"grad_norm": 0.479531958518713,
|
|
"learning_rate": 2.2606778348246366e-05,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051596008241176605,
|
|
"step": 1185,
|
|
"valid_targets_mean": 9538.2,
|
|
"valid_targets_min": 2406
|
|
},
|
|
{
|
|
"epoch": 3.6072874493927127,
|
|
"grad_norm": 0.15768302901091424,
|
|
"learning_rate": 2.245688366136586e-05,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03913229703903198,
|
|
"step": 1190,
|
|
"valid_targets_mean": 8739.5,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 3.6224696356275303,
|
|
"grad_norm": 0.1514649944833031,
|
|
"learning_rate": 2.2306848721116535e-05,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04456475377082825,
|
|
"step": 1195,
|
|
"valid_targets_mean": 9431.6,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 3.637651821862348,
|
|
"grad_norm": 0.3416764457607962,
|
|
"learning_rate": 2.2156682092375175e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03972535580396652,
|
|
"step": 1200,
|
|
"valid_targets_mean": 7870.4,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.652834008097166,
|
|
"grad_norm": 0.14445018138146465,
|
|
"learning_rate": 2.2006392347536102e-05,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04410581290721893,
|
|
"step": 1205,
|
|
"valid_targets_mean": 9460.3,
|
|
"valid_targets_min": 3977
|
|
},
|
|
{
|
|
"epoch": 3.668016194331984,
|
|
"grad_norm": 0.1545189838399238,
|
|
"learning_rate": 2.1855988066021837e-05,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04512324556708336,
|
|
"step": 1210,
|
|
"valid_targets_mean": 8020.6,
|
|
"valid_targets_min": 1960
|
|
},
|
|
{
|
|
"epoch": 3.6831983805668016,
|
|
"grad_norm": 0.14639809082457697,
|
|
"learning_rate": 2.1705477833793332e-05,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040943436324596405,
|
|
"step": 1215,
|
|
"valid_targets_mean": 9014.6,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 3.698380566801619,
|
|
"grad_norm": 0.15430984892013616,
|
|
"learning_rate": 2.1554870242859813e-05,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03510109335184097,
|
|
"step": 1220,
|
|
"valid_targets_mean": 7745.8,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 3.7135627530364372,
|
|
"grad_norm": 0.15843955512661506,
|
|
"learning_rate": 2.140417389078833e-05,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041233956813812256,
|
|
"step": 1225,
|
|
"valid_targets_mean": 8852.4,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 3.7287449392712553,
|
|
"grad_norm": 0.14935902019976924,
|
|
"learning_rate": 2.1253397380212935e-05,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046594470739364624,
|
|
"step": 1230,
|
|
"valid_targets_mean": 9674.4,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 3.743927125506073,
|
|
"grad_norm": 0.1509011485962604,
|
|
"learning_rate": 2.110254931834359e-05,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039416853338479996,
|
|
"step": 1235,
|
|
"valid_targets_mean": 8254.4,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 3.7591093117408905,
|
|
"grad_norm": 0.14808132553079567,
|
|
"learning_rate": 2.095163831647485e-05,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038367923349142075,
|
|
"step": 1240,
|
|
"valid_targets_mean": 8235.9,
|
|
"valid_targets_min": 2508
|
|
},
|
|
{
|
|
"epoch": 3.7742914979757085,
|
|
"grad_norm": 0.14556659272738734,
|
|
"learning_rate": 2.0800672989494225e-05,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04000256210565567,
|
|
"step": 1245,
|
|
"valid_targets_mean": 8814.3,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 3.7894736842105265,
|
|
"grad_norm": 0.15593220962145082,
|
|
"learning_rate": 2.0649661955390447e-05,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03942839801311493,
|
|
"step": 1250,
|
|
"valid_targets_mean": 8595.0,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 3.804655870445344,
|
|
"grad_norm": 0.1577930285895594,
|
|
"learning_rate": 2.0498613834761462e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044675737619400024,
|
|
"step": 1255,
|
|
"valid_targets_mean": 9996.3,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 3.8198380566801617,
|
|
"grad_norm": 0.15470832272042367,
|
|
"learning_rate": 2.0347537250322338e-05,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0493011511862278,
|
|
"step": 1260,
|
|
"valid_targets_mean": 8771.0,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 3.83502024291498,
|
|
"grad_norm": 0.1491026520073737,
|
|
"learning_rate": 2.0196440826413033e-05,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046027205884456635,
|
|
"step": 1265,
|
|
"valid_targets_mean": 9703.9,
|
|
"valid_targets_min": 5437
|
|
},
|
|
{
|
|
"epoch": 3.850202429149798,
|
|
"grad_norm": 0.148142490294004,
|
|
"learning_rate": 2.004533318850605e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04249934107065201,
|
|
"step": 1270,
|
|
"valid_targets_mean": 8803.2,
|
|
"valid_targets_min": 3976
|
|
},
|
|
{
|
|
"epoch": 3.8653846153846154,
|
|
"grad_norm": 0.14750493395334863,
|
|
"learning_rate": 1.9894222962714046e-05,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03465726971626282,
|
|
"step": 1275,
|
|
"valid_targets_mean": 7695.8,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 3.880566801619433,
|
|
"grad_norm": 0.1481419173314804,
|
|
"learning_rate": 1.9743118775297432e-05,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04196140170097351,
|
|
"step": 1280,
|
|
"valid_targets_mean": 8560.0,
|
|
"valid_targets_min": 3349
|
|
},
|
|
{
|
|
"epoch": 3.895748987854251,
|
|
"grad_norm": 1.3876213884955102,
|
|
"learning_rate": 1.95920292521719e-05,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04186993092298508,
|
|
"step": 1285,
|
|
"valid_targets_mean": 8933.8,
|
|
"valid_targets_min": 4202
|
|
},
|
|
{
|
|
"epoch": 3.910931174089069,
|
|
"grad_norm": 0.1496918217027555,
|
|
"learning_rate": 1.9440963018416002e-05,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041470885276794434,
|
|
"step": 1290,
|
|
"valid_targets_mean": 9392.7,
|
|
"valid_targets_min": 4608
|
|
},
|
|
{
|
|
"epoch": 3.9261133603238867,
|
|
"grad_norm": 0.14656629892490194,
|
|
"learning_rate": 1.9289928697778826e-05,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04186471551656723,
|
|
"step": 1295,
|
|
"valid_targets_mean": 9501.2,
|
|
"valid_targets_min": 3889
|
|
},
|
|
{
|
|
"epoch": 3.9412955465587043,
|
|
"grad_norm": 0.14649861326086483,
|
|
"learning_rate": 1.9138934912187647e-05,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04140534996986389,
|
|
"step": 1300,
|
|
"valid_targets_mean": 8548.6,
|
|
"valid_targets_min": 2683
|
|
},
|
|
{
|
|
"epoch": 3.9564777327935223,
|
|
"grad_norm": 0.1515435388712278,
|
|
"learning_rate": 1.898799028125578e-05,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04036547988653183,
|
|
"step": 1305,
|
|
"valid_targets_mean": 8206.3,
|
|
"valid_targets_min": 3121
|
|
},
|
|
{
|
|
"epoch": 3.97165991902834,
|
|
"grad_norm": 0.15066621564477503,
|
|
"learning_rate": 1.8837103421790486e-05,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04589315503835678,
|
|
"step": 1310,
|
|
"valid_targets_mean": 9368.8,
|
|
"valid_targets_min": 2512
|
|
},
|
|
{
|
|
"epoch": 3.986842105263158,
|
|
"grad_norm": 0.14075359439004434,
|
|
"learning_rate": 1.8686282947301108e-05,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04044787585735321,
|
|
"step": 1315,
|
|
"valid_targets_mean": 8683.1,
|
|
"valid_targets_min": 2766
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.2335485578626459,
|
|
"learning_rate": 1.8535537467507326e-05,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11808916926383972,
|
|
"step": 1320,
|
|
"valid_targets_mean": 8881.3,
|
|
"valid_targets_min": 4218
|
|
},
|
|
{
|
|
"epoch": 4.015182186234818,
|
|
"grad_norm": 0.14424937076296693,
|
|
"learning_rate": 1.8384875587847698e-05,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0421166867017746,
|
|
"step": 1325,
|
|
"valid_targets_mean": 8659.3,
|
|
"valid_targets_min": 4154
|
|
},
|
|
{
|
|
"epoch": 4.030364372469635,
|
|
"grad_norm": 0.15483960464041605,
|
|
"learning_rate": 1.823430590898839e-05,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04533958062529564,
|
|
"step": 1330,
|
|
"valid_targets_mean": 9499.3,
|
|
"valid_targets_min": 4445
|
|
},
|
|
{
|
|
"epoch": 4.045546558704453,
|
|
"grad_norm": 0.1484622166732696,
|
|
"learning_rate": 1.8083837026332187e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03699018061161041,
|
|
"step": 1335,
|
|
"valid_targets_mean": 8525.4,
|
|
"valid_targets_min": 3169
|
|
},
|
|
{
|
|
"epoch": 4.060728744939271,
|
|
"grad_norm": 0.14629525654044165,
|
|
"learning_rate": 1.7933477529527862e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038242943584918976,
|
|
"step": 1340,
|
|
"valid_targets_mean": 8128.8,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 4.075910931174089,
|
|
"grad_norm": 0.15361660200311683,
|
|
"learning_rate": 1.778323600197978e-05,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04874580353498459,
|
|
"step": 1345,
|
|
"valid_targets_mean": 9147.9,
|
|
"valid_targets_min": 2614
|
|
},
|
|
{
|
|
"epoch": 4.0910931174089065,
|
|
"grad_norm": 0.15153768243921545,
|
|
"learning_rate": 1.7633121020357945e-05,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04092816263437271,
|
|
"step": 1350,
|
|
"valid_targets_mean": 8476.8,
|
|
"valid_targets_min": 2352
|
|
},
|
|
{
|
|
"epoch": 4.1062753036437245,
|
|
"grad_norm": 0.15267652346075833,
|
|
"learning_rate": 1.748314115410835e-05,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04103293642401695,
|
|
"step": 1355,
|
|
"valid_targets_mean": 8737.6,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 4.1214574898785425,
|
|
"grad_norm": 0.150385217579552,
|
|
"learning_rate": 1.733330496496383e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03916670009493828,
|
|
"step": 1360,
|
|
"valid_targets_mean": 8396.7,
|
|
"valid_targets_min": 3006
|
|
},
|
|
{
|
|
"epoch": 4.136639676113361,
|
|
"grad_norm": 0.1482375008379734,
|
|
"learning_rate": 1.718362100645527e-05,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03730641305446625,
|
|
"step": 1365,
|
|
"valid_targets_mean": 8974.2,
|
|
"valid_targets_min": 3112
|
|
},
|
|
{
|
|
"epoch": 4.151821862348178,
|
|
"grad_norm": 0.15616251949520352,
|
|
"learning_rate": 1.7034097823423356e-05,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040191471576690674,
|
|
"step": 1370,
|
|
"valid_targets_mean": 8521.4,
|
|
"valid_targets_min": 1976
|
|
},
|
|
{
|
|
"epoch": 4.167004048582996,
|
|
"grad_norm": 0.14771942755467865,
|
|
"learning_rate": 1.6884743951530737e-05,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043233104050159454,
|
|
"step": 1375,
|
|
"valid_targets_mean": 9189.1,
|
|
"valid_targets_min": 5070
|
|
},
|
|
{
|
|
"epoch": 4.182186234817814,
|
|
"grad_norm": 0.1563735023291358,
|
|
"learning_rate": 1.6735567916774814e-05,
|
|
"loss": 0.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041832953691482544,
|
|
"step": 1380,
|
|
"valid_targets_mean": 8467.6,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 4.197368421052632,
|
|
"grad_norm": 0.15513523824599554,
|
|
"learning_rate": 1.6586578235000975e-05,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04277468100190163,
|
|
"step": 1385,
|
|
"valid_targets_mean": 8739.2,
|
|
"valid_targets_min": 4972
|
|
},
|
|
{
|
|
"epoch": 4.212550607287449,
|
|
"grad_norm": 0.14661571157289024,
|
|
"learning_rate": 1.64377834114165e-05,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03889099881052971,
|
|
"step": 1390,
|
|
"valid_targets_mean": 9052.2,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 4.227732793522267,
|
|
"grad_norm": 0.14833290730520524,
|
|
"learning_rate": 1.6289191940105004e-05,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04571734368801117,
|
|
"step": 1395,
|
|
"valid_targets_mean": 9368.6,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 4.242914979757085,
|
|
"grad_norm": 0.1487734906128939,
|
|
"learning_rate": 1.614081230354158e-05,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041585370898246765,
|
|
"step": 1400,
|
|
"valid_targets_mean": 8597.0,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 4.258097165991903,
|
|
"grad_norm": 0.15408266169396434,
|
|
"learning_rate": 1.599265297210852e-05,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04356096684932709,
|
|
"step": 1405,
|
|
"valid_targets_mean": 9392.8,
|
|
"valid_targets_min": 3799
|
|
},
|
|
{
|
|
"epoch": 4.27327935222672,
|
|
"grad_norm": 0.16767574643036068,
|
|
"learning_rate": 1.5844722403611814e-05,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03793244808912277,
|
|
"step": 1410,
|
|
"valid_targets_mean": 8547.8,
|
|
"valid_targets_min": 3952
|
|
},
|
|
{
|
|
"epoch": 4.288461538461538,
|
|
"grad_norm": 0.14731010983941775,
|
|
"learning_rate": 1.5697029042798334e-05,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04251984879374504,
|
|
"step": 1415,
|
|
"valid_targets_mean": 9005.2,
|
|
"valid_targets_min": 2636
|
|
},
|
|
{
|
|
"epoch": 4.303643724696356,
|
|
"grad_norm": 0.1507572644969978,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 0.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043081291019916534,
|
|
"step": 1420,
|
|
"valid_targets_mean": 8887.3,
|
|
"valid_targets_min": 2725
|
|
},
|
|
{
|
|
"epoch": 4.318825910931174,
|
|
"grad_norm": 0.15347883612243557,
|
|
"learning_rate": 1.5402387655021106e-05,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042788051068782806,
|
|
"step": 1425,
|
|
"valid_targets_mean": 9474.6,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 4.334008097165992,
|
|
"grad_norm": 0.14665855810488923,
|
|
"learning_rate": 1.5255456447920622e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04078325256705284,
|
|
"step": 1430,
|
|
"valid_targets_mean": 8637.2,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 4.34919028340081,
|
|
"grad_norm": 0.3338345154862536,
|
|
"learning_rate": 1.5108796087269704e-05,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04376282915472984,
|
|
"step": 1435,
|
|
"valid_targets_mean": 8192.0,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 4.364372469635628,
|
|
"grad_norm": 0.14597048522104478,
|
|
"learning_rate": 1.4962414945304269e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03519024699926376,
|
|
"step": 1440,
|
|
"valid_targets_mean": 7382.0,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 4.379554655870446,
|
|
"grad_norm": 0.15002300827824064,
|
|
"learning_rate": 1.48163213783208e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04229108616709709,
|
|
"step": 1445,
|
|
"valid_targets_mean": 8745.8,
|
|
"valid_targets_min": 2841
|
|
},
|
|
{
|
|
"epoch": 4.394736842105263,
|
|
"grad_norm": 0.1776084975120104,
|
|
"learning_rate": 1.4670523726199304e-05,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03828802704811096,
|
|
"step": 1450,
|
|
"valid_targets_mean": 8288.6,
|
|
"valid_targets_min": 2254
|
|
},
|
|
{
|
|
"epoch": 4.409919028340081,
|
|
"grad_norm": 0.13961816947767516,
|
|
"learning_rate": 1.4525030311927228e-05,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04169708117842674,
|
|
"step": 1455,
|
|
"valid_targets_mean": 9465.1,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 4.425101214574899,
|
|
"grad_norm": 0.15510366280812515,
|
|
"learning_rate": 1.4379849441124345e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04110729321837425,
|
|
"step": 1460,
|
|
"valid_targets_mean": 8532.7,
|
|
"valid_targets_min": 2906
|
|
},
|
|
{
|
|
"epoch": 4.440283400809717,
|
|
"grad_norm": 0.1620826149741563,
|
|
"learning_rate": 1.4234989401568584e-05,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04146167263388634,
|
|
"step": 1465,
|
|
"valid_targets_mean": 8441.8,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 4.455465587044534,
|
|
"grad_norm": 0.14568911238335258,
|
|
"learning_rate": 1.4090458462722964e-05,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03728796914219856,
|
|
"step": 1470,
|
|
"valid_targets_mean": 8762.0,
|
|
"valid_targets_min": 2716
|
|
},
|
|
{
|
|
"epoch": 4.470647773279352,
|
|
"grad_norm": 0.15300414438971766,
|
|
"learning_rate": 1.3946264875263485e-05,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03709544986486435,
|
|
"step": 1475,
|
|
"valid_targets_mean": 8773.9,
|
|
"valid_targets_min": 2646
|
|
},
|
|
{
|
|
"epoch": 4.48582995951417,
|
|
"grad_norm": 0.1638478213785049,
|
|
"learning_rate": 1.380241687060815e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03780919313430786,
|
|
"step": 1480,
|
|
"valid_targets_mean": 8156.3,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 4.501012145748988,
|
|
"grad_norm": 0.14444753558592385,
|
|
"learning_rate": 1.3658922660447065e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03851360082626343,
|
|
"step": 1485,
|
|
"valid_targets_mean": 8738.2,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 4.516194331983805,
|
|
"grad_norm": 0.1805172877884244,
|
|
"learning_rate": 1.3515790436273664e-05,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03554951027035713,
|
|
"step": 1490,
|
|
"valid_targets_mean": 8502.8,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 4.531376518218623,
|
|
"grad_norm": 0.14586650107000615,
|
|
"learning_rate": 1.3373028368917091e-05,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039785757660865784,
|
|
"step": 1495,
|
|
"valid_targets_mean": 9104.2,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 4.5465587044534415,
|
|
"grad_norm": 0.14582760660986635,
|
|
"learning_rate": 1.3230644608075766e-05,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039004694670438766,
|
|
"step": 1500,
|
|
"valid_targets_mean": 9152.1,
|
|
"valid_targets_min": 4467
|
|
},
|
|
{
|
|
"epoch": 4.5617408906882595,
|
|
"grad_norm": 0.15061910939999357,
|
|
"learning_rate": 1.3088647281852138e-05,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041532181203365326,
|
|
"step": 1505,
|
|
"valid_targets_mean": 8585.3,
|
|
"valid_targets_min": 2815
|
|
},
|
|
{
|
|
"epoch": 4.576923076923077,
|
|
"grad_norm": 0.16874033495524854,
|
|
"learning_rate": 1.2947044496288718e-05,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038819119334220886,
|
|
"step": 1510,
|
|
"valid_targets_mean": 8010.6,
|
|
"valid_targets_min": 2141
|
|
},
|
|
{
|
|
"epoch": 4.592105263157895,
|
|
"grad_norm": 0.14629754081603139,
|
|
"learning_rate": 1.2805844334905286e-05,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037289418280124664,
|
|
"step": 1515,
|
|
"valid_targets_mean": 8595.7,
|
|
"valid_targets_min": 4041
|
|
},
|
|
{
|
|
"epoch": 4.607287449392713,
|
|
"grad_norm": 0.1518190609304727,
|
|
"learning_rate": 1.266505485823748e-05,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03761237859725952,
|
|
"step": 1520,
|
|
"valid_targets_mean": 8275.9,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 4.62246963562753,
|
|
"grad_norm": 0.1557583794946101,
|
|
"learning_rate": 1.2524684103376643e-05,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036553531885147095,
|
|
"step": 1525,
|
|
"valid_targets_mean": 8159.9,
|
|
"valid_targets_min": 3255
|
|
},
|
|
{
|
|
"epoch": 4.637651821862348,
|
|
"grad_norm": 0.14820839625174673,
|
|
"learning_rate": 1.238474008351101e-05,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03886360675096512,
|
|
"step": 1530,
|
|
"valid_targets_mean": 8213.2,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 4.652834008097166,
|
|
"grad_norm": 0.15930696443099474,
|
|
"learning_rate": 1.2245230787468279e-05,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04124082624912262,
|
|
"step": 1535,
|
|
"valid_targets_mean": 9385.9,
|
|
"valid_targets_min": 3903
|
|
},
|
|
{
|
|
"epoch": 4.668016194331984,
|
|
"grad_norm": 0.1758900225432911,
|
|
"learning_rate": 1.210616417925954e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03479640558362007,
|
|
"step": 1540,
|
|
"valid_targets_mean": 8017.1,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 4.683198380566802,
|
|
"grad_norm": 0.15632267623149473,
|
|
"learning_rate": 1.1967548197624679e-05,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04241876304149628,
|
|
"step": 1545,
|
|
"valid_targets_mean": 9148.4,
|
|
"valid_targets_min": 2683
|
|
},
|
|
{
|
|
"epoch": 4.698380566801619,
|
|
"grad_norm": 0.15081345374550376,
|
|
"learning_rate": 1.182939075557915e-05,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03627622500061989,
|
|
"step": 1550,
|
|
"valid_targets_mean": 7729.2,
|
|
"valid_targets_min": 2551
|
|
},
|
|
{
|
|
"epoch": 4.713562753036437,
|
|
"grad_norm": 0.14353319160577724,
|
|
"learning_rate": 1.1691699739962275e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04693248122930527,
|
|
"step": 1555,
|
|
"valid_targets_mean": 9254.6,
|
|
"valid_targets_min": 3327
|
|
},
|
|
{
|
|
"epoch": 4.728744939271255,
|
|
"grad_norm": 0.14527623316996213,
|
|
"learning_rate": 1.1554483010987015e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038654111325740814,
|
|
"step": 1560,
|
|
"valid_targets_mean": 8169.5,
|
|
"valid_targets_min": 4188
|
|
},
|
|
{
|
|
"epoch": 4.743927125506072,
|
|
"grad_norm": 0.1449440866731695,
|
|
"learning_rate": 1.1417748401791261e-05,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04223459213972092,
|
|
"step": 1565,
|
|
"valid_targets_mean": 9472.7,
|
|
"valid_targets_min": 2863
|
|
},
|
|
{
|
|
"epoch": 4.7591093117408905,
|
|
"grad_norm": 0.14274341690157497,
|
|
"learning_rate": 1.1281503717990675e-05,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03809473291039467,
|
|
"step": 1570,
|
|
"valid_targets_mean": 8510.4,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 4.7742914979757085,
|
|
"grad_norm": 0.13432781173789377,
|
|
"learning_rate": 1.1145756737233078e-05,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0386568158864975,
|
|
"step": 1575,
|
|
"valid_targets_mean": 9388.8,
|
|
"valid_targets_min": 5430
|
|
},
|
|
{
|
|
"epoch": 4.7894736842105265,
|
|
"grad_norm": 0.15307205497325405,
|
|
"learning_rate": 1.1010515208754487e-05,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038382697850465775,
|
|
"step": 1580,
|
|
"valid_targets_mean": 8235.3,
|
|
"valid_targets_min": 3600
|
|
},
|
|
{
|
|
"epoch": 4.804655870445345,
|
|
"grad_norm": 0.15035714073438566,
|
|
"learning_rate": 1.087578685293674e-05,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04251728951931,
|
|
"step": 1585,
|
|
"valid_targets_mean": 8914.3,
|
|
"valid_targets_min": 2429
|
|
},
|
|
{
|
|
"epoch": 4.819838056680162,
|
|
"grad_norm": 0.1740143950172111,
|
|
"learning_rate": 1.0741579360866752e-05,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04433390498161316,
|
|
"step": 1590,
|
|
"valid_targets_mean": 8687.8,
|
|
"valid_targets_min": 2578
|
|
},
|
|
{
|
|
"epoch": 4.83502024291498,
|
|
"grad_norm": 0.14415125189275932,
|
|
"learning_rate": 1.0607900393897463e-05,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043581925332546234,
|
|
"step": 1595,
|
|
"valid_targets_mean": 9375.7,
|
|
"valid_targets_min": 3546
|
|
},
|
|
{
|
|
"epoch": 4.850202429149798,
|
|
"grad_norm": 0.15238644737473722,
|
|
"learning_rate": 1.0474757583210498e-05,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039840709418058395,
|
|
"step": 1600,
|
|
"valid_targets_mean": 9248.8,
|
|
"valid_targets_min": 2347
|
|
},
|
|
{
|
|
"epoch": 4.865384615384615,
|
|
"grad_norm": 0.14031680438463875,
|
|
"learning_rate": 1.0342158529380544e-05,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041934989392757416,
|
|
"step": 1605,
|
|
"valid_targets_mean": 9779.1,
|
|
"valid_targets_min": 4292
|
|
},
|
|
{
|
|
"epoch": 4.880566801619433,
|
|
"grad_norm": 0.1447525871338929,
|
|
"learning_rate": 1.0210110801941426e-05,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04487600177526474,
|
|
"step": 1610,
|
|
"valid_targets_mean": 9244.1,
|
|
"valid_targets_min": 2925
|
|
},
|
|
{
|
|
"epoch": 4.895748987854251,
|
|
"grad_norm": 0.14392030766765268,
|
|
"learning_rate": 1.0078621938954028e-05,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041137952357530594,
|
|
"step": 1615,
|
|
"valid_targets_mean": 9484.2,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 4.910931174089069,
|
|
"grad_norm": 0.15624483085261517,
|
|
"learning_rate": 9.947699446575951e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04009411484003067,
|
|
"step": 1620,
|
|
"valid_targets_mean": 8797.8,
|
|
"valid_targets_min": 3872
|
|
},
|
|
{
|
|
"epoch": 4.926113360323887,
|
|
"grad_norm": 0.1490815518073885,
|
|
"learning_rate": 9.817350798633068e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04571372643113136,
|
|
"step": 1625,
|
|
"valid_targets_mean": 9223.1,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 4.941295546558704,
|
|
"grad_norm": 0.14560061202609845,
|
|
"learning_rate": 9.687583436192785e-06,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04014992713928223,
|
|
"step": 1630,
|
|
"valid_targets_mean": 9132.6,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 4.956477732793522,
|
|
"grad_norm": 0.14205730178853657,
|
|
"learning_rate": 9.558404767139335e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041506752371788025,
|
|
"step": 1635,
|
|
"valid_targets_mean": 9088.0,
|
|
"valid_targets_min": 2783
|
|
},
|
|
{
|
|
"epoch": 4.97165991902834,
|
|
"grad_norm": 0.1477242935530279,
|
|
"learning_rate": 9.429822165750893e-06,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04207073897123337,
|
|
"step": 1640,
|
|
"valid_targets_mean": 8506.6,
|
|
"valid_targets_min": 2766
|
|
},
|
|
{
|
|
"epoch": 4.9868421052631575,
|
|
"grad_norm": 0.14651036543762613,
|
|
"learning_rate": 9.301842972278557e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03509750962257385,
|
|
"step": 1645,
|
|
"valid_targets_mean": 8407.2,
|
|
"valid_targets_min": 4752
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.2591231885587111,
|
|
"learning_rate": 9.174474492527359e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11598698794841766,
|
|
"step": 1650,
|
|
"valid_targets_mean": 8702.3,
|
|
"valid_targets_min": 3856
|
|
},
|
|
{
|
|
"epoch": 5.015182186234818,
|
|
"grad_norm": 0.14424400311655217,
|
|
"learning_rate": 9.047723997439206e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03879241645336151,
|
|
"step": 1655,
|
|
"valid_targets_mean": 9161.7,
|
|
"valid_targets_min": 3681
|
|
},
|
|
{
|
|
"epoch": 5.030364372469635,
|
|
"grad_norm": 0.14274802218201163,
|
|
"learning_rate": 8.921598722677796e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03961879760026932,
|
|
"step": 1660,
|
|
"valid_targets_mean": 8720.9,
|
|
"valid_targets_min": 2960
|
|
},
|
|
{
|
|
"epoch": 5.045546558704453,
|
|
"grad_norm": 0.13930242960708336,
|
|
"learning_rate": 8.796105868215592e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03404147922992706,
|
|
"step": 1665,
|
|
"valid_targets_mean": 9287.9,
|
|
"valid_targets_min": 2396
|
|
},
|
|
{
|
|
"epoch": 5.060728744939271,
|
|
"grad_norm": 0.14769078278156114,
|
|
"learning_rate": 8.671252597922768e-06,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0380229689180851,
|
|
"step": 1670,
|
|
"valid_targets_mean": 8819.9,
|
|
"valid_targets_min": 4068
|
|
},
|
|
{
|
|
"epoch": 5.075910931174089,
|
|
"grad_norm": 0.15269319550742577,
|
|
"learning_rate": 8.547046039158283e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03661078214645386,
|
|
"step": 1675,
|
|
"valid_targets_mean": 8611.3,
|
|
"valid_targets_min": 4370
|
|
},
|
|
{
|
|
"epoch": 5.0910931174089065,
|
|
"grad_norm": 0.1440779865704462,
|
|
"learning_rate": 8.423493282362982e-06,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04542199522256851,
|
|
"step": 1680,
|
|
"valid_targets_mean": 9269.4,
|
|
"valid_targets_min": 3034
|
|
},
|
|
{
|
|
"epoch": 5.1062753036437245,
|
|
"grad_norm": 0.14966505878927874,
|
|
"learning_rate": 8.300601380654883e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03750111907720566,
|
|
"step": 1685,
|
|
"valid_targets_mean": 8543.4,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 5.1214574898785425,
|
|
"grad_norm": 0.13821851007664596,
|
|
"learning_rate": 8.178377349426471e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040841199457645416,
|
|
"step": 1690,
|
|
"valid_targets_mean": 9076.1,
|
|
"valid_targets_min": 2962
|
|
},
|
|
{
|
|
"epoch": 5.136639676113361,
|
|
"grad_norm": 0.1516326462671708,
|
|
"learning_rate": 8.056828165944282e-06,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03998754546046257,
|
|
"step": 1695,
|
|
"valid_targets_mean": 8670.2,
|
|
"valid_targets_min": 3290
|
|
},
|
|
{
|
|
"epoch": 5.151821862348178,
|
|
"grad_norm": 0.14593862868856372,
|
|
"learning_rate": 7.93596076895055e-06,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03843914344906807,
|
|
"step": 1700,
|
|
"valid_targets_mean": 8765.9,
|
|
"valid_targets_min": 3742
|
|
},
|
|
{
|
|
"epoch": 5.167004048582996,
|
|
"grad_norm": 0.14448412348791823,
|
|
"learning_rate": 7.815782058267156e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04122088849544525,
|
|
"step": 1705,
|
|
"valid_targets_mean": 8931.1,
|
|
"valid_targets_min": 4341
|
|
},
|
|
{
|
|
"epoch": 5.182186234817814,
|
|
"grad_norm": 0.14702096384066451,
|
|
"learning_rate": 7.696298894401697e-06,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04166455566883087,
|
|
"step": 1710,
|
|
"valid_targets_mean": 8917.3,
|
|
"valid_targets_min": 5060
|
|
},
|
|
{
|
|
"epoch": 5.197368421052632,
|
|
"grad_norm": 0.1569685954392535,
|
|
"learning_rate": 7.577518098155869e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04449982941150665,
|
|
"step": 1715,
|
|
"valid_targets_mean": 8579.6,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 5.212550607287449,
|
|
"grad_norm": 0.14989183376235357,
|
|
"learning_rate": 7.459446450236087e-06,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031897980719804764,
|
|
"step": 1720,
|
|
"valid_targets_mean": 7559.5,
|
|
"valid_targets_min": 2976
|
|
},
|
|
{
|
|
"epoch": 5.227732793522267,
|
|
"grad_norm": 0.15619635661449857,
|
|
"learning_rate": 7.3420906908664345e-06,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03696688264608383,
|
|
"step": 1725,
|
|
"valid_targets_mean": 8058.6,
|
|
"valid_targets_min": 2887
|
|
},
|
|
{
|
|
"epoch": 5.242914979757085,
|
|
"grad_norm": 0.13251186237436371,
|
|
"learning_rate": 7.225457519403838e-06,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03453413397073746,
|
|
"step": 1730,
|
|
"valid_targets_mean": 8878.0,
|
|
"valid_targets_min": 4159
|
|
},
|
|
{
|
|
"epoch": 5.258097165991903,
|
|
"grad_norm": 0.143419093982024,
|
|
"learning_rate": 7.109553593955671e-06,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04014468565583229,
|
|
"step": 1735,
|
|
"valid_targets_mean": 8907.2,
|
|
"valid_targets_min": 5495
|
|
},
|
|
{
|
|
"epoch": 5.27327935222672,
|
|
"grad_norm": 0.15225455347406655,
|
|
"learning_rate": 6.99438553099965e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03962121158838272,
|
|
"step": 1740,
|
|
"valid_targets_mean": 8761.2,
|
|
"valid_targets_min": 3532
|
|
},
|
|
{
|
|
"epoch": 5.288461538461538,
|
|
"grad_norm": 0.14897138541014693,
|
|
"learning_rate": 6.879959905006135e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03960913419723511,
|
|
"step": 1745,
|
|
"valid_targets_mean": 9448.6,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 5.303643724696356,
|
|
"grad_norm": 0.14474707850777266,
|
|
"learning_rate": 6.766283248062817e-06,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04120979458093643,
|
|
"step": 1750,
|
|
"valid_targets_mean": 9467.6,
|
|
"valid_targets_min": 2293
|
|
},
|
|
{
|
|
"epoch": 5.318825910931174,
|
|
"grad_norm": 0.14445893348886882,
|
|
"learning_rate": 6.653362049501826e-06,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03942955285310745,
|
|
"step": 1755,
|
|
"valid_targets_mean": 9189.1,
|
|
"valid_targets_min": 3182
|
|
},
|
|
{
|
|
"epoch": 5.334008097165992,
|
|
"grad_norm": 0.14139641428639302,
|
|
"learning_rate": 6.541202755529299e-06,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03828743100166321,
|
|
"step": 1760,
|
|
"valid_targets_mean": 8188.7,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 5.34919028340081,
|
|
"grad_norm": 0.1585935504275301,
|
|
"learning_rate": 6.429811768857359e-06,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03740239888429642,
|
|
"step": 1765,
|
|
"valid_targets_mean": 7538.2,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 5.364372469635628,
|
|
"grad_norm": 0.13822187722631937,
|
|
"learning_rate": 6.319195448338642e-06,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03545282036066055,
|
|
"step": 1770,
|
|
"valid_targets_mean": 9471.3,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 5.379554655870446,
|
|
"grad_norm": 0.18339114162325051,
|
|
"learning_rate": 6.2093601086032754e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039641544222831726,
|
|
"step": 1775,
|
|
"valid_targets_mean": 9020.2,
|
|
"valid_targets_min": 4367
|
|
},
|
|
{
|
|
"epoch": 5.394736842105263,
|
|
"grad_norm": 0.14890247459558204,
|
|
"learning_rate": 6.100312019698411e-06,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03684474527835846,
|
|
"step": 1780,
|
|
"valid_targets_mean": 8096.4,
|
|
"valid_targets_min": 3082
|
|
},
|
|
{
|
|
"epoch": 5.409919028340081,
|
|
"grad_norm": 0.14728618973821408,
|
|
"learning_rate": 5.9920574067303114e-06,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03865164518356323,
|
|
"step": 1785,
|
|
"valid_targets_mean": 8358.6,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 5.425101214574899,
|
|
"grad_norm": 0.19345526736377938,
|
|
"learning_rate": 5.8846024495089425e-06,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03934324160218239,
|
|
"step": 1790,
|
|
"valid_targets_mean": 8059.2,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 5.440283400809717,
|
|
"grad_norm": 0.14786136993813478,
|
|
"learning_rate": 5.777953282195228e-06,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039022721350193024,
|
|
"step": 1795,
|
|
"valid_targets_mean": 8380.5,
|
|
"valid_targets_min": 2994
|
|
},
|
|
{
|
|
"epoch": 5.455465587044534,
|
|
"grad_norm": 0.1436482033628107,
|
|
"learning_rate": 5.672115992950855e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03559436276555061,
|
|
"step": 1800,
|
|
"valid_targets_mean": 8211.4,
|
|
"valid_targets_min": 2872
|
|
},
|
|
{
|
|
"epoch": 5.470647773279352,
|
|
"grad_norm": 0.1439814494773858,
|
|
"learning_rate": 5.567096623590758e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03805895149707794,
|
|
"step": 1805,
|
|
"valid_targets_mean": 8432.4,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 5.48582995951417,
|
|
"grad_norm": 0.14488695807970728,
|
|
"learning_rate": 5.462901169238175e-06,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03775247931480408,
|
|
"step": 1810,
|
|
"valid_targets_mean": 8045.4,
|
|
"valid_targets_min": 3451
|
|
},
|
|
{
|
|
"epoch": 5.501012145748988,
|
|
"grad_norm": 0.15441694010046844,
|
|
"learning_rate": 5.359535577982431e-06,
|
|
"loss": 0.1149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03794621676206589,
|
|
"step": 1815,
|
|
"valid_targets_mean": 8607.8,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 5.516194331983805,
|
|
"grad_norm": 0.15181708590259155,
|
|
"learning_rate": 5.257005750539388e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04094664752483368,
|
|
"step": 1820,
|
|
"valid_targets_mean": 9457.4,
|
|
"valid_targets_min": 4370
|
|
},
|
|
{
|
|
"epoch": 5.531376518218623,
|
|
"grad_norm": 0.16390213109151072,
|
|
"learning_rate": 5.155317539914601e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041782595217227936,
|
|
"step": 1825,
|
|
"valid_targets_mean": 8889.4,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 5.5465587044534415,
|
|
"grad_norm": 0.1424139993649042,
|
|
"learning_rate": 5.054476751069179e-06,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04093995690345764,
|
|
"step": 1830,
|
|
"valid_targets_mean": 8631.6,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 5.5617408906882595,
|
|
"grad_norm": 0.1414334751861775,
|
|
"learning_rate": 4.954489140588412e-06,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037676893174648285,
|
|
"step": 1835,
|
|
"valid_targets_mean": 7567.7,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 5.576923076923077,
|
|
"grad_norm": 0.13846615938455498,
|
|
"learning_rate": 4.85536041635315e-06,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035420142114162445,
|
|
"step": 1840,
|
|
"valid_targets_mean": 8916.7,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 5.592105263157895,
|
|
"grad_norm": 0.1427062476382017,
|
|
"learning_rate": 4.757096237213976e-06,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04470570757985115,
|
|
"step": 1845,
|
|
"valid_targets_mean": 9017.0,
|
|
"valid_targets_min": 2887
|
|
},
|
|
{
|
|
"epoch": 5.607287449392713,
|
|
"grad_norm": 0.15070798137106511,
|
|
"learning_rate": 4.659702212668151e-06,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03700145334005356,
|
|
"step": 1850,
|
|
"valid_targets_mean": 8481.5,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 5.62246963562753,
|
|
"grad_norm": 0.14110711174121632,
|
|
"learning_rate": 4.5631839025393694e-06,
|
|
"loss": 0.1162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0384586937725544,
|
|
"step": 1855,
|
|
"valid_targets_mean": 8763.9,
|
|
"valid_targets_min": 4639
|
|
},
|
|
{
|
|
"epoch": 5.637651821862348,
|
|
"grad_norm": 0.14912715026621207,
|
|
"learning_rate": 4.467546816660433e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03578153997659683,
|
|
"step": 1860,
|
|
"valid_targets_mean": 8308.3,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 5.652834008097166,
|
|
"grad_norm": 0.1357692940329741,
|
|
"learning_rate": 4.372796414558665e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038578979671001434,
|
|
"step": 1865,
|
|
"valid_targets_mean": 8764.2,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 5.668016194331984,
|
|
"grad_norm": 0.1411754045160628,
|
|
"learning_rate": 4.278938105144255e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03638707473874092,
|
|
"step": 1870,
|
|
"valid_targets_mean": 8899.0,
|
|
"valid_targets_min": 2536
|
|
},
|
|
{
|
|
"epoch": 5.683198380566802,
|
|
"grad_norm": 0.14710443981379975,
|
|
"learning_rate": 4.185977246401509e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040743954479694366,
|
|
"step": 1875,
|
|
"valid_targets_mean": 9253.1,
|
|
"valid_targets_min": 1987
|
|
},
|
|
{
|
|
"epoch": 5.698380566801619,
|
|
"grad_norm": 0.14675654339709615,
|
|
"learning_rate": 4.093919145082959e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03777092695236206,
|
|
"step": 1880,
|
|
"valid_targets_mean": 8474.8,
|
|
"valid_targets_min": 3588
|
|
},
|
|
{
|
|
"epoch": 5.713562753036437,
|
|
"grad_norm": 0.1526031297385441,
|
|
"learning_rate": 4.002769056406453e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035124462097883224,
|
|
"step": 1885,
|
|
"valid_targets_mean": 7587.0,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 5.728744939271255,
|
|
"grad_norm": 0.14616344080894714,
|
|
"learning_rate": 3.912532183755115e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042123839259147644,
|
|
"step": 1890,
|
|
"valid_targets_mean": 9098.9,
|
|
"valid_targets_min": 4583
|
|
},
|
|
{
|
|
"epoch": 5.743927125506072,
|
|
"grad_norm": 0.1496472924158553,
|
|
"learning_rate": 3.82321367838034e-06,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0379486158490181,
|
|
"step": 1895,
|
|
"valid_targets_mean": 7507.1,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 5.7591093117408905,
|
|
"grad_norm": 0.1473831270526658,
|
|
"learning_rate": 3.734818639107709e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04747821390628815,
|
|
"step": 1900,
|
|
"valid_targets_mean": 9314.4,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 5.7742914979757085,
|
|
"grad_norm": 0.15284228615456252,
|
|
"learning_rate": 3.647352112045943e-06,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04142976552248001,
|
|
"step": 1905,
|
|
"valid_targets_mean": 8664.0,
|
|
"valid_targets_min": 3029
|
|
},
|
|
{
|
|
"epoch": 5.7894736842105265,
|
|
"grad_norm": 0.13945642627105082,
|
|
"learning_rate": 3.560819090298808e-06,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03512915223836899,
|
|
"step": 1910,
|
|
"valid_targets_mean": 8466.9,
|
|
"valid_targets_min": 3952
|
|
},
|
|
{
|
|
"epoch": 5.804655870445345,
|
|
"grad_norm": 0.1441196458850472,
|
|
"learning_rate": 3.4752245136801065e-06,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03937532380223274,
|
|
"step": 1915,
|
|
"valid_targets_mean": 8712.3,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 5.819838056680162,
|
|
"grad_norm": 0.14805464419083436,
|
|
"learning_rate": 3.3905732684316626e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042844273149967194,
|
|
"step": 1920,
|
|
"valid_targets_mean": 8763.1,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 5.83502024291498,
|
|
"grad_norm": 0.1480358319690882,
|
|
"learning_rate": 3.3068701869444177e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036682602018117905,
|
|
"step": 1925,
|
|
"valid_targets_mean": 8740.9,
|
|
"valid_targets_min": 4435
|
|
},
|
|
{
|
|
"epoch": 5.850202429149798,
|
|
"grad_norm": 0.1459878867812898,
|
|
"learning_rate": 3.2241200474825307e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03735990822315216,
|
|
"step": 1930,
|
|
"valid_targets_mean": 8422.2,
|
|
"valid_targets_min": 1999
|
|
},
|
|
{
|
|
"epoch": 5.865384615384615,
|
|
"grad_norm": 0.13985608359356416,
|
|
"learning_rate": 3.1423275739106353e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039207570254802704,
|
|
"step": 1935,
|
|
"valid_targets_mean": 8200.9,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 5.880566801619433,
|
|
"grad_norm": 0.143373016362057,
|
|
"learning_rate": 3.0614974354241547e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03678063675761223,
|
|
"step": 1940,
|
|
"valid_targets_mean": 7849.2,
|
|
"valid_targets_min": 3093
|
|
},
|
|
{
|
|
"epoch": 5.895748987854251,
|
|
"grad_norm": 0.16871177013021846,
|
|
"learning_rate": 2.9816342462827806e-06,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04940785467624664,
|
|
"step": 1945,
|
|
"valid_targets_mean": 10040.4,
|
|
"valid_targets_min": 4380
|
|
},
|
|
{
|
|
"epoch": 5.910931174089069,
|
|
"grad_norm": 0.14822822828188056,
|
|
"learning_rate": 2.9027425655470366e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03873114287853241,
|
|
"step": 1950,
|
|
"valid_targets_mean": 8875.3,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 5.926113360323887,
|
|
"grad_norm": 0.14512102842997385,
|
|
"learning_rate": 2.824826896818036e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03738601505756378,
|
|
"step": 1955,
|
|
"valid_targets_mean": 9014.9,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 5.941295546558704,
|
|
"grad_norm": 0.1497478643983066,
|
|
"learning_rate": 2.747891687980384e-06,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039003290235996246,
|
|
"step": 1960,
|
|
"valid_targets_mean": 8714.8,
|
|
"valid_targets_min": 3442
|
|
},
|
|
{
|
|
"epoch": 5.956477732793522,
|
|
"grad_norm": 0.163417446658834,
|
|
"learning_rate": 2.6719413309482843e-06,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03898151218891144,
|
|
"step": 1965,
|
|
"valid_targets_mean": 8736.3,
|
|
"valid_targets_min": 3420
|
|
},
|
|
{
|
|
"epoch": 5.97165991902834,
|
|
"grad_norm": 0.16046320194223443,
|
|
"learning_rate": 2.5969801614147838e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03890128806233406,
|
|
"step": 1970,
|
|
"valid_targets_mean": 8541.1,
|
|
"valid_targets_min": 2956
|
|
},
|
|
{
|
|
"epoch": 5.9868421052631575,
|
|
"grad_norm": 0.14081229862869707,
|
|
"learning_rate": 2.5230124586043016e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039920657873153687,
|
|
"step": 1975,
|
|
"valid_targets_mean": 9111.5,
|
|
"valid_targets_min": 1749
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.2210745792297546,
|
|
"learning_rate": 2.45004244502834e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11022559553384781,
|
|
"step": 1980,
|
|
"valid_targets_mean": 9550.6,
|
|
"valid_targets_min": 3039
|
|
},
|
|
{
|
|
"epoch": 6.015182186234818,
|
|
"grad_norm": 0.15866018349146954,
|
|
"learning_rate": 2.3780742862444205e-06,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036943186074495316,
|
|
"step": 1985,
|
|
"valid_targets_mean": 7901.8,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 6.030364372469635,
|
|
"grad_norm": 0.14551510211747842,
|
|
"learning_rate": 2.3071120906183064e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036335233598947525,
|
|
"step": 1990,
|
|
"valid_targets_mean": 8818.8,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 6.045546558704453,
|
|
"grad_norm": 0.14601887358413895,
|
|
"learning_rate": 2.237159909089468e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042120106518268585,
|
|
"step": 1995,
|
|
"valid_targets_mean": 8899.8,
|
|
"valid_targets_min": 4659
|
|
},
|
|
{
|
|
"epoch": 6.060728744939271,
|
|
"grad_norm": 0.14673314248006084,
|
|
"learning_rate": 2.168221734939824e-06,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041596632450819016,
|
|
"step": 2000,
|
|
"valid_targets_mean": 8668.5,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 6.075910931174089,
|
|
"grad_norm": 0.14826330146432706,
|
|
"learning_rate": 2.1003015035658024e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03817155212163925,
|
|
"step": 2005,
|
|
"valid_targets_mean": 8258.6,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 6.0910931174089065,
|
|
"grad_norm": 0.14880537215799367,
|
|
"learning_rate": 2.0334030922536606e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03734733909368515,
|
|
"step": 2010,
|
|
"valid_targets_mean": 8594.4,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 6.1062753036437245,
|
|
"grad_norm": 0.13914311607492816,
|
|
"learning_rate": 1.9675303199581554e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03844892233610153,
|
|
"step": 2015,
|
|
"valid_targets_mean": 8857.8,
|
|
"valid_targets_min": 4382
|
|
},
|
|
{
|
|
"epoch": 6.1214574898785425,
|
|
"grad_norm": 0.13340896702047678,
|
|
"learning_rate": 1.90268694708454e-06,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039715059101581573,
|
|
"step": 2020,
|
|
"valid_targets_mean": 9329.3,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 6.136639676113361,
|
|
"grad_norm": 0.14607399803779483,
|
|
"learning_rate": 1.8388766752739017e-06,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03926191106438637,
|
|
"step": 2025,
|
|
"valid_targets_mean": 9367.1,
|
|
"valid_targets_min": 4886
|
|
},
|
|
{
|
|
"epoch": 6.151821862348178,
|
|
"grad_norm": 0.14583823007727012,
|
|
"learning_rate": 1.77610314719183e-06,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039285898208618164,
|
|
"step": 2030,
|
|
"valid_targets_mean": 8590.9,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 6.167004048582996,
|
|
"grad_norm": 0.15365261639107758,
|
|
"learning_rate": 1.7143699463204932e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04441247880458832,
|
|
"step": 2035,
|
|
"valid_targets_mean": 8972.8,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 6.182186234817814,
|
|
"grad_norm": 0.14025226551505982,
|
|
"learning_rate": 1.6536805967540614e-06,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03822282701730728,
|
|
"step": 2040,
|
|
"valid_targets_mean": 9076.2,
|
|
"valid_targets_min": 5418
|
|
},
|
|
{
|
|
"epoch": 6.197368421052632,
|
|
"grad_norm": 0.14302235970328148,
|
|
"learning_rate": 1.5940385629975353e-06,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04065093398094177,
|
|
"step": 2045,
|
|
"valid_targets_mean": 9012.9,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 6.212550607287449,
|
|
"grad_norm": 0.1402140426132668,
|
|
"learning_rate": 1.535447249768971e-06,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03799460828304291,
|
|
"step": 2050,
|
|
"valid_targets_mean": 8516.2,
|
|
"valid_targets_min": 4839
|
|
},
|
|
{
|
|
"epoch": 6.227732793522267,
|
|
"grad_norm": 0.1496430988528578,
|
|
"learning_rate": 1.4779100018051118e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035870231688022614,
|
|
"step": 2055,
|
|
"valid_targets_mean": 8151.8,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 6.242914979757085,
|
|
"grad_norm": 0.13748325962148478,
|
|
"learning_rate": 1.421430103670456e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032935213297605515,
|
|
"step": 2060,
|
|
"valid_targets_mean": 7886.8,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 6.258097165991903,
|
|
"grad_norm": 0.14435602092002336,
|
|
"learning_rate": 1.366010779569764e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042122870683670044,
|
|
"step": 2065,
|
|
"valid_targets_mean": 9228.5,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 6.27327935222672,
|
|
"grad_norm": 0.14141621542931476,
|
|
"learning_rate": 1.3116551931639899e-06,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04121895879507065,
|
|
"step": 2070,
|
|
"valid_targets_mean": 9679.3,
|
|
"valid_targets_min": 3515
|
|
},
|
|
{
|
|
"epoch": 6.288461538461538,
|
|
"grad_norm": 0.13897354438983578,
|
|
"learning_rate": 1.258366447389674e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04003419727087021,
|
|
"step": 2075,
|
|
"valid_targets_mean": 9406.4,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 6.303643724696356,
|
|
"grad_norm": 0.14005263662113093,
|
|
"learning_rate": 1.2061475842818337e-06,
|
|
"loss": 0.1152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03755183890461922,
|
|
"step": 2080,
|
|
"valid_targets_mean": 9020.0,
|
|
"valid_targets_min": 2909
|
|
},
|
|
{
|
|
"epoch": 6.318825910931174,
|
|
"grad_norm": 0.14395477563618123,
|
|
"learning_rate": 1.1550015848002816e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039493974298238754,
|
|
"step": 2085,
|
|
"valid_targets_mean": 8563.3,
|
|
"valid_targets_min": 1999
|
|
},
|
|
{
|
|
"epoch": 6.334008097165992,
|
|
"grad_norm": 0.15997282788284062,
|
|
"learning_rate": 1.1049313686594675e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036930982023477554,
|
|
"step": 2090,
|
|
"valid_targets_mean": 8724.4,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 6.34919028340081,
|
|
"grad_norm": 0.147846780125884,
|
|
"learning_rate": 1.0559397941618022e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04172082617878914,
|
|
"step": 2095,
|
|
"valid_targets_mean": 8694.1,
|
|
"valid_targets_min": 3642
|
|
},
|
|
{
|
|
"epoch": 6.364372469635628,
|
|
"grad_norm": 0.1439043433212688,
|
|
"learning_rate": 1.008029658034484e-06,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03991272673010826,
|
|
"step": 2100,
|
|
"valid_targets_mean": 9369.8,
|
|
"valid_targets_min": 4291
|
|
},
|
|
{
|
|
"epoch": 6.379554655870446,
|
|
"grad_norm": 0.1450788633369987,
|
|
"learning_rate": 9.61203695269859e-07,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037730589509010315,
|
|
"step": 2105,
|
|
"valid_targets_mean": 9330.0,
|
|
"valid_targets_min": 2463
|
|
},
|
|
{
|
|
"epoch": 6.394736842105263,
|
|
"grad_norm": 0.14281052508020034,
|
|
"learning_rate": 9.154645789692718e-07,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040253590792417526,
|
|
"step": 2110,
|
|
"valid_targets_mean": 9340.1,
|
|
"valid_targets_min": 4819
|
|
},
|
|
{
|
|
"epoch": 6.409919028340081,
|
|
"grad_norm": 0.14581750032864477,
|
|
"learning_rate": 8.708149201904814e-07,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042613834142684937,
|
|
"step": 2115,
|
|
"valid_targets_mean": 8733.5,
|
|
"valid_targets_min": 2421
|
|
},
|
|
{
|
|
"epoch": 6.425101214574899,
|
|
"grad_norm": 0.14768979692577996,
|
|
"learning_rate": 8.272572677986001e-07,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03527721390128136,
|
|
"step": 2120,
|
|
"valid_targets_mean": 7520.8,
|
|
"valid_targets_min": 4338
|
|
},
|
|
{
|
|
"epoch": 6.440283400809717,
|
|
"grad_norm": 0.15392151001329257,
|
|
"learning_rate": 7.847941083206057e-07,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040631502866744995,
|
|
"step": 2125,
|
|
"valid_targets_mean": 8336.2,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 6.455465587044534,
|
|
"grad_norm": 0.14732462646369482,
|
|
"learning_rate": 7.4342786580337e-07,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036711059510707855,
|
|
"step": 2130,
|
|
"valid_targets_mean": 8752.2,
|
|
"valid_targets_min": 2650
|
|
},
|
|
{
|
|
"epoch": 6.470647773279352,
|
|
"grad_norm": 0.14033227704969123,
|
|
"learning_rate": 7.031609016753016e-07,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0404055193066597,
|
|
"step": 2135,
|
|
"valid_targets_mean": 8999.9,
|
|
"valid_targets_min": 2663
|
|
},
|
|
{
|
|
"epoch": 6.48582995951417,
|
|
"grad_norm": 0.13771854262371436,
|
|
"learning_rate": 6.639955146115284e-07,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03602340444922447,
|
|
"step": 2140,
|
|
"valid_targets_mean": 9227.4,
|
|
"valid_targets_min": 4684
|
|
},
|
|
{
|
|
"epoch": 6.501012145748988,
|
|
"grad_norm": 0.1504219902367466,
|
|
"learning_rate": 6.259339404026876e-07,
|
|
"loss": 0.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03943486139178276,
|
|
"step": 2145,
|
|
"valid_targets_mean": 8011.0,
|
|
"valid_targets_min": 2156
|
|
},
|
|
{
|
|
"epoch": 6.516194331983805,
|
|
"grad_norm": 0.13775586752383376,
|
|
"learning_rate": 5.889783518272785e-07,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038126520812511444,
|
|
"step": 2150,
|
|
"valid_targets_mean": 8704.8,
|
|
"valid_targets_min": 2688
|
|
},
|
|
{
|
|
"epoch": 6.531376518218623,
|
|
"grad_norm": 0.14075337609993224,
|
|
"learning_rate": 5.53130858527644e-07,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03781341761350632,
|
|
"step": 2155,
|
|
"valid_targets_mean": 8278.8,
|
|
"valid_targets_min": 2725
|
|
},
|
|
{
|
|
"epoch": 6.5465587044534415,
|
|
"grad_norm": 0.14994946760854388,
|
|
"learning_rate": 5.183935068895207e-07,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04326803982257843,
|
|
"step": 2160,
|
|
"valid_targets_mean": 8562.2,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 6.5617408906882595,
|
|
"grad_norm": 0.14852709170653972,
|
|
"learning_rate": 4.847682799252474e-07,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038237787783145905,
|
|
"step": 2165,
|
|
"valid_targets_mean": 8814.0,
|
|
"valid_targets_min": 2796
|
|
},
|
|
{
|
|
"epoch": 6.576923076923077,
|
|
"grad_norm": 0.1343038610254666,
|
|
"learning_rate": 4.522570971605289e-07,
|
|
"loss": 0.1157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0384284108877182,
|
|
"step": 2170,
|
|
"valid_targets_mean": 8814.6,
|
|
"valid_targets_min": 4286
|
|
},
|
|
{
|
|
"epoch": 6.592105263157895,
|
|
"grad_norm": 0.14998964028070333,
|
|
"learning_rate": 4.208618145248866e-07,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036478783935308456,
|
|
"step": 2175,
|
|
"valid_targets_mean": 8182.2,
|
|
"valid_targets_min": 3648
|
|
},
|
|
{
|
|
"epoch": 6.607287449392713,
|
|
"grad_norm": 0.14885078466327517,
|
|
"learning_rate": 3.9058422424568923e-07,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03634038195014,
|
|
"step": 2180,
|
|
"valid_targets_mean": 7734.0,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 6.62246963562753,
|
|
"grad_norm": 0.1299995767983947,
|
|
"learning_rate": 3.614260547458659e-07,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0391826368868351,
|
|
"step": 2185,
|
|
"valid_targets_mean": 9233.7,
|
|
"valid_targets_min": 2760
|
|
},
|
|
{
|
|
"epoch": 6.637651821862348,
|
|
"grad_norm": 0.1435265838060437,
|
|
"learning_rate": 3.3338897054521205e-07,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03954654186964035,
|
|
"step": 2190,
|
|
"valid_targets_mean": 9451.9,
|
|
"valid_targets_min": 3895
|
|
},
|
|
{
|
|
"epoch": 6.652834008097166,
|
|
"grad_norm": 0.15041269542984378,
|
|
"learning_rate": 3.0647457216538724e-07,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041305407881736755,
|
|
"step": 2195,
|
|
"valid_targets_mean": 9684.6,
|
|
"valid_targets_min": 4726
|
|
},
|
|
{
|
|
"epoch": 6.668016194331984,
|
|
"grad_norm": 0.1460484338066276,
|
|
"learning_rate": 2.8068439603853747e-07,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037806905806064606,
|
|
"step": 2200,
|
|
"valid_targets_mean": 8496.2,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 6.683198380566802,
|
|
"grad_norm": 0.14393604436119115,
|
|
"learning_rate": 2.5601991441959407e-07,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034557752311229706,
|
|
"step": 2205,
|
|
"valid_targets_mean": 8299.8,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 6.698380566801619,
|
|
"grad_norm": 0.13342831432735433,
|
|
"learning_rate": 2.3248253530222753e-07,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03726230189204216,
|
|
"step": 2210,
|
|
"valid_targets_mean": 9063.8,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 6.713562753036437,
|
|
"grad_norm": 0.14136924533036044,
|
|
"learning_rate": 2.1007360233846308e-07,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040094614028930664,
|
|
"step": 2215,
|
|
"valid_targets_mean": 8874.2,
|
|
"valid_targets_min": 3116
|
|
},
|
|
{
|
|
"epoch": 6.728744939271255,
|
|
"grad_norm": 0.15175094628388122,
|
|
"learning_rate": 1.8879439476198636e-07,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04260304570198059,
|
|
"step": 2220,
|
|
"valid_targets_mean": 8778.2,
|
|
"valid_targets_min": 2697
|
|
},
|
|
{
|
|
"epoch": 6.743927125506072,
|
|
"grad_norm": 0.16581135416231693,
|
|
"learning_rate": 1.6864612731511298e-07,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03748450428247452,
|
|
"step": 2225,
|
|
"valid_targets_mean": 8330.2,
|
|
"valid_targets_min": 3663
|
|
},
|
|
{
|
|
"epoch": 6.7591093117408905,
|
|
"grad_norm": 0.1457538660749771,
|
|
"learning_rate": 1.4962995017944626e-07,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038353487849235535,
|
|
"step": 2230,
|
|
"valid_targets_mean": 8465.8,
|
|
"valid_targets_min": 2814
|
|
},
|
|
{
|
|
"epoch": 6.7742914979757085,
|
|
"grad_norm": 0.13983082647684608,
|
|
"learning_rate": 1.3174694891021188e-07,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03419236093759537,
|
|
"step": 2235,
|
|
"valid_targets_mean": 8212.3,
|
|
"valid_targets_min": 2747
|
|
},
|
|
{
|
|
"epoch": 6.7894736842105265,
|
|
"grad_norm": 0.14058623756776067,
|
|
"learning_rate": 1.1499814437429869e-07,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039577312767505646,
|
|
"step": 2240,
|
|
"valid_targets_mean": 8556.0,
|
|
"valid_targets_min": 2873
|
|
},
|
|
{
|
|
"epoch": 6.804655870445345,
|
|
"grad_norm": 0.1443007110257819,
|
|
"learning_rate": 9.938449269197181e-08,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04069925472140312,
|
|
"step": 2245,
|
|
"valid_targets_mean": 8928.5,
|
|
"valid_targets_min": 5279
|
|
},
|
|
{
|
|
"epoch": 6.819838056680162,
|
|
"grad_norm": 0.13950837051052306,
|
|
"learning_rate": 8.490688518229651e-08,
|
|
"loss": 0.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03460806980729103,
|
|
"step": 2250,
|
|
"valid_targets_mean": 8512.6,
|
|
"valid_targets_min": 2554
|
|
},
|
|
{
|
|
"epoch": 6.83502024291498,
|
|
"grad_norm": 0.13863311502969372,
|
|
"learning_rate": 7.156614831225428e-08,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036703385412693024,
|
|
"step": 2255,
|
|
"valid_targets_mean": 9018.9,
|
|
"valid_targets_min": 4465
|
|
},
|
|
{
|
|
"epoch": 6.850202429149798,
|
|
"grad_norm": 0.1472552455035362,
|
|
"learning_rate": 5.936304364956513e-08,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04302659258246422,
|
|
"step": 2260,
|
|
"valid_targets_mean": 8520.2,
|
|
"valid_targets_min": 4862
|
|
},
|
|
{
|
|
"epoch": 6.865384615384615,
|
|
"grad_norm": 0.1505878900047233,
|
|
"learning_rate": 4.829826781921343e-08,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03354855254292488,
|
|
"step": 2265,
|
|
"valid_targets_mean": 7919.0,
|
|
"valid_targets_min": 3115
|
|
},
|
|
{
|
|
"epoch": 6.880566801619433,
|
|
"grad_norm": 0.14705429455875768,
|
|
"learning_rate": 3.837245246367749e-08,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04106744006276131,
|
|
"step": 2270,
|
|
"valid_targets_mean": 8678.0,
|
|
"valid_targets_min": 1749
|
|
},
|
|
{
|
|
"epoch": 6.895748987854251,
|
|
"grad_norm": 0.1488725076801424,
|
|
"learning_rate": 2.958616420687177e-08,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041926369071006775,
|
|
"step": 2275,
|
|
"valid_targets_mean": 9183.1,
|
|
"valid_targets_min": 2663
|
|
},
|
|
{
|
|
"epoch": 6.910931174089069,
|
|
"grad_norm": 0.14639756472083776,
|
|
"learning_rate": 2.1939904621806062e-08,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036773331463336945,
|
|
"step": 2280,
|
|
"valid_targets_mean": 7727.3,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 6.926113360323887,
|
|
"grad_norm": 0.13722893129141453,
|
|
"learning_rate": 1.5434110201946184e-08,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03645988181233406,
|
|
"step": 2285,
|
|
"valid_targets_mean": 8755.3,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 6.941295546558704,
|
|
"grad_norm": 0.14030306447897334,
|
|
"learning_rate": 1.006915233629835e-08,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03784427419304848,
|
|
"step": 2290,
|
|
"valid_targets_mean": 8643.1,
|
|
"valid_targets_min": 2583
|
|
},
|
|
{
|
|
"epoch": 6.956477732793522,
|
|
"grad_norm": 0.15274489980120057,
|
|
"learning_rate": 5.845337288210573e-09,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04250333458185196,
|
|
"step": 2295,
|
|
"valid_targets_mean": 8507.6,
|
|
"valid_targets_min": 2692
|
|
},
|
|
{
|
|
"epoch": 6.97165991902834,
|
|
"grad_norm": 0.1490629627414723,
|
|
"learning_rate": 2.7629061778866597e-09,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03616956248879433,
|
|
"step": 2300,
|
|
"valid_targets_mean": 8145.3,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 6.9868421052631575,
|
|
"grad_norm": 0.14434890154092997,
|
|
"learning_rate": 8.220349686216545e-10,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0341312438249588,
|
|
"step": 2305,
|
|
"valid_targets_mean": 8061.8,
|
|
"valid_targets_min": 3124
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.24837721626019138,
|
|
"learning_rate": 2.2834456763209944e-11,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11622576415538788,
|
|
"step": 2310,
|
|
"valid_targets_mean": 8488.6,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11622576415538788,
|
|
"step": 2310,
|
|
"total_flos": 1.7174429464038212e+19,
|
|
"train_loss": 0.05696848721215219,
|
|
"train_runtime": 34728.8752,
|
|
"train_samples_per_second": 6.369,
|
|
"train_steps_per_second": 0.067,
|
|
"valid_targets_mean": 8488.6,
|
|
"valid_targets_min": 2998
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 2310,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.7174429464038212e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|