5126 lines
143 KiB
JSON
5126 lines
143 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 2310,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.015182186234817813,
|
||
|
|
"grad_norm": 7.44463643677737,
|
||
|
|
"learning_rate": 6.926406926406927e-07,
|
||
|
|
"loss": 0.3843,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13057151436805725,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 4714.4,
|
||
|
|
"valid_targets_min": 2106
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.030364372469635626,
|
||
|
|
"grad_norm": 5.885597927087539,
|
||
|
|
"learning_rate": 1.5584415584415584e-06,
|
||
|
|
"loss": 0.3659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10937661677598953,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 3845.9,
|
||
|
|
"valid_targets_min": 1829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04554655870445344,
|
||
|
|
"grad_norm": 4.849821169984242,
|
||
|
|
"learning_rate": 2.4242424242424244e-06,
|
||
|
|
"loss": 0.3588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1189289391040802,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 3509.7,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06072874493927125,
|
||
|
|
"grad_norm": 2.445594080062749,
|
||
|
|
"learning_rate": 3.2900432900432905e-06,
|
||
|
|
"loss": 0.3206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10257112979888916,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 4497.1,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07591093117408906,
|
||
|
|
"grad_norm": 2.04806536433542,
|
||
|
|
"learning_rate": 4.155844155844157e-06,
|
||
|
|
"loss": 0.319,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10601706802845001,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 2807.7,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09109311740890688,
|
||
|
|
"grad_norm": 1.102078489917235,
|
||
|
|
"learning_rate": 5.021645021645022e-06,
|
||
|
|
"loss": 0.2854,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09575830399990082,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 3731.5,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1062753036437247,
|
||
|
|
"grad_norm": 0.7210631909204307,
|
||
|
|
"learning_rate": 5.887445887445888e-06,
|
||
|
|
"loss": 0.2563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07700647413730621,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 3727.4,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1214574898785425,
|
||
|
|
"grad_norm": 0.6136169452953454,
|
||
|
|
"learning_rate": 6.753246753246754e-06,
|
||
|
|
"loss": 0.2445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07860705256462097,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 3958.4,
|
||
|
|
"valid_targets_min": 1907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13663967611336034,
|
||
|
|
"grad_norm": 0.557208275932916,
|
||
|
|
"learning_rate": 7.61904761904762e-06,
|
||
|
|
"loss": 0.2336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07819276303052902,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 3577.2,
|
||
|
|
"valid_targets_min": 1914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15182186234817813,
|
||
|
|
"grad_norm": 0.37681570655861857,
|
||
|
|
"learning_rate": 8.484848484848486e-06,
|
||
|
|
"loss": 0.2085,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06455516070127487,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 3796.2,
|
||
|
|
"valid_targets_min": 2089
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16700404858299595,
|
||
|
|
"grad_norm": 0.42024733164150413,
|
||
|
|
"learning_rate": 9.350649350649352e-06,
|
||
|
|
"loss": 0.1907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.059568047523498535,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 4283.2,
|
||
|
|
"valid_targets_min": 1982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18218623481781376,
|
||
|
|
"grad_norm": 0.24400900427892094,
|
||
|
|
"learning_rate": 1.0216450216450218e-05,
|
||
|
|
"loss": 0.1762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054281093180179596,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 3749.1,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19736842105263158,
|
||
|
|
"grad_norm": 0.21636536656120997,
|
||
|
|
"learning_rate": 1.1082251082251083e-05,
|
||
|
|
"loss": 0.1681,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04884766787290573,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 3822.7,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2125506072874494,
|
||
|
|
"grad_norm": 0.19461575822555846,
|
||
|
|
"learning_rate": 1.1948051948051949e-05,
|
||
|
|
"loss": 0.1635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.059515006840229034,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 4578.4,
|
||
|
|
"valid_targets_min": 2087
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22773279352226722,
|
||
|
|
"grad_norm": 0.18381921577304394,
|
||
|
|
"learning_rate": 1.2813852813852813e-05,
|
||
|
|
"loss": 0.1582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.051513925194740295,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 4356.1,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.242914979757085,
|
||
|
|
"grad_norm": 0.19980206948240578,
|
||
|
|
"learning_rate": 1.3679653679653682e-05,
|
||
|
|
"loss": 0.1562,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057043835520744324,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 3675.0,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25809716599190285,
|
||
|
|
"grad_norm": 0.17713117301286477,
|
||
|
|
"learning_rate": 1.4545454545454546e-05,
|
||
|
|
"loss": 0.1534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04782434552907944,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 3828.6,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2732793522267207,
|
||
|
|
"grad_norm": 0.18331587500572735,
|
||
|
|
"learning_rate": 1.5411255411255414e-05,
|
||
|
|
"loss": 0.1553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05902521684765816,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 3919.8,
|
||
|
|
"valid_targets_min": 1982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28846153846153844,
|
||
|
|
"grad_norm": 0.1663357939699184,
|
||
|
|
"learning_rate": 1.6277056277056278e-05,
|
||
|
|
"loss": 0.1357,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04300123453140259,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 3938.0,
|
||
|
|
"valid_targets_min": 1918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30364372469635625,
|
||
|
|
"grad_norm": 0.17993383204515867,
|
||
|
|
"learning_rate": 1.7142857142857142e-05,
|
||
|
|
"loss": 0.1429,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04914538189768791,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 3767.9,
|
||
|
|
"valid_targets_min": 1906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3188259109311741,
|
||
|
|
"grad_norm": 0.18706330726994613,
|
||
|
|
"learning_rate": 1.800865800865801e-05,
|
||
|
|
"loss": 0.1355,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03939981013536453,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 3263.9,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3340080971659919,
|
||
|
|
"grad_norm": 0.20485705866440299,
|
||
|
|
"learning_rate": 1.8874458874458877e-05,
|
||
|
|
"loss": 0.1294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04176958650350571,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 3164.8,
|
||
|
|
"valid_targets_min": 1361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3491902834008097,
|
||
|
|
"grad_norm": 0.1872857476102004,
|
||
|
|
"learning_rate": 1.974025974025974e-05,
|
||
|
|
"loss": 0.1295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04393088445067406,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 3204.0,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3643724696356275,
|
||
|
|
"grad_norm": 0.213295341704759,
|
||
|
|
"learning_rate": 2.0606060606060608e-05,
|
||
|
|
"loss": 0.1303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04451216757297516,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 3489.1,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37955465587044535,
|
||
|
|
"grad_norm": 0.17978009684841045,
|
||
|
|
"learning_rate": 2.1471861471861476e-05,
|
||
|
|
"loss": 0.1233,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03368481248617172,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 2989.6,
|
||
|
|
"valid_targets_min": 1981
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39473684210526316,
|
||
|
|
"grad_norm": 0.16896121925279073,
|
||
|
|
"learning_rate": 2.233766233766234e-05,
|
||
|
|
"loss": 0.1235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0424782931804657,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 4682.2,
|
||
|
|
"valid_targets_min": 1652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.409919028340081,
|
||
|
|
"grad_norm": 0.17808671936739895,
|
||
|
|
"learning_rate": 2.3203463203463207e-05,
|
||
|
|
"loss": 0.1209,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04030633717775345,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 5853.7,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4251012145748988,
|
||
|
|
"grad_norm": 0.2088097013917014,
|
||
|
|
"learning_rate": 2.4069264069264074e-05,
|
||
|
|
"loss": 0.123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04398595541715622,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 3755.6,
|
||
|
|
"valid_targets_min": 1786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4402834008097166,
|
||
|
|
"grad_norm": 0.2032024872108984,
|
||
|
|
"learning_rate": 2.4935064935064935e-05,
|
||
|
|
"loss": 0.1163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.032414473593235016,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 3964.7,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45546558704453444,
|
||
|
|
"grad_norm": 0.23298211994316195,
|
||
|
|
"learning_rate": 2.5800865800865803e-05,
|
||
|
|
"loss": 0.1192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044113077223300934,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 2901.4,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4706477732793522,
|
||
|
|
"grad_norm": 0.17747994714750667,
|
||
|
|
"learning_rate": 2.6666666666666667e-05,
|
||
|
|
"loss": 0.1194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.039363015443086624,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 4395.2,
|
||
|
|
"valid_targets_min": 1858
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48582995951417,
|
||
|
|
"grad_norm": 0.2080929242521209,
|
||
|
|
"learning_rate": 2.7532467532467534e-05,
|
||
|
|
"loss": 0.1139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03759466111660004,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 3544.7,
|
||
|
|
"valid_targets_min": 2025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5010121457489879,
|
||
|
|
"grad_norm": 0.19441683682780456,
|
||
|
|
"learning_rate": 2.83982683982684e-05,
|
||
|
|
"loss": 0.1135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03790004551410675,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 3318.6,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5161943319838057,
|
||
|
|
"grad_norm": 0.19003889668840618,
|
||
|
|
"learning_rate": 2.9264069264069265e-05,
|
||
|
|
"loss": 0.114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03792513906955719,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 3784.6,
|
||
|
|
"valid_targets_min": 1877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5313765182186235,
|
||
|
|
"grad_norm": 0.20511490682778752,
|
||
|
|
"learning_rate": 3.0129870129870133e-05,
|
||
|
|
"loss": 0.1078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03073268011212349,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 4622.6,
|
||
|
|
"valid_targets_min": 1924
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5465587044534413,
|
||
|
|
"grad_norm": 0.19191993021400966,
|
||
|
|
"learning_rate": 3.0995670995671e-05,
|
||
|
|
"loss": 0.1071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.031047457829117775,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 4335.8,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5617408906882592,
|
||
|
|
"grad_norm": 0.23109861383640815,
|
||
|
|
"learning_rate": 3.1861471861471864e-05,
|
||
|
|
"loss": 0.1118,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04437777027487755,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 3850.8,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5769230769230769,
|
||
|
|
"grad_norm": 0.2286514640578093,
|
||
|
|
"learning_rate": 3.272727272727273e-05,
|
||
|
|
"loss": 0.105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03371407464146614,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 4441.3,
|
||
|
|
"valid_targets_min": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5921052631578947,
|
||
|
|
"grad_norm": 0.20823718864627255,
|
||
|
|
"learning_rate": 3.359307359307359e-05,
|
||
|
|
"loss": 0.1036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03502805531024933,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 3621.6,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6072874493927125,
|
||
|
|
"grad_norm": 0.21654220245549002,
|
||
|
|
"learning_rate": 3.445887445887446e-05,
|
||
|
|
"loss": 0.104,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03596603125333786,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 3383.7,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6224696356275303,
|
||
|
|
"grad_norm": 0.26685429533699434,
|
||
|
|
"learning_rate": 3.532467532467533e-05,
|
||
|
|
"loss": 0.1069,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0359579399228096,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 3611.7,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6376518218623481,
|
||
|
|
"grad_norm": 0.2594409949935294,
|
||
|
|
"learning_rate": 3.6190476190476195e-05,
|
||
|
|
"loss": 0.101,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03977029025554657,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 3533.5,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.652834008097166,
|
||
|
|
"grad_norm": 0.21010131737465074,
|
||
|
|
"learning_rate": 3.705627705627706e-05,
|
||
|
|
"loss": 0.0997,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.036513783037662506,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 3618.1,
|
||
|
|
"valid_targets_min": 1909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6680161943319838,
|
||
|
|
"grad_norm": 0.23506507997333073,
|
||
|
|
"learning_rate": 3.792207792207792e-05,
|
||
|
|
"loss": 0.0971,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03473879396915436,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 4144.3,
|
||
|
|
"valid_targets_min": 1705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6831983805668016,
|
||
|
|
"grad_norm": 0.22120984855480424,
|
||
|
|
"learning_rate": 3.878787878787879e-05,
|
||
|
|
"loss": 0.0964,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0331815741956234,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 3167.5,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6983805668016194,
|
||
|
|
"grad_norm": 0.25988340878886085,
|
||
|
|
"learning_rate": 3.965367965367966e-05,
|
||
|
|
"loss": 0.0926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028744719922542572,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 4077.3,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7135627530364372,
|
||
|
|
"grad_norm": 0.2198707685293358,
|
||
|
|
"learning_rate": 3.999979449020199e-05,
|
||
|
|
"loss": 0.0949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.027942223474383354,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 3489.4,
|
||
|
|
"valid_targets_min": 2089
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.728744939271255,
|
||
|
|
"grad_norm": 0.2512013331681874,
|
||
|
|
"learning_rate": 3.99985386122866e-05,
|
||
|
|
"loss": 0.0918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03290600702166557,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 3947.7,
|
||
|
|
"valid_targets_min": 1982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7439271255060729,
|
||
|
|
"grad_norm": 0.3080227691249006,
|
||
|
|
"learning_rate": 3.999614110017182e-05,
|
||
|
|
"loss": 0.0911,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03256411850452423,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 3425.2,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7591093117408907,
|
||
|
|
"grad_norm": 0.24376380120538735,
|
||
|
|
"learning_rate": 3.999260209072175e-05,
|
||
|
|
"loss": 0.0902,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.033616803586483,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 4074.4,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7742914979757085,
|
||
|
|
"grad_norm": 0.26393343539237596,
|
||
|
|
"learning_rate": 3.998792178596384e-05,
|
||
|
|
"loss": 0.0884,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.031163375824689865,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 3201.4,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7894736842105263,
|
||
|
|
"grad_norm": 0.2950071263104927,
|
||
|
|
"learning_rate": 3.998210045307744e-05,
|
||
|
|
"loss": 0.0831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028338339179754257,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 2662.2,
|
||
|
|
"valid_targets_min": 1614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8046558704453441,
|
||
|
|
"grad_norm": 0.2498074801573284,
|
||
|
|
"learning_rate": 3.997513842437845e-05,
|
||
|
|
"loss": 0.0866,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02717544138431549,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 3514.4,
|
||
|
|
"valid_targets_min": 1858
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.819838056680162,
|
||
|
|
"grad_norm": 0.23032972865701806,
|
||
|
|
"learning_rate": 3.996703609730042e-05,
|
||
|
|
"loss": 0.0808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.025840098038315773,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 4206.3,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8350202429149798,
|
||
|
|
"grad_norm": 0.24366277463581495,
|
||
|
|
"learning_rate": 3.995779393437183e-05,
|
||
|
|
"loss": 0.0813,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028123484924435616,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 4017.1,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8502024291497976,
|
||
|
|
"grad_norm": 0.24048544757846063,
|
||
|
|
"learning_rate": 3.99474124631897e-05,
|
||
|
|
"loss": 0.0758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.025402957573533058,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 3661.7,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8653846153846154,
|
||
|
|
"grad_norm": 0.25608041435536016,
|
||
|
|
"learning_rate": 3.993589227638944e-05,
|
||
|
|
"loss": 0.0791,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.026522433385252953,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 3941.0,
|
||
|
|
"valid_targets_min": 1906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8805668016194332,
|
||
|
|
"grad_norm": 0.22710504614212065,
|
||
|
|
"learning_rate": 3.992323403161109e-05,
|
||
|
|
"loss": 0.0763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.023328838869929314,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 3859.6,
|
||
|
|
"valid_targets_min": 2009
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.895748987854251,
|
||
|
|
"grad_norm": 0.2654629959385941,
|
||
|
|
"learning_rate": 3.9909438451461695e-05,
|
||
|
|
"loss": 0.0745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02832880988717079,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 4143.8,
|
||
|
|
"valid_targets_min": 1914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9109311740890689,
|
||
|
|
"grad_norm": 0.29183154303619374,
|
||
|
|
"learning_rate": 3.989450632347411e-05,
|
||
|
|
"loss": 0.0734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.022790387272834778,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 3484.5,
|
||
|
|
"valid_targets_min": 1921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9261133603238867,
|
||
|
|
"grad_norm": 0.3566532221776508,
|
||
|
|
"learning_rate": 3.987843850006201e-05,
|
||
|
|
"loss": 0.0712,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.022190725430846214,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 2891.0,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9412955465587044,
|
||
|
|
"grad_norm": 0.26674310703985965,
|
||
|
|
"learning_rate": 3.9861235898471265e-05,
|
||
|
|
"loss": 0.0729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.023799803107976913,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 3956.6,
|
||
|
|
"valid_targets_min": 1833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9564777327935222,
|
||
|
|
"grad_norm": 0.29199745257891796,
|
||
|
|
"learning_rate": 3.984289950072754e-05,
|
||
|
|
"loss": 0.0713,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02254338748753071,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 4054.1,
|
||
|
|
"valid_targets_min": 1914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.97165991902834,
|
||
|
|
"grad_norm": 0.2608782949115974,
|
||
|
|
"learning_rate": 3.982343035358026e-05,
|
||
|
|
"loss": 0.0651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020145319402217865,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 3363.0,
|
||
|
|
"valid_targets_min": 1952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9868421052631579,
|
||
|
|
"grad_norm": 0.2605029425084141,
|
||
|
|
"learning_rate": 3.980282956844284e-05,
|
||
|
|
"loss": 0.0698,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.021091826260089874,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 4190.8,
|
||
|
|
"valid_targets_min": 2033
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.49958698750589686,
|
||
|
|
"learning_rate": 3.9781098321329266e-05,
|
||
|
|
"loss": 0.0643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06208954006433487,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 3570.5,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0151821862348178,
|
||
|
|
"grad_norm": 0.3517614757355847,
|
||
|
|
"learning_rate": 3.975823785278691e-05,
|
||
|
|
"loss": 0.0621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020536350086331367,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 3419.4,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0303643724696356,
|
||
|
|
"grad_norm": 0.2854228478643194,
|
||
|
|
"learning_rate": 3.973424946782578e-05,
|
||
|
|
"loss": 0.0598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017223788425326347,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 3528.8,
|
||
|
|
"valid_targets_min": 1868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0455465587044535,
|
||
|
|
"grad_norm": 0.3233091980289824,
|
||
|
|
"learning_rate": 3.970913453584397e-05,
|
||
|
|
"loss": 0.059,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02106652781367302,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 2958.7,
|
||
|
|
"valid_targets_min": 1361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0607287449392713,
|
||
|
|
"grad_norm": 0.3064728717802903,
|
||
|
|
"learning_rate": 3.9682894490549485e-05,
|
||
|
|
"loss": 0.0571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017787862569093704,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 3791.8,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.075910931174089,
|
||
|
|
"grad_norm": 0.3550747006437259,
|
||
|
|
"learning_rate": 3.965553082987846e-05,
|
||
|
|
"loss": 0.0577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01955043524503708,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 3638.7,
|
||
|
|
"valid_targets_min": 2175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.091093117408907,
|
||
|
|
"grad_norm": 0.3595844929237117,
|
||
|
|
"learning_rate": 3.9627045115909556e-05,
|
||
|
|
"loss": 0.0529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.021266572177410126,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 3870.2,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1062753036437247,
|
||
|
|
"grad_norm": 0.35674131793904656,
|
||
|
|
"learning_rate": 3.959743897477486e-05,
|
||
|
|
"loss": 0.0555,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.019414305686950684,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 3224.4,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1214574898785425,
|
||
|
|
"grad_norm": 0.33354777284849,
|
||
|
|
"learning_rate": 3.9566714096567035e-05,
|
||
|
|
"loss": 0.0507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01832713559269905,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 2999.0,
|
||
|
|
"valid_targets_min": 2026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1366396761133604,
|
||
|
|
"grad_norm": 0.3572142082641998,
|
||
|
|
"learning_rate": 3.953487223524283e-05,
|
||
|
|
"loss": 0.0497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01831241324543953,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 3691.7,
|
||
|
|
"valid_targets_min": 1984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1518218623481782,
|
||
|
|
"grad_norm": 0.29755826461303286,
|
||
|
|
"learning_rate": 3.950191520852294e-05,
|
||
|
|
"loss": 0.0472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01382131315767765,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 4103.9,
|
||
|
|
"valid_targets_min": 1614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.167004048582996,
|
||
|
|
"grad_norm": 0.30999054365851664,
|
||
|
|
"learning_rate": 3.94678448977883e-05,
|
||
|
|
"loss": 0.048,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015520155429840088,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 4121.5,
|
||
|
|
"valid_targets_min": 1972
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1821862348178138,
|
||
|
|
"grad_norm": 0.3249382615221146,
|
||
|
|
"learning_rate": 3.9432663247972614e-05,
|
||
|
|
"loss": 0.0446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015577755868434906,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 4134.5,
|
||
|
|
"valid_targets_min": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1973684210526316,
|
||
|
|
"grad_norm": 0.3390863501155492,
|
||
|
|
"learning_rate": 3.9396372267451356e-05,
|
||
|
|
"loss": 0.0415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01336907222867012,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 3892.3,
|
||
|
|
"valid_targets_min": 1909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2125506072874495,
|
||
|
|
"grad_norm": 0.32830844847848906,
|
||
|
|
"learning_rate": 3.935897402792713e-05,
|
||
|
|
"loss": 0.0426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013108298182487488,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 3761.9,
|
||
|
|
"valid_targets_min": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2277327935222673,
|
||
|
|
"grad_norm": 0.27327239189649566,
|
||
|
|
"learning_rate": 3.93204706643114e-05,
|
||
|
|
"loss": 0.0397,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012908820062875748,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 5056.4,
|
||
|
|
"valid_targets_min": 2001
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.242914979757085,
|
||
|
|
"grad_norm": 0.38210126886564383,
|
||
|
|
"learning_rate": 3.92808643746026e-05,
|
||
|
|
"loss": 0.0408,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013885008171200752,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 3872.5,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.258097165991903,
|
||
|
|
"grad_norm": 0.3706267351498937,
|
||
|
|
"learning_rate": 3.924015741976069e-05,
|
||
|
|
"loss": 0.0419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.016143832355737686,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 3473.5,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2732793522267207,
|
||
|
|
"grad_norm": 0.37411564814204534,
|
||
|
|
"learning_rate": 3.919835212357805e-05,
|
||
|
|
"loss": 0.0379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011693507432937622,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 3039.6,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2884615384615383,
|
||
|
|
"grad_norm": 0.3738071501361852,
|
||
|
|
"learning_rate": 3.915545087254685e-05,
|
||
|
|
"loss": 0.0363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012262335047125816,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 3385.7,
|
||
|
|
"valid_targets_min": 1847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3036437246963564,
|
||
|
|
"grad_norm": 0.3576938080986102,
|
||
|
|
"learning_rate": 3.911145611572282e-05,
|
||
|
|
"loss": 0.0365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011721448972821236,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 3911.8,
|
||
|
|
"valid_targets_min": 1950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.318825910931174,
|
||
|
|
"grad_norm": 0.3801696139828887,
|
||
|
|
"learning_rate": 3.906637036458541e-05,
|
||
|
|
"loss": 0.0364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012942207977175713,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 4149.7,
|
||
|
|
"valid_targets_min": 1836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.334008097165992,
|
||
|
|
"grad_norm": 0.42154049142005257,
|
||
|
|
"learning_rate": 3.902019619289446e-05,
|
||
|
|
"loss": 0.0332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.010295730084180832,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 3260.5,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3491902834008096,
|
||
|
|
"grad_norm": 0.34325697993861265,
|
||
|
|
"learning_rate": 3.897293623654325e-05,
|
||
|
|
"loss": 0.032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01096697524189949,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 4510.7,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3643724696356276,
|
||
|
|
"grad_norm": 0.3393654204009086,
|
||
|
|
"learning_rate": 3.892459319340802e-05,
|
||
|
|
"loss": 0.0315,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0076165408827364445,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 4055.2,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3795546558704452,
|
||
|
|
"grad_norm": 0.3178749236155793,
|
||
|
|
"learning_rate": 3.8875169823194e-05,
|
||
|
|
"loss": 0.0328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008760586380958557,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 4131.2,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3947368421052633,
|
||
|
|
"grad_norm": 0.35459393594679645,
|
||
|
|
"learning_rate": 3.882466894727784e-05,
|
||
|
|
"loss": 0.029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011605681851506233,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 4701.0,
|
||
|
|
"valid_targets_min": 2136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4099190283400809,
|
||
|
|
"grad_norm": 0.3554147810860507,
|
||
|
|
"learning_rate": 3.8773093448546525e-05,
|
||
|
|
"loss": 0.0305,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009093903936445713,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 4877.9,
|
||
|
|
"valid_targets_min": 1993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.425101214574899,
|
||
|
|
"grad_norm": 0.3322981382434928,
|
||
|
|
"learning_rate": 3.872044627123285e-05,
|
||
|
|
"loss": 0.0277,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008347099646925926,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 3889.0,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4402834008097165,
|
||
|
|
"grad_norm": 0.3586832841082899,
|
||
|
|
"learning_rate": 3.8666730420747336e-05,
|
||
|
|
"loss": 0.0252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008482735604047775,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 5741.5,
|
||
|
|
"valid_targets_min": 1719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4554655870445345,
|
||
|
|
"grad_norm": 0.3449683621618516,
|
||
|
|
"learning_rate": 3.861194896350664e-05,
|
||
|
|
"loss": 0.025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007174181751906872,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 3971.8,
|
||
|
|
"valid_targets_min": 2054
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4706477732793521,
|
||
|
|
"grad_norm": 0.38172194791195513,
|
||
|
|
"learning_rate": 3.855610502675851e-05,
|
||
|
|
"loss": 0.0258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008220924995839596,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 3225.4,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.48582995951417,
|
||
|
|
"grad_norm": 0.39468747059493126,
|
||
|
|
"learning_rate": 3.8499201798403303e-05,
|
||
|
|
"loss": 0.0237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008082335814833641,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 3134.7,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5010121457489878,
|
||
|
|
"grad_norm": 0.38510105990532767,
|
||
|
|
"learning_rate": 3.8441242526811934e-05,
|
||
|
|
"loss": 0.0242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007228832691907883,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 3831.7,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5161943319838058,
|
||
|
|
"grad_norm": 0.36977732484318715,
|
||
|
|
"learning_rate": 3.8382230520640506e-05,
|
||
|
|
"loss": 0.0227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008262425661087036,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 2815.1,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5313765182186234,
|
||
|
|
"grad_norm": 0.35357810495611536,
|
||
|
|
"learning_rate": 3.832216914864139e-05,
|
||
|
|
"loss": 0.0224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005848760716617107,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 4798.8,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5465587044534415,
|
||
|
|
"grad_norm": 0.3904998911339273,
|
||
|
|
"learning_rate": 3.826106183947095e-05,
|
||
|
|
"loss": 0.0224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006712470203638077,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 3039.0,
|
||
|
|
"valid_targets_min": 1857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.561740890688259,
|
||
|
|
"grad_norm": 0.4129800687052208,
|
||
|
|
"learning_rate": 3.8198912081493735e-05,
|
||
|
|
"loss": 0.0199,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006589945405721664,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 4029.6,
|
||
|
|
"valid_targets_min": 1361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5769230769230769,
|
||
|
|
"grad_norm": 0.3340998683272901,
|
||
|
|
"learning_rate": 3.813572342258349e-05,
|
||
|
|
"loss": 0.0216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008358299732208252,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 5110.7,
|
||
|
|
"valid_targets_min": 1988
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5921052631578947,
|
||
|
|
"grad_norm": 0.33745329083462733,
|
||
|
|
"learning_rate": 3.807149946992047e-05,
|
||
|
|
"loss": 0.0185,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005512586794793606,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 3048.0,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6072874493927125,
|
||
|
|
"grad_norm": 0.312750093757099,
|
||
|
|
"learning_rate": 3.800624388978561e-05,
|
||
|
|
"loss": 0.0198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00563475489616394,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 3952.0,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6224696356275303,
|
||
|
|
"grad_norm": 0.3507969684421235,
|
||
|
|
"learning_rate": 3.793996040735119e-05,
|
||
|
|
"loss": 0.0198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006162258796393871,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 4687.3,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6376518218623481,
|
||
|
|
"grad_norm": 0.3978767585489112,
|
||
|
|
"learning_rate": 3.787265280646825e-05,
|
||
|
|
"loss": 0.0181,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0057417117059230804,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 3631.1,
|
||
|
|
"valid_targets_min": 2011
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.652834008097166,
|
||
|
|
"grad_norm": 0.3989031119123343,
|
||
|
|
"learning_rate": 3.7804324929450465e-05,
|
||
|
|
"loss": 0.0177,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005967521108686924,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 2856.7,
|
||
|
|
"valid_targets_min": 1966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6680161943319838,
|
||
|
|
"grad_norm": 0.3348844681723015,
|
||
|
|
"learning_rate": 3.7734980676854916e-05,
|
||
|
|
"loss": 0.0144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005094584077596664,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 4242.0,
|
||
|
|
"valid_targets_min": 1719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6831983805668016,
|
||
|
|
"grad_norm": 0.4032708111959252,
|
||
|
|
"learning_rate": 3.7664624007259375e-05,
|
||
|
|
"loss": 0.0161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0055177537724375725,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 4124.0,
|
||
|
|
"valid_targets_min": 2062
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6983805668016194,
|
||
|
|
"grad_norm": 0.4266538721171362,
|
||
|
|
"learning_rate": 3.759325893703631e-05,
|
||
|
|
"loss": 0.0153,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007654810324311256,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 3496.0,
|
||
|
|
"valid_targets_min": 2161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7135627530364372,
|
||
|
|
"grad_norm": 0.3869133942682408,
|
||
|
|
"learning_rate": 3.752088954012366e-05,
|
||
|
|
"loss": 0.0154,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005498781334608793,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 3297.1,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.728744939271255,
|
||
|
|
"grad_norm": 0.43044241562549973,
|
||
|
|
"learning_rate": 3.74475199477922e-05,
|
||
|
|
"loss": 0.0142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00495559349656105,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 3406.3,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7439271255060729,
|
||
|
|
"grad_norm": 0.34397533392514673,
|
||
|
|
"learning_rate": 3.7373154348409775e-05,
|
||
|
|
"loss": 0.0138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0035975920036435127,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 3701.0,
|
||
|
|
"valid_targets_min": 1751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7591093117408907,
|
||
|
|
"grad_norm": 0.43393397037535875,
|
||
|
|
"learning_rate": 3.729779698720215e-05,
|
||
|
|
"loss": 0.0137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004775308538228273,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 3627.1,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7742914979757085,
|
||
|
|
"grad_norm": 0.3800998644305244,
|
||
|
|
"learning_rate": 3.7221452166010704e-05,
|
||
|
|
"loss": 0.0137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005272631533443928,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 3924.4,
|
||
|
|
"valid_targets_min": 1928
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7894736842105263,
|
||
|
|
"grad_norm": 0.3443164029133719,
|
||
|
|
"learning_rate": 3.7144124243046815e-05,
|
||
|
|
"loss": 0.013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0038471673615276814,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 5039.9,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8046558704453441,
|
||
|
|
"grad_norm": 0.350417365467524,
|
||
|
|
"learning_rate": 3.7065817632643115e-05,
|
||
|
|
"loss": 0.0123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003794080577790737,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 4666.0,
|
||
|
|
"valid_targets_min": 1614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.819838056680162,
|
||
|
|
"grad_norm": 0.36417396659912826,
|
||
|
|
"learning_rate": 3.6986536805001466e-05,
|
||
|
|
"loss": 0.0115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003392170649021864,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 3548.5,
|
||
|
|
"valid_targets_min": 1705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8350202429149798,
|
||
|
|
"grad_norm": 0.36140932437576206,
|
||
|
|
"learning_rate": 3.690628628593777e-05,
|
||
|
|
"loss": 0.0115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003543621161952615,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 4250.8,
|
||
|
|
"valid_targets_min": 1987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8502024291497976,
|
||
|
|
"grad_norm": 0.3849250151627738,
|
||
|
|
"learning_rate": 3.6825070656623626e-05,
|
||
|
|
"loss": 0.0109,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003462968859821558,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 4138.9,
|
||
|
|
"valid_targets_min": 1786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8653846153846154,
|
||
|
|
"grad_norm": 0.4065519993338767,
|
||
|
|
"learning_rate": 3.67428945533248e-05,
|
||
|
|
"loss": 0.0107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003528381697833538,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 3596.1,
|
||
|
|
"valid_targets_min": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8805668016194332,
|
||
|
|
"grad_norm": 0.39709724005253444,
|
||
|
|
"learning_rate": 3.6659762667136567e-05,
|
||
|
|
"loss": 0.0104,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004830116406083107,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 3995.3,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.895748987854251,
|
||
|
|
"grad_norm": 0.43135250156903715,
|
||
|
|
"learning_rate": 3.65756797437159e-05,
|
||
|
|
"loss": 0.0107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00334871094673872,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 4311.0,
|
||
|
|
"valid_targets_min": 1952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9109311740890689,
|
||
|
|
"grad_norm": 0.41559937795001983,
|
||
|
|
"learning_rate": 3.6490650583010585e-05,
|
||
|
|
"loss": 0.0105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002513420768082142,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 3494.0,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9261133603238867,
|
||
|
|
"grad_norm": 0.3191353293129526,
|
||
|
|
"learning_rate": 3.640468003898518e-05,
|
||
|
|
"loss": 0.0092,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0022717821411788464,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 4398.0,
|
||
|
|
"valid_targets_min": 2108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9412955465587043,
|
||
|
|
"grad_norm": 0.3615731065002591,
|
||
|
|
"learning_rate": 3.631777301934395e-05,
|
||
|
|
"loss": 0.0102,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002962145721539855,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 3165.5,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9564777327935223,
|
||
|
|
"grad_norm": 0.3574760313813989,
|
||
|
|
"learning_rate": 3.6229934485250684e-05,
|
||
|
|
"loss": 0.0088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002793194493278861,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 4149.3,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.97165991902834,
|
||
|
|
"grad_norm": 0.3198872124954838,
|
||
|
|
"learning_rate": 3.6141169451045526e-05,
|
||
|
|
"loss": 0.009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002392456866800785,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 3438.7,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.986842105263158,
|
||
|
|
"grad_norm": 0.36040642033041,
|
||
|
|
"learning_rate": 3.605148298395865e-05,
|
||
|
|
"loss": 0.0084,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002848510630428791,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 4353.1,
|
||
|
|
"valid_targets_min": 2250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.46690595438771804,
|
||
|
|
"learning_rate": 3.5960880203821086e-05,
|
||
|
|
"loss": 0.0074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005791595205664635,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 5059.7,
|
||
|
|
"valid_targets_min": 2012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0151821862348176,
|
||
|
|
"grad_norm": 0.3086961534141033,
|
||
|
|
"learning_rate": 3.5869366282772354e-05,
|
||
|
|
"loss": 0.0068,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0027070892974734306,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 4679.2,
|
||
|
|
"valid_targets_min": 1914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0303643724696356,
|
||
|
|
"grad_norm": 0.5141555939134816,
|
||
|
|
"learning_rate": 3.577694644496529e-05,
|
||
|
|
"loss": 0.0068,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002436819253489375,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 4227.5,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0455465587044532,
|
||
|
|
"grad_norm": 0.43908242627038974,
|
||
|
|
"learning_rate": 3.568362596626778e-05,
|
||
|
|
"loss": 0.0073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0031463452614843845,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 2652.4,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0607287449392713,
|
||
|
|
"grad_norm": 0.3110008922775847,
|
||
|
|
"learning_rate": 3.558941017396158e-05,
|
||
|
|
"loss": 0.0067,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002493108157068491,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 3305.4,
|
||
|
|
"valid_targets_min": 1838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.075910931174089,
|
||
|
|
"grad_norm": 0.38436476625887644,
|
||
|
|
"learning_rate": 3.549430444643821e-05,
|
||
|
|
"loss": 0.0069,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0021933787502348423,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 4065.9,
|
||
|
|
"valid_targets_min": 1361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.091093117408907,
|
||
|
|
"grad_norm": 0.2967727514035273,
|
||
|
|
"learning_rate": 3.539831421289195e-05,
|
||
|
|
"loss": 0.0063,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001930978032760322,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 3370.7,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1062753036437245,
|
||
|
|
"grad_norm": 0.30290776522140095,
|
||
|
|
"learning_rate": 3.530144495300984e-05,
|
||
|
|
"loss": 0.0065,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0023562649730592966,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 3699.5,
|
||
|
|
"valid_targets_min": 1889
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1214574898785425,
|
||
|
|
"grad_norm": 0.351534713475806,
|
||
|
|
"learning_rate": 3.520370219665897e-05,
|
||
|
|
"loss": 0.0062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002899039536714554,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 3886.0,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.13663967611336,
|
||
|
|
"grad_norm": 0.3563510200016387,
|
||
|
|
"learning_rate": 3.51050915235707e-05,
|
||
|
|
"loss": 0.0062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0015795058570802212,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 3066.1,
|
||
|
|
"valid_targets_min": 1914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.151821862348178,
|
||
|
|
"grad_norm": 0.3546551147431412,
|
||
|
|
"learning_rate": 3.500561856302221e-05,
|
||
|
|
"loss": 0.0051,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0022271571215242147,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 3339.6,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.167004048582996,
|
||
|
|
"grad_norm": 0.35262458466544816,
|
||
|
|
"learning_rate": 3.49052889935151e-05,
|
||
|
|
"loss": 0.0061,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002120173303410411,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 4100.4,
|
||
|
|
"valid_targets_min": 1917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.182186234817814,
|
||
|
|
"grad_norm": 0.3293340860049528,
|
||
|
|
"learning_rate": 3.480410854245125e-05,
|
||
|
|
"loss": 0.0057,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0014285575598478317,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 3649.8,
|
||
|
|
"valid_targets_min": 1833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1973684210526314,
|
||
|
|
"grad_norm": 0.5061960834389405,
|
||
|
|
"learning_rate": 3.470208298580589e-05,
|
||
|
|
"loss": 0.005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0015443963930010796,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 3069.6,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2125506072874495,
|
||
|
|
"grad_norm": 0.4309051225343975,
|
||
|
|
"learning_rate": 3.459921814779781e-05,
|
||
|
|
"loss": 0.0058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0017398251220583916,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 3375.6,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.227732793522267,
|
||
|
|
"grad_norm": 0.3077097538983566,
|
||
|
|
"learning_rate": 3.449551990055694e-05,
|
||
|
|
"loss": 0.0059,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0018000556156039238,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 3955.1,
|
||
|
|
"valid_targets_min": 1952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.242914979757085,
|
||
|
|
"grad_norm": 0.30156612197970506,
|
||
|
|
"learning_rate": 3.43909941637891e-05,
|
||
|
|
"loss": 0.0047,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001549935550428927,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 4175.6,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2580971659919027,
|
||
|
|
"grad_norm": 0.3303990159285708,
|
||
|
|
"learning_rate": 3.428564690443807e-05,
|
||
|
|
"loss": 0.005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00195104512386024,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 3796.0,
|
||
|
|
"valid_targets_min": 1972
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2732793522267207,
|
||
|
|
"grad_norm": 0.3550032784559833,
|
||
|
|
"learning_rate": 3.4179484136345e-05,
|
||
|
|
"loss": 0.0051,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0022843745537102222,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 3809.5,
|
||
|
|
"valid_targets_min": 2012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2884615384615383,
|
||
|
|
"grad_norm": 0.32069183968575876,
|
||
|
|
"learning_rate": 3.4072511919905055e-05,
|
||
|
|
"loss": 0.0048,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013017680030316114,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 3084.6,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3036437246963564,
|
||
|
|
"grad_norm": 0.3034076767029701,
|
||
|
|
"learning_rate": 3.396473636172146e-05,
|
||
|
|
"loss": 0.0053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0012294766493141651,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 3970.5,
|
||
|
|
"valid_targets_min": 2022
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.318825910931174,
|
||
|
|
"grad_norm": 0.2846591620683624,
|
||
|
|
"learning_rate": 3.385616361425694e-05,
|
||
|
|
"loss": 0.0042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0012074895203113556,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 3346.2,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.334008097165992,
|
||
|
|
"grad_norm": 0.3098011660093303,
|
||
|
|
"learning_rate": 3.374679987548245e-05,
|
||
|
|
"loss": 0.0047,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0016755054239183664,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 3709.0,
|
||
|
|
"valid_targets_min": 1868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3491902834008096,
|
||
|
|
"grad_norm": 0.33684540313817013,
|
||
|
|
"learning_rate": 3.363665138852339e-05,
|
||
|
|
"loss": 0.0047,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0017445024568587542,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 3343.7,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3643724696356276,
|
||
|
|
"grad_norm": 0.28841752638713786,
|
||
|
|
"learning_rate": 3.3525724441303206e-05,
|
||
|
|
"loss": 0.0045,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013562100939452648,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 4165.9,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3795546558704452,
|
||
|
|
"grad_norm": 0.31441583198110107,
|
||
|
|
"learning_rate": 3.341402536618442e-05,
|
||
|
|
"loss": 0.0047,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013484981609508395,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 3558.7,
|
||
|
|
"valid_targets_min": 1652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3947368421052633,
|
||
|
|
"grad_norm": 0.2949036404882296,
|
||
|
|
"learning_rate": 3.3301560539607174e-05,
|
||
|
|
"loss": 0.0043,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010312916710972786,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 3842.0,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.409919028340081,
|
||
|
|
"grad_norm": 0.2620839599941604,
|
||
|
|
"learning_rate": 3.318833638172519e-05,
|
||
|
|
"loss": 0.0042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0012249585706740618,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 4623.7,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.425101214574899,
|
||
|
|
"grad_norm": 0.3800432042851446,
|
||
|
|
"learning_rate": 3.3074359356039306e-05,
|
||
|
|
"loss": 0.0041,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013209573226049542,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 4689.0,
|
||
|
|
"valid_targets_min": 2002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4402834008097165,
|
||
|
|
"grad_norm": 0.29825430303257133,
|
||
|
|
"learning_rate": 3.295963596902846e-05,
|
||
|
|
"loss": 0.0038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010712643852457404,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 3675.2,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4554655870445345,
|
||
|
|
"grad_norm": 0.3549713889298078,
|
||
|
|
"learning_rate": 3.284417276977829e-05,
|
||
|
|
"loss": 0.0039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0020012641325592995,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 3737.3,
|
||
|
|
"valid_targets_min": 2033
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.470647773279352,
|
||
|
|
"grad_norm": 0.2860627611811797,
|
||
|
|
"learning_rate": 3.2727976349607276e-05,
|
||
|
|
"loss": 0.0036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0011668192455545068,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 3223.5,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.48582995951417,
|
||
|
|
"grad_norm": 0.4171167140004683,
|
||
|
|
"learning_rate": 3.261105334169045e-05,
|
||
|
|
"loss": 0.004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001440704334527254,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 3412.6,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.501012145748988,
|
||
|
|
"grad_norm": 0.2657891511887698,
|
||
|
|
"learning_rate": 3.249341042068077e-05,
|
||
|
|
"loss": 0.0034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001034251181408763,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 3558.2,
|
||
|
|
"valid_targets_min": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.516194331983806,
|
||
|
|
"grad_norm": 0.30654961251081514,
|
||
|
|
"learning_rate": 3.237505430232803e-05,
|
||
|
|
"loss": 0.0034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010008191457018256,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 3850.9,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5313765182186234,
|
||
|
|
"grad_norm": 0.4077917075540302,
|
||
|
|
"learning_rate": 3.225599174309554e-05,
|
||
|
|
"loss": 0.0038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001669516321271658,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 3555.7,
|
||
|
|
"valid_targets_min": 1847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5465587044534415,
|
||
|
|
"grad_norm": 0.24530074683513636,
|
||
|
|
"learning_rate": 3.213622953977443e-05,
|
||
|
|
"loss": 0.0039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010254059452563524,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 3955.5,
|
||
|
|
"valid_targets_min": 1999
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.561740890688259,
|
||
|
|
"grad_norm": 0.32864369790655235,
|
||
|
|
"learning_rate": 3.2015774529095595e-05,
|
||
|
|
"loss": 0.0033,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001230793772265315,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 3143.4,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5769230769230766,
|
||
|
|
"grad_norm": 0.3057052080874856,
|
||
|
|
"learning_rate": 3.189463358733947e-05,
|
||
|
|
"loss": 0.0035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001283580786548555,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 4546.0,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5921052631578947,
|
||
|
|
"grad_norm": 0.26443076197508136,
|
||
|
|
"learning_rate": 3.1772813629943455e-05,
|
||
|
|
"loss": 0.0032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007922856602817774,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 3890.5,
|
||
|
|
"valid_targets_min": 2091
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6072874493927127,
|
||
|
|
"grad_norm": 0.3024850587375544,
|
||
|
|
"learning_rate": 3.1650321611107195e-05,
|
||
|
|
"loss": 0.0032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013172691687941551,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 3662.3,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6224696356275303,
|
||
|
|
"grad_norm": 0.2245143625363592,
|
||
|
|
"learning_rate": 3.152716452339549e-05,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010381643660366535,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 3687.8,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.637651821862348,
|
||
|
|
"grad_norm": 0.2319845253309359,
|
||
|
|
"learning_rate": 3.140334939733924e-05,
|
||
|
|
"loss": 0.003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010071625001728535,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 3984.7,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.652834008097166,
|
||
|
|
"grad_norm": 0.2999816531524745,
|
||
|
|
"learning_rate": 3.127888330103401e-05,
|
||
|
|
"loss": 0.0033,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0014974744990468025,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 3974.6,
|
||
|
|
"valid_targets_min": 1959
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.668016194331984,
|
||
|
|
"grad_norm": 0.24459887355408508,
|
||
|
|
"learning_rate": 3.115377333973659e-05,
|
||
|
|
"loss": 0.0032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007494261953979731,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 3982.6,
|
||
|
|
"valid_targets_min": 1719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6831983805668016,
|
||
|
|
"grad_norm": 0.3456234320943784,
|
||
|
|
"learning_rate": 3.1028026655459376e-05,
|
||
|
|
"loss": 0.0031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0008580339490436018,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 2904.8,
|
||
|
|
"valid_targets_min": 1829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.698380566801619,
|
||
|
|
"grad_norm": 0.2471478749326246,
|
||
|
|
"learning_rate": 3.0901650426562634e-05,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0008178052376024425,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 3374.6,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7135627530364372,
|
||
|
|
"grad_norm": 0.2464200482588868,
|
||
|
|
"learning_rate": 3.0774651867344765e-05,
|
||
|
|
"loss": 0.0027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007923309458419681,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 3469.3,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7287449392712553,
|
||
|
|
"grad_norm": 0.24743551699636934,
|
||
|
|
"learning_rate": 3.064703822763043e-05,
|
||
|
|
"loss": 0.0025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009716333006508648,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 3838.9,
|
||
|
|
"valid_targets_min": 1928
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.743927125506073,
|
||
|
|
"grad_norm": 0.24382707091111458,
|
||
|
|
"learning_rate": 3.05188167923567e-05,
|
||
|
|
"loss": 0.0025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007524531101807952,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 3641.1,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7591093117408905,
|
||
|
|
"grad_norm": 0.27563793352761773,
|
||
|
|
"learning_rate": 3.0389994881157207e-05,
|
||
|
|
"loss": 0.0027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010416421573609114,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 3916.1,
|
||
|
|
"valid_targets_min": 2119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7742914979757085,
|
||
|
|
"grad_norm": 0.2548236090714257,
|
||
|
|
"learning_rate": 3.0260579847944255e-05,
|
||
|
|
"loss": 0.0027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001117991516366601,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 3880.7,
|
||
|
|
"valid_targets_min": 1909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7894736842105265,
|
||
|
|
"grad_norm": 0.24013475075200436,
|
||
|
|
"learning_rate": 3.0130579080489064e-05,
|
||
|
|
"loss": 0.0027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009403123985975981,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 3948.9,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.804655870445344,
|
||
|
|
"grad_norm": 0.29011835156635063,
|
||
|
|
"learning_rate": 3.0000000000000004e-05,
|
||
|
|
"loss": 0.0025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0008392734453082085,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 4018.8,
|
||
|
|
"valid_targets_min": 1939
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8198380566801617,
|
||
|
|
"grad_norm": 0.2859299851817194,
|
||
|
|
"learning_rate": 2.986885006069894e-05,
|
||
|
|
"loss": 0.0024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007751369848847389,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 3843.9,
|
||
|
|
"valid_targets_min": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.83502024291498,
|
||
|
|
"grad_norm": 0.2437202356368781,
|
||
|
|
"learning_rate": 2.9737136749395743e-05,
|
||
|
|
"loss": 0.0024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006786256562918425,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 3863.4,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.850202429149798,
|
||
|
|
"grad_norm": 0.18808040900211082,
|
||
|
|
"learning_rate": 2.9604867585060834e-05,
|
||
|
|
"loss": 0.0023,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004895664169453084,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 3717.2,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8653846153846154,
|
||
|
|
"grad_norm": 0.24739316453302462,
|
||
|
|
"learning_rate": 2.9472050118396034e-05,
|
||
|
|
"loss": 0.0022,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007075212197378278,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 3184.4,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.880566801619433,
|
||
|
|
"grad_norm": 0.25348710037557143,
|
||
|
|
"learning_rate": 2.933869193140346e-05,
|
||
|
|
"loss": 0.0027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000714290770702064,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 3851.3,
|
||
|
|
"valid_targets_min": 1857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.895748987854251,
|
||
|
|
"grad_norm": 0.30784504777080807,
|
||
|
|
"learning_rate": 2.920480063695272e-05,
|
||
|
|
"loss": 0.0025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010556428460404277,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 2977.5,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.910931174089069,
|
||
|
|
"grad_norm": 0.2911314615861319,
|
||
|
|
"learning_rate": 2.9070383878346358e-05,
|
||
|
|
"loss": 0.0026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009442472364753485,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 3793.8,
|
||
|
|
"valid_targets_min": 2108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9261133603238867,
|
||
|
|
"grad_norm": 0.20444215891104295,
|
||
|
|
"learning_rate": 2.8935449328883478e-05,
|
||
|
|
"loss": 0.0023,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006053050747141242,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 3659.3,
|
||
|
|
"valid_targets_min": 2040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9412955465587043,
|
||
|
|
"grad_norm": 0.21145601842681971,
|
||
|
|
"learning_rate": 2.8800004691421743e-05,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006384883308783174,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 4265.0,
|
||
|
|
"valid_targets_min": 2112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9564777327935223,
|
||
|
|
"grad_norm": 0.2699283880687608,
|
||
|
|
"learning_rate": 2.866405769793764e-05,
|
||
|
|
"loss": 0.002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006649479037150741,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 3668.4,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.97165991902834,
|
||
|
|
"grad_norm": 0.23944753018673426,
|
||
|
|
"learning_rate": 2.8527616109085082e-05,
|
||
|
|
"loss": 0.0021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005429279990494251,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 3805.8,
|
||
|
|
"valid_targets_min": 1982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.986842105263158,
|
||
|
|
"grad_norm": 0.25163553220972906,
|
||
|
|
"learning_rate": 2.8390687713752405e-05,
|
||
|
|
"loss": 0.0023,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006394551019184291,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 3765.7,
|
||
|
|
"valid_targets_min": 1993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.39846333222513913,
|
||
|
|
"learning_rate": 2.8253280328617712e-05,
|
||
|
|
"loss": 0.0022,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0021019824780523777,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 3093.5,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0151821862348176,
|
||
|
|
"grad_norm": 0.2381160114566344,
|
||
|
|
"learning_rate": 2.811540179770268e-05,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006175469607114792,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 3804.4,
|
||
|
|
"valid_targets_min": 1995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0303643724696356,
|
||
|
|
"grad_norm": 0.2884186698366421,
|
||
|
|
"learning_rate": 2.7977059991924734e-05,
|
||
|
|
"loss": 0.002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007128264405764639,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 3500.3,
|
||
|
|
"valid_targets_min": 1993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0455465587044532,
|
||
|
|
"grad_norm": 0.20093812555604723,
|
||
|
|
"learning_rate": 2.7838262808647766e-05,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004944343236275017,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 4355.8,
|
||
|
|
"valid_targets_min": 1719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0607287449392713,
|
||
|
|
"grad_norm": 0.26522159915364935,
|
||
|
|
"learning_rate": 2.7699018171231288e-05,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000667234358843416,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 3384.2,
|
||
|
|
"valid_targets_min": 1836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.075910931174089,
|
||
|
|
"grad_norm": 0.22054124105859335,
|
||
|
|
"learning_rate": 2.7559334028578135e-05,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000564101617783308,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 3431.9,
|
||
|
|
"valid_targets_min": 2012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.091093117408907,
|
||
|
|
"grad_norm": 0.2575385847664046,
|
||
|
|
"learning_rate": 2.741921835468068e-05,
|
||
|
|
"loss": 0.0021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007870650733821094,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 2928.0,
|
||
|
|
"valid_targets_min": 1775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1062753036437245,
|
||
|
|
"grad_norm": 0.2563548776430334,
|
||
|
|
"learning_rate": 2.727867914816563e-05,
|
||
|
|
"loss": 0.0021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005891253240406513,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 3362.0,
|
||
|
|
"valid_targets_min": 1925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1214574898785425,
|
||
|
|
"grad_norm": 0.2539609384410491,
|
||
|
|
"learning_rate": 2.713772443183744e-05,
|
||
|
|
"loss": 0.0017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006947282236069441,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 4062.7,
|
||
|
|
"valid_targets_min": 1995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.13663967611336,
|
||
|
|
"grad_norm": 0.2630805724493265,
|
||
|
|
"learning_rate": 2.69963622522203e-05,
|
||
|
|
"loss": 0.0017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006452821544371545,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 3088.9,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.151821862348178,
|
||
|
|
"grad_norm": 0.23340771404576902,
|
||
|
|
"learning_rate": 2.6854600679098798e-05,
|
||
|
|
"loss": 0.002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007457188330590725,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 3796.6,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.167004048582996,
|
||
|
|
"grad_norm": 0.2545402047594371,
|
||
|
|
"learning_rate": 2.6712447805057254e-05,
|
||
|
|
"loss": 0.0017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006669540889561176,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 3303.8,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.182186234817814,
|
||
|
|
"grad_norm": 0.1977935823982838,
|
||
|
|
"learning_rate": 2.656991174501775e-05,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004706026811618358,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 4531.4,
|
||
|
|
"valid_targets_min": 2143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1973684210526314,
|
||
|
|
"grad_norm": 0.25885644125336243,
|
||
|
|
"learning_rate": 2.642700063577689e-05,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007244085427373648,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 3401.1,
|
||
|
|
"valid_targets_min": 1907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2125506072874495,
|
||
|
|
"grad_norm": 0.21305798442785215,
|
||
|
|
"learning_rate": 2.6283722635541255e-05,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005614292458631098,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 4700.9,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.227732793522267,
|
||
|
|
"grad_norm": 0.2085704695560185,
|
||
|
|
"learning_rate": 2.6140085923461756e-05,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005232746480032802,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 3508.2,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.242914979757085,
|
||
|
|
"grad_norm": 0.24048299559546837,
|
||
|
|
"learning_rate": 2.5996098699166678e-05,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003859190037474036,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 2727.9,
|
||
|
|
"valid_targets_min": 1833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2580971659919027,
|
||
|
|
"grad_norm": 0.22844621084804534,
|
||
|
|
"learning_rate": 2.585176918229359e-05,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004476247413549572,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 3507.3,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2732793522267207,
|
||
|
|
"grad_norm": 0.22812670414396327,
|
||
|
|
"learning_rate": 2.5707105612020163e-05,
|
||
|
|
"loss": 0.0017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007404968491755426,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 4335.8,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2884615384615383,
|
||
|
|
"grad_norm": 0.20310600591138983,
|
||
|
|
"learning_rate": 2.5562116246593765e-05,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000392769870813936,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 4831.5,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3036437246963564,
|
||
|
|
"grad_norm": 0.23763448178024907,
|
||
|
|
"learning_rate": 2.5416809362860107e-05,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000676068477332592,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 3571.0,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.318825910931174,
|
||
|
|
"grad_norm": 0.20218095851763454,
|
||
|
|
"learning_rate": 2.5271193255790672e-05,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005099349655210972,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 4507.4,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.334008097165992,
|
||
|
|
"grad_norm": 0.21417308865457832,
|
||
|
|
"learning_rate": 2.5125276238009254e-05,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005469350144267082,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 3763.5,
|
||
|
|
"valid_targets_min": 1943
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3491902834008096,
|
||
|
|
"grad_norm": 0.28167385391387406,
|
||
|
|
"learning_rate": 2.4979066639317405e-05,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007526404224336147,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 4109.8,
|
||
|
|
"valid_targets_min": 1972
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3643724696356276,
|
||
|
|
"grad_norm": 0.19514572984805206,
|
||
|
|
"learning_rate": 2.4832572806218892e-05,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005153155652806163,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 3165.5,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3795546558704452,
|
||
|
|
"grad_norm": 0.19384714346824494,
|
||
|
|
"learning_rate": 2.4685803101443276e-05,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004252713988535106,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 4969.4,
|
||
|
|
"valid_targets_min": 1914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3947368421052633,
|
||
|
|
"grad_norm": 0.4387475072214097,
|
||
|
|
"learning_rate": 2.4538765903468486e-05,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005208882503211498,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 4512.2,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.409919028340081,
|
||
|
|
"grad_norm": 0.22083098676768156,
|
||
|
|
"learning_rate": 2.4391469606042533e-05,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00044841680210083723,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 3083.2,
|
||
|
|
"valid_targets_min": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.425101214574899,
|
||
|
|
"grad_norm": 0.20232748198584524,
|
||
|
|
"learning_rate": 2.4243922617704364e-05,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00048432499170303345,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 3425.8,
|
||
|
|
"valid_targets_min": 1982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4402834008097165,
|
||
|
|
"grad_norm": 0.1848941528609905,
|
||
|
|
"learning_rate": 2.4096133361303816e-05,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005783046362921596,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 3538.1,
|
||
|
|
"valid_targets_min": 1877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4554655870445345,
|
||
|
|
"grad_norm": 0.14415700849293703,
|
||
|
|
"learning_rate": 2.394811027352083e-05,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00033088825875893235,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 4210.4,
|
||
|
|
"valid_targets_min": 2102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.470647773279352,
|
||
|
|
"grad_norm": 0.2295883711357968,
|
||
|
|
"learning_rate": 2.3799861804383807e-05,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005370274884626269,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 2992.8,
|
||
|
|
"valid_targets_min": 1921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.48582995951417,
|
||
|
|
"grad_norm": 0.19281794032541882,
|
||
|
|
"learning_rate": 2.365139641678724e-05,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005609604995697737,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 5191.8,
|
||
|
|
"valid_targets_min": 1929
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.501012145748988,
|
||
|
|
"grad_norm": 0.16533041019213618,
|
||
|
|
"learning_rate": 2.350272258600861e-05,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003157127066515386,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 5001.4,
|
||
|
|
"valid_targets_min": 2055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.516194331983806,
|
||
|
|
"grad_norm": 0.25739931801127197,
|
||
|
|
"learning_rate": 2.335384879922456e-05,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005053319036960602,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 3204.1,
|
||
|
|
"valid_targets_min": 1966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5313765182186234,
|
||
|
|
"grad_norm": 0.18625329582073147,
|
||
|
|
"learning_rate": 2.3204783555026394e-05,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005734963924624026,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 2981.7,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5465587044534415,
|
||
|
|
"grad_norm": 0.16191946120349462,
|
||
|
|
"learning_rate": 2.3055535362934945e-05,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00038139167008921504,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 4048.1,
|
||
|
|
"valid_targets_min": 1972
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.561740890688259,
|
||
|
|
"grad_norm": 0.21441521150079987,
|
||
|
|
"learning_rate": 2.2906112742914756e-05,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006663284148089588,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 3406.9,
|
||
|
|
"valid_targets_min": 1361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5769230769230766,
|
||
|
|
"grad_norm": 0.19764023218275437,
|
||
|
|
"learning_rate": 2.2756524224887776e-05,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004810533719137311,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 3454.8,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5921052631578947,
|
||
|
|
"grad_norm": 0.18338345668330808,
|
||
|
|
"learning_rate": 2.2606778348246366e-05,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004733000823762268,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 3790.9,
|
||
|
|
"valid_targets_min": 1889
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6072874493927127,
|
||
|
|
"grad_norm": 0.15319560553315267,
|
||
|
|
"learning_rate": 2.245688366136586e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003475425182841718,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 3362.1,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6224696356275303,
|
||
|
|
"grad_norm": 0.18461011901234592,
|
||
|
|
"learning_rate": 2.2306848721116535e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004801360482815653,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 3200.9,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.637651821862348,
|
||
|
|
"grad_norm": 0.18334203366611002,
|
||
|
|
"learning_rate": 2.2156682092375175e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005512770148925483,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 3440.0,
|
||
|
|
"valid_targets_min": 2128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.652834008097166,
|
||
|
|
"grad_norm": 0.17488061821588735,
|
||
|
|
"learning_rate": 2.2006392347536102e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00034284673165529966,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 3654.4,
|
||
|
|
"valid_targets_min": 1943
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.668016194331984,
|
||
|
|
"grad_norm": 0.16332864665186617,
|
||
|
|
"learning_rate": 2.1855988066021837e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00036288710543885827,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 3880.0,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6831983805668016,
|
||
|
|
"grad_norm": 0.1793309310702905,
|
||
|
|
"learning_rate": 2.1705477833793332e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00046376511454582214,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 4569.3,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.698380566801619,
|
||
|
|
"grad_norm": 0.1735533190170727,
|
||
|
|
"learning_rate": 2.1554870242859813e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00036736796027980745,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 4094.7,
|
||
|
|
"valid_targets_min": 1987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7135627530364372,
|
||
|
|
"grad_norm": 0.15811841982602245,
|
||
|
|
"learning_rate": 2.140417389078833e-05,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003554734284989536,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 4293.6,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7287449392712553,
|
||
|
|
"grad_norm": 0.18925964951049176,
|
||
|
|
"learning_rate": 2.1253397380212935e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003876521368511021,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 3846.2,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.743927125506073,
|
||
|
|
"grad_norm": 0.2059572003673452,
|
||
|
|
"learning_rate": 2.110254931834359e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00033517612610012293,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 3583.5,
|
||
|
|
"valid_targets_min": 1847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7591093117408905,
|
||
|
|
"grad_norm": 0.18436601009250925,
|
||
|
|
"learning_rate": 2.095163831647485e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00037239439552649856,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 3513.1,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7742914979757085,
|
||
|
|
"grad_norm": 0.19081815627126364,
|
||
|
|
"learning_rate": 2.0800672989494225e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00035812356509268284,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 4042.1,
|
||
|
|
"valid_targets_min": 1361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7894736842105265,
|
||
|
|
"grad_norm": 0.2201494841546862,
|
||
|
|
"learning_rate": 2.0649661955390447e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005164881586097181,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 3242.9,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.804655870445344,
|
||
|
|
"grad_norm": 0.15235406229998164,
|
||
|
|
"learning_rate": 2.0498613834761462e-05,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003050516243092716,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 4447.4,
|
||
|
|
"valid_targets_min": 1952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8198380566801617,
|
||
|
|
"grad_norm": 0.189182879583893,
|
||
|
|
"learning_rate": 2.0347537250322338e-05,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004330803931225091,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 4193.2,
|
||
|
|
"valid_targets_min": 1974
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.83502024291498,
|
||
|
|
"grad_norm": 0.1690992871744216,
|
||
|
|
"learning_rate": 2.0196440826413033e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000334964512148872,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 4812.4,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.850202429149798,
|
||
|
|
"grad_norm": 0.18665849511011906,
|
||
|
|
"learning_rate": 2.004533318850605e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003993385180365294,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 3342.8,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8653846153846154,
|
||
|
|
"grad_norm": 0.17342910539890452,
|
||
|
|
"learning_rate": 1.9894222962714046e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003865731123369187,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 3518.0,
|
||
|
|
"valid_targets_min": 1906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.880566801619433,
|
||
|
|
"grad_norm": 0.17910786724541225,
|
||
|
|
"learning_rate": 1.9743118775297432e-05,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002703152713365853,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 3390.8,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.895748987854251,
|
||
|
|
"grad_norm": 0.26331019757042656,
|
||
|
|
"learning_rate": 1.95920292521719e-05,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003070134553126991,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 3293.5,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.910931174089069,
|
||
|
|
"grad_norm": 0.18781319052306894,
|
||
|
|
"learning_rate": 1.9440963018416002e-05,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00037992349825799465,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 4061.6,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9261133603238867,
|
||
|
|
"grad_norm": 0.18227473763506005,
|
||
|
|
"learning_rate": 1.9289928697778826e-05,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005043879500590265,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 2907.3,
|
||
|
|
"valid_targets_min": 2055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9412955465587043,
|
||
|
|
"grad_norm": 0.18820151837679194,
|
||
|
|
"learning_rate": 1.9138934912187647e-05,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00023931797477416694,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 3240.8,
|
||
|
|
"valid_targets_min": 2010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9564777327935223,
|
||
|
|
"grad_norm": 0.15644755991029566,
|
||
|
|
"learning_rate": 1.898799028125578e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00034322403371334076,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 3311.7,
|
||
|
|
"valid_targets_min": 1993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.97165991902834,
|
||
|
|
"grad_norm": 0.14111680124033982,
|
||
|
|
"learning_rate": 1.8837103421790486e-05,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000309587805531919,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 3747.0,
|
||
|
|
"valid_targets_min": 2000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.986842105263158,
|
||
|
|
"grad_norm": 0.1790557203137647,
|
||
|
|
"learning_rate": 1.8686282947301108e-05,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00035593437496572733,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 3631.0,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0,
|
||
|
|
"grad_norm": 0.31365694046421516,
|
||
|
|
"learning_rate": 1.8535537467507326e-05,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009995142463594675,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 3038.1,
|
||
|
|
"valid_targets_min": 1836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.015182186234818,
|
||
|
|
"grad_norm": 0.16742487206236584,
|
||
|
|
"learning_rate": 1.8384875587847698e-05,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003214214229956269,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 3446.2,
|
||
|
|
"valid_targets_min": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.030364372469635,
|
||
|
|
"grad_norm": 0.1250217473203812,
|
||
|
|
"learning_rate": 1.823430590898839e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00026543630519881845,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 6213.4,
|
||
|
|
"valid_targets_min": 2161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.045546558704453,
|
||
|
|
"grad_norm": 0.14912361470746255,
|
||
|
|
"learning_rate": 1.8083837026332187e-05,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00033612066181376576,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 3990.1,
|
||
|
|
"valid_targets_min": 1917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.060728744939271,
|
||
|
|
"grad_norm": 0.1251761622993448,
|
||
|
|
"learning_rate": 1.7933477529527862e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00023664985201321542,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 3745.6,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.075910931174089,
|
||
|
|
"grad_norm": 0.12915107495925437,
|
||
|
|
"learning_rate": 1.778323600197978e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00027373761986382306,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 4506.6,
|
||
|
|
"valid_targets_min": 1998
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0910931174089065,
|
||
|
|
"grad_norm": 0.14831222185657933,
|
||
|
|
"learning_rate": 1.7633121020357945e-05,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00020463403780013323,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 4066.5,
|
||
|
|
"valid_targets_min": 1977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1062753036437245,
|
||
|
|
"grad_norm": 0.162292116239436,
|
||
|
|
"learning_rate": 1.748314115410835e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002023346023634076,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 4395.5,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1214574898785425,
|
||
|
|
"grad_norm": 0.1641100774946915,
|
||
|
|
"learning_rate": 1.733330496496383e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003073760017286986,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 3288.6,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.136639676113361,
|
||
|
|
"grad_norm": 0.18253415180379526,
|
||
|
|
"learning_rate": 1.718362100645527e-05,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00027408183086663485,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 4831.8,
|
||
|
|
"valid_targets_min": 1921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.151821862348178,
|
||
|
|
"grad_norm": 0.15612101665300201,
|
||
|
|
"learning_rate": 1.7034097823423356e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00029285086202435195,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 2824.3,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.167004048582996,
|
||
|
|
"grad_norm": 0.12734987467698144,
|
||
|
|
"learning_rate": 1.6884743951530737e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00036749267019331455,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 4037.2,
|
||
|
|
"valid_targets_min": 1909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.182186234817814,
|
||
|
|
"grad_norm": 0.15249940953957974,
|
||
|
|
"learning_rate": 1.6735567916774814e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00023765585501678288,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 3238.4,
|
||
|
|
"valid_targets_min": 2063
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.197368421052632,
|
||
|
|
"grad_norm": 0.16179151051480048,
|
||
|
|
"learning_rate": 1.6586578235000975e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00030981560121290386,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 3698.3,
|
||
|
|
"valid_targets_min": 2143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.212550607287449,
|
||
|
|
"grad_norm": 0.1577298572401232,
|
||
|
|
"learning_rate": 1.64377834114165e-05,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00021539324370678514,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 3935.9,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.227732793522267,
|
||
|
|
"grad_norm": 0.11263879883300013,
|
||
|
|
"learning_rate": 1.6289191940105004e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00020557490643113852,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 4472.4,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.242914979757085,
|
||
|
|
"grad_norm": 0.12465073918611852,
|
||
|
|
"learning_rate": 1.614081230354158e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002456406655255705,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 3555.1,
|
||
|
|
"valid_targets_min": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.258097165991903,
|
||
|
|
"grad_norm": 0.19245395991769743,
|
||
|
|
"learning_rate": 1.599265297210852e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000279653089819476,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 3965.3,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.27327935222672,
|
||
|
|
"grad_norm": 0.09293026362091913,
|
||
|
|
"learning_rate": 1.5844722403611814e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002100579149555415,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 4653.7,
|
||
|
|
"valid_targets_min": 1847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.288461538461538,
|
||
|
|
"grad_norm": 0.16479573992429064,
|
||
|
|
"learning_rate": 1.5697029042798334e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00029920038650743663,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 4110.0,
|
||
|
|
"valid_targets_min": 2072
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.303643724696356,
|
||
|
|
"grad_norm": 0.16833545524691845,
|
||
|
|
"learning_rate": 1.5549581320873715e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00022227107547223568,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 2792.3,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.318825910931174,
|
||
|
|
"grad_norm": 0.10522732101077935,
|
||
|
|
"learning_rate": 1.5402387655021106e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00018714641919359565,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 3969.2,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.334008097165992,
|
||
|
|
"grad_norm": 0.1535967782921471,
|
||
|
|
"learning_rate": 1.5255456447920622e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002529872872401029,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 3094.1,
|
||
|
|
"valid_targets_min": 1921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.34919028340081,
|
||
|
|
"grad_norm": 0.09202385790577249,
|
||
|
|
"learning_rate": 1.5108796087269704e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00018833880312740803,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 4022.7,
|
||
|
|
"valid_targets_min": 1949
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.364372469635628,
|
||
|
|
"grad_norm": 0.13590560190082948,
|
||
|
|
"learning_rate": 1.4962414945304269e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00021189013205002993,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 3913.8,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.379554655870446,
|
||
|
|
"grad_norm": 0.09988902755140015,
|
||
|
|
"learning_rate": 1.48163213783208e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017854674661066383,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 5511.4,
|
||
|
|
"valid_targets_min": 2124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.394736842105263,
|
||
|
|
"grad_norm": 0.12353338103245862,
|
||
|
|
"learning_rate": 1.4670523726199304e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002653140400070697,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 3396.1,
|
||
|
|
"valid_targets_min": 1981
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.409919028340081,
|
||
|
|
"grad_norm": 0.08826754506458849,
|
||
|
|
"learning_rate": 1.4525030311927228e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00019216086366213858,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 4130.1,
|
||
|
|
"valid_targets_min": 1614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.425101214574899,
|
||
|
|
"grad_norm": 0.12805703842584779,
|
||
|
|
"learning_rate": 1.4379849441124345e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00016008673992473632,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 3068.0,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.440283400809717,
|
||
|
|
"grad_norm": 0.11784266396497255,
|
||
|
|
"learning_rate": 1.4234989401568584e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00019212844199500978,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 4319.3,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.455465587044534,
|
||
|
|
"grad_norm": 0.1378125761030295,
|
||
|
|
"learning_rate": 1.4090458462722964e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001889071281766519,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 3233.0,
|
||
|
|
"valid_targets_min": 1983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.470647773279352,
|
||
|
|
"grad_norm": 0.0879020779069185,
|
||
|
|
"learning_rate": 1.3946264875263485e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001628987374715507,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 3540.2,
|
||
|
|
"valid_targets_min": 2012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.48582995951417,
|
||
|
|
"grad_norm": 0.14863228311121393,
|
||
|
|
"learning_rate": 1.380241687060815e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00020136366947554052,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 3974.7,
|
||
|
|
"valid_targets_min": 1943
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.501012145748988,
|
||
|
|
"grad_norm": 0.2183001453487955,
|
||
|
|
"learning_rate": 1.3658922660447065e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017537329404149204,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 4719.4,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.516194331983805,
|
||
|
|
"grad_norm": 0.11852833445117421,
|
||
|
|
"learning_rate": 1.3515790436273664e-05,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00016619575035292655,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 4807.5,
|
||
|
|
"valid_targets_min": 2007
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.531376518218623,
|
||
|
|
"grad_norm": 0.12117637817950043,
|
||
|
|
"learning_rate": 1.3373028368917091e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002051171031780541,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 2937.9,
|
||
|
|
"valid_targets_min": 1998
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5465587044534415,
|
||
|
|
"grad_norm": 0.14174694124145332,
|
||
|
|
"learning_rate": 1.3230644608075766e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00026428542332723737,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 4077.5,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5617408906882595,
|
||
|
|
"grad_norm": 0.11495837629326648,
|
||
|
|
"learning_rate": 1.3088647281852138e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00018655852181836963,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 3627.2,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.576923076923077,
|
||
|
|
"grad_norm": 0.09872682781211174,
|
||
|
|
"learning_rate": 1.2947044496288718e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017638107237871736,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 3156.1,
|
||
|
|
"valid_targets_min": 1974
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.592105263157895,
|
||
|
|
"grad_norm": 0.09091847807092099,
|
||
|
|
"learning_rate": 1.2805844334905286e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017424969701096416,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 3551.4,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.607287449392713,
|
||
|
|
"grad_norm": 0.07490178665424728,
|
||
|
|
"learning_rate": 1.266505485823748e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00011350198474247009,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 4936.9,
|
||
|
|
"valid_targets_min": 2098
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.62246963562753,
|
||
|
|
"grad_norm": 0.11027068481003097,
|
||
|
|
"learning_rate": 1.2524684103376643e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00016770735965110362,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 3021.8,
|
||
|
|
"valid_targets_min": 1838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.637651821862348,
|
||
|
|
"grad_norm": 0.0959521626603772,
|
||
|
|
"learning_rate": 1.238474008351101e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017282002954743803,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 4926.7,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.652834008097166,
|
||
|
|
"grad_norm": 0.1524560812650557,
|
||
|
|
"learning_rate": 1.2245230787468279e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00020078732632100582,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 4893.9,
|
||
|
|
"valid_targets_min": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.668016194331984,
|
||
|
|
"grad_norm": 0.10659270907021356,
|
||
|
|
"learning_rate": 1.210616417925954e-05,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002053509815596044,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 3321.6,
|
||
|
|
"valid_targets_min": 2062
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.683198380566802,
|
||
|
|
"grad_norm": 0.08352178060768262,
|
||
|
|
"learning_rate": 1.1967548197624679e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00018780113896355033,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 4212.1,
|
||
|
|
"valid_targets_min": 2004
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.698380566801619,
|
||
|
|
"grad_norm": 0.10545113432956367,
|
||
|
|
"learning_rate": 1.182939075557915e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00024291532463394105,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 3816.8,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.713562753036437,
|
||
|
|
"grad_norm": 0.09069716315742264,
|
||
|
|
"learning_rate": 1.1691699739962275e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00015057422569952905,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 3726.7,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.728744939271255,
|
||
|
|
"grad_norm": 0.07842492279266203,
|
||
|
|
"learning_rate": 1.1554483010987015e-05,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001360119931632653,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 3447.2,
|
||
|
|
"valid_targets_min": 1928
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.743927125506072,
|
||
|
|
"grad_norm": 0.11679110491500948,
|
||
|
|
"learning_rate": 1.1417748401791261e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001503416133346036,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 3905.7,
|
||
|
|
"valid_targets_min": 1833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7591093117408905,
|
||
|
|
"grad_norm": 0.08877491690318451,
|
||
|
|
"learning_rate": 1.1281503717990675e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00014764878142159432,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 3416.5,
|
||
|
|
"valid_targets_min": 1775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7742914979757085,
|
||
|
|
"grad_norm": 0.06832801111329315,
|
||
|
|
"learning_rate": 1.1145756737233078e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001266779436264187,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 3723.2,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7894736842105265,
|
||
|
|
"grad_norm": 0.13100666024322308,
|
||
|
|
"learning_rate": 1.1010515208754487e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00015492257080040872,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 3169.7,
|
||
|
|
"valid_targets_min": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.804655870445345,
|
||
|
|
"grad_norm": 0.08943222531165669,
|
||
|
|
"learning_rate": 1.087578685293674e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00011407081910874695,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 3693.8,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.819838056680162,
|
||
|
|
"grad_norm": 0.08130978352352379,
|
||
|
|
"learning_rate": 1.0741579360866752e-05,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00015799161337781698,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 4679.5,
|
||
|
|
"valid_targets_min": 1983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.83502024291498,
|
||
|
|
"grad_norm": 0.11030737361757942,
|
||
|
|
"learning_rate": 1.0607900393897463e-05,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017477729124948382,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 2894.2,
|
||
|
|
"valid_targets_min": 1950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.850202429149798,
|
||
|
|
"grad_norm": 0.10320780196537076,
|
||
|
|
"learning_rate": 1.0474757583210498e-05,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017558873514644802,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 3363.4,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.865384615384615,
|
||
|
|
"grad_norm": 0.07982206691484621,
|
||
|
|
"learning_rate": 1.0342158529380544e-05,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017107665189541876,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 3934.4,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.880566801619433,
|
||
|
|
"grad_norm": 0.11160739905615198,
|
||
|
|
"learning_rate": 1.0210110801941426e-05,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00016249390318989754,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 3558.2,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.895748987854251,
|
||
|
|
"grad_norm": 0.11632472999928259,
|
||
|
|
"learning_rate": 1.0078621938954028e-05,
|
||
|
|
"loss": 0.0005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001625502045499161,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 2949.1,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.910931174089069,
|
||
|
|
"grad_norm": 0.11467728199482936,
|
||
|
|
"learning_rate": 9.947699446575951e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001648940669838339,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 4162.7,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.926113360323887,
|
||
|
|
"grad_norm": 0.07602911426895266,
|
||
|
|
"learning_rate": 9.817350798633068e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00018565275240689516,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 4390.2,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.941295546558704,
|
||
|
|
"grad_norm": 0.07534814608623663,
|
||
|
|
"learning_rate": 9.687583436192785e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00012638355838134885,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 4535.5,
|
||
|
|
"valid_targets_min": 1950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.956477732793522,
|
||
|
|
"grad_norm": 0.08894254374376702,
|
||
|
|
"learning_rate": 9.558404767139335e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00010171098983846605,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 4676.2,
|
||
|
|
"valid_targets_min": 2013
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.97165991902834,
|
||
|
|
"grad_norm": 0.08567558422222073,
|
||
|
|
"learning_rate": 9.429822165750893e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00011121262650704011,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 4557.1,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.9868421052631575,
|
||
|
|
"grad_norm": 0.10131494498844583,
|
||
|
|
"learning_rate": 9.301842972278557e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00012183194485260174,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 4601.4,
|
||
|
|
"valid_targets_min": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.09996465868033678,
|
||
|
|
"learning_rate": 9.174474492527359e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002709078835323453,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 4467.4,
|
||
|
|
"valid_targets_min": 1993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.015182186234818,
|
||
|
|
"grad_norm": 0.08757084852979678,
|
||
|
|
"learning_rate": 9.047723997439206e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00015909172361716628,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 2889.2,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.030364372469635,
|
||
|
|
"grad_norm": 0.052874395522327686,
|
||
|
|
"learning_rate": 8.921598722677796e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00015834567602723837,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 3947.1,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.045546558704453,
|
||
|
|
"grad_norm": 0.10121338263005623,
|
||
|
|
"learning_rate": 8.796105868215592e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00013827148359268904,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 3173.6,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.060728744939271,
|
||
|
|
"grad_norm": 0.061762884364732124,
|
||
|
|
"learning_rate": 8.671252597922768e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00010502958321012557,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 3470.3,
|
||
|
|
"valid_targets_min": 1614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.075910931174089,
|
||
|
|
"grad_norm": 0.0825082549248666,
|
||
|
|
"learning_rate": 8.547046039158283e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001244868035428226,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 4018.7,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0910931174089065,
|
||
|
|
"grad_norm": 0.0752896675331657,
|
||
|
|
"learning_rate": 8.423493282362982e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00016329191566910595,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 3562.6,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.1062753036437245,
|
||
|
|
"grad_norm": 0.061362444372842856,
|
||
|
|
"learning_rate": 8.300601380654883e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00014282426855061203,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 4009.9,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.1214574898785425,
|
||
|
|
"grad_norm": 0.08843420754599847,
|
||
|
|
"learning_rate": 8.178377349426471e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001529960718471557,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 3906.6,
|
||
|
|
"valid_targets_min": 1907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.136639676113361,
|
||
|
|
"grad_norm": 0.10190449637655899,
|
||
|
|
"learning_rate": 8.056828165944282e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.42688639042899e-05,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 3508.2,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.151821862348178,
|
||
|
|
"grad_norm": 0.0632823981717409,
|
||
|
|
"learning_rate": 7.93596076895055e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.968654972501099e-05,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 3828.6,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.167004048582996,
|
||
|
|
"grad_norm": 0.08747079386394507,
|
||
|
|
"learning_rate": 7.815782058267156e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00013723800657317042,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 3206.6,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.182186234817814,
|
||
|
|
"grad_norm": 0.07254850537567721,
|
||
|
|
"learning_rate": 7.696298894401697e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.237830141093582e-05,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 4004.1,
|
||
|
|
"valid_targets_min": 1987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.197368421052632,
|
||
|
|
"grad_norm": 0.039356469706684716,
|
||
|
|
"learning_rate": 7.577518098155869e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.052822133526206e-05,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 4907.7,
|
||
|
|
"valid_targets_min": 2250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.212550607287449,
|
||
|
|
"grad_norm": 0.08096486303424018,
|
||
|
|
"learning_rate": 7.459446450236087e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00013704058073926717,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 2823.8,
|
||
|
|
"valid_targets_min": 1917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.227732793522267,
|
||
|
|
"grad_norm": 0.0692352739943929,
|
||
|
|
"learning_rate": 7.3420906908664345e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.10937344795093e-05,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 4354.6,
|
||
|
|
"valid_targets_min": 1966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.242914979757085,
|
||
|
|
"grad_norm": 0.07184641685908202,
|
||
|
|
"learning_rate": 7.225457519403838e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.362018317915499e-05,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 3989.0,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.258097165991903,
|
||
|
|
"grad_norm": 0.048319273128030235,
|
||
|
|
"learning_rate": 7.109553593955671e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00010920399654423818,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 3180.4,
|
||
|
|
"valid_targets_min": 1977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.27327935222672,
|
||
|
|
"grad_norm": 0.054343904508002985,
|
||
|
|
"learning_rate": 6.99438553099965e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.486790855182335e-05,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 3835.1,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.288461538461538,
|
||
|
|
"grad_norm": 0.04371123259548947,
|
||
|
|
"learning_rate": 6.879959905006135e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.663388482294977e-05,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 4160.3,
|
||
|
|
"valid_targets_min": 1858
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.303643724696356,
|
||
|
|
"grad_norm": 0.04808181988707324,
|
||
|
|
"learning_rate": 6.766283248062817e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.434741953853518e-05,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 3292.6,
|
||
|
|
"valid_targets_min": 2024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.318825910931174,
|
||
|
|
"grad_norm": 0.0535913542336524,
|
||
|
|
"learning_rate": 6.653362049501826e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00012302312825340778,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 3784.1,
|
||
|
|
"valid_targets_min": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.334008097165992,
|
||
|
|
"grad_norm": 0.049723495364198206,
|
||
|
|
"learning_rate": 6.541202755529299e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.808164031710476e-05,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 4122.8,
|
||
|
|
"valid_targets_min": 1652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.34919028340081,
|
||
|
|
"grad_norm": 0.04634848793784228,
|
||
|
|
"learning_rate": 6.429811768857359e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.078153769019991e-05,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 4063.0,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.364372469635628,
|
||
|
|
"grad_norm": 0.04317939107189241,
|
||
|
|
"learning_rate": 6.319195448338642e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.530956984031945e-05,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 3918.6,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.379554655870446,
|
||
|
|
"grad_norm": 0.06502616764847581,
|
||
|
|
"learning_rate": 6.2093601086032754e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00011668251681840047,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 3954.4,
|
||
|
|
"valid_targets_min": 2012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.394736842105263,
|
||
|
|
"grad_norm": 0.04244558684689124,
|
||
|
|
"learning_rate": 6.100312019698411e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.36968538351357e-05,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 3633.9,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.409919028340081,
|
||
|
|
"grad_norm": 0.04609181582976981,
|
||
|
|
"learning_rate": 5.9920574067303114e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00011274204007349908,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 4137.9,
|
||
|
|
"valid_targets_min": 1877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.425101214574899,
|
||
|
|
"grad_norm": 0.04829858301786825,
|
||
|
|
"learning_rate": 5.8846024495089425e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000125125574413687,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 4102.2,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.440283400809717,
|
||
|
|
"grad_norm": 0.06035453107050216,
|
||
|
|
"learning_rate": 5.777953282195228e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.88104657153599e-05,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 3995.6,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.455465587044534,
|
||
|
|
"grad_norm": 0.03696140940403187,
|
||
|
|
"learning_rate": 5.672115992950855e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.697451605461538e-05,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 4031.1,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.470647773279352,
|
||
|
|
"grad_norm": 0.03448853071359026,
|
||
|
|
"learning_rate": 5.567096623590758e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.14379779715091e-05,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 3530.6,
|
||
|
|
"valid_targets_min": 1981
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.48582995951417,
|
||
|
|
"grad_norm": 0.04553999488559737,
|
||
|
|
"learning_rate": 5.462901169238175e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00013526347174774855,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 3226.0,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.501012145748988,
|
||
|
|
"grad_norm": 0.03357394448624427,
|
||
|
|
"learning_rate": 5.359535577982431e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.305084389168769e-05,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 3466.6,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.516194331983805,
|
||
|
|
"grad_norm": 0.027258175795275355,
|
||
|
|
"learning_rate": 5.257005750539388e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.831331145600416e-05,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 3101.7,
|
||
|
|
"valid_targets_min": 1889
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.531376518218623,
|
||
|
|
"grad_norm": 0.07495662494816803,
|
||
|
|
"learning_rate": 5.155317539914601e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00015046489716041833,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 4003.7,
|
||
|
|
"valid_targets_min": 2001
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5465587044534415,
|
||
|
|
"grad_norm": 0.07350520030536078,
|
||
|
|
"learning_rate": 5.054476751069179e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00012950049131177366,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 4322.5,
|
||
|
|
"valid_targets_min": 2089
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5617408906882595,
|
||
|
|
"grad_norm": 0.04203899726177876,
|
||
|
|
"learning_rate": 4.954489140588412e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00010223207937087864,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 3349.9,
|
||
|
|
"valid_targets_min": 1917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.576923076923077,
|
||
|
|
"grad_norm": 0.056468818271058645,
|
||
|
|
"learning_rate": 4.85536041635315e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.302189285634086e-05,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 5094.2,
|
||
|
|
"valid_targets_min": 1838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.592105263157895,
|
||
|
|
"grad_norm": 0.03919213817041187,
|
||
|
|
"learning_rate": 4.757096237213976e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.667349513620138e-05,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 3459.2,
|
||
|
|
"valid_targets_min": 1877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.607287449392713,
|
||
|
|
"grad_norm": 0.03232545489748309,
|
||
|
|
"learning_rate": 4.659702212668151e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.86387167358771e-05,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 3589.6,
|
||
|
|
"valid_targets_min": 1984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.62246963562753,
|
||
|
|
"grad_norm": 0.032734923983451625,
|
||
|
|
"learning_rate": 4.5631839025393694e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.798870188300498e-05,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 3331.8,
|
||
|
|
"valid_targets_min": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.637651821862348,
|
||
|
|
"grad_norm": 0.03265793052611035,
|
||
|
|
"learning_rate": 4.467546816660433e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.756021998124197e-05,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 4192.2,
|
||
|
|
"valid_targets_min": 2041
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.652834008097166,
|
||
|
|
"grad_norm": 0.0360254785897426,
|
||
|
|
"learning_rate": 4.372796414558665e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.503755427431315e-05,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 3470.7,
|
||
|
|
"valid_targets_min": 2032
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.668016194331984,
|
||
|
|
"grad_norm": 0.07392537329773184,
|
||
|
|
"learning_rate": 4.278938105144255e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.83280299603939e-05,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 2848.8,
|
||
|
|
"valid_targets_min": 1959
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.683198380566802,
|
||
|
|
"grad_norm": 0.01737738334063054,
|
||
|
|
"learning_rate": 4.185977246401509e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.6896169553510845e-05,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 3211.2,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.698380566801619,
|
||
|
|
"grad_norm": 0.03164550137477946,
|
||
|
|
"learning_rate": 4.093919145082959e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00011084324796684086,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 5124.9,
|
||
|
|
"valid_targets_min": 1966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.713562753036437,
|
||
|
|
"grad_norm": 0.05549355618306573,
|
||
|
|
"learning_rate": 4.002769056406453e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.738938777241856e-05,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 3033.4,
|
||
|
|
"valid_targets_min": 2054
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.728744939271255,
|
||
|
|
"grad_norm": 0.025539998050962274,
|
||
|
|
"learning_rate": 3.912532183755115e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.841106318868697e-05,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 5370.5,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.743927125506072,
|
||
|
|
"grad_norm": 0.04206476138717903,
|
||
|
|
"learning_rate": 3.82321367838034e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.680234896019101e-05,
|
||
|
|
"step": 1895,
|
||
|
|
"valid_targets_mean": 4678.3,
|
||
|
|
"valid_targets_min": 1912
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7591093117408905,
|
||
|
|
"grad_norm": 0.029381929361535344,
|
||
|
|
"learning_rate": 3.734818639107709e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.053813897073269e-05,
|
||
|
|
"step": 1900,
|
||
|
|
"valid_targets_mean": 2871.2,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7742914979757085,
|
||
|
|
"grad_norm": 0.040540688999014994,
|
||
|
|
"learning_rate": 3.647352112045943e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.305185524979606e-05,
|
||
|
|
"step": 1905,
|
||
|
|
"valid_targets_mean": 3330.4,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7894736842105265,
|
||
|
|
"grad_norm": 0.018760510191600844,
|
||
|
|
"learning_rate": 3.560819090298808e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.034878631704487e-05,
|
||
|
|
"step": 1910,
|
||
|
|
"valid_targets_mean": 4684.8,
|
||
|
|
"valid_targets_min": 1928
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.804655870445345,
|
||
|
|
"grad_norm": 0.019796489844057102,
|
||
|
|
"learning_rate": 3.4752245136801065e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.202718941494823e-05,
|
||
|
|
"step": 1915,
|
||
|
|
"valid_targets_mean": 4422.4,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.819838056680162,
|
||
|
|
"grad_norm": 0.0190551832438293,
|
||
|
|
"learning_rate": 3.3905732684316626e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.0186776206828654e-05,
|
||
|
|
"step": 1920,
|
||
|
|
"valid_targets_mean": 3264.5,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.83502024291498,
|
||
|
|
"grad_norm": 0.02310926695469447,
|
||
|
|
"learning_rate": 3.3068701869444177e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.752004264853895e-05,
|
||
|
|
"step": 1925,
|
||
|
|
"valid_targets_mean": 2449.7,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.850202429149798,
|
||
|
|
"grad_norm": 0.03007096324588529,
|
||
|
|
"learning_rate": 3.2241200474825307e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00010428763926029205,
|
||
|
|
"step": 1930,
|
||
|
|
"valid_targets_mean": 3271.9,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.865384615384615,
|
||
|
|
"grad_norm": 0.024156543923176104,
|
||
|
|
"learning_rate": 3.1423275739106353e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.422754424624145e-05,
|
||
|
|
"step": 1935,
|
||
|
|
"valid_targets_mean": 3200.3,
|
||
|
|
"valid_targets_min": 1937
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.880566801619433,
|
||
|
|
"grad_norm": 0.020579879432511442,
|
||
|
|
"learning_rate": 3.0614974354241547e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.473738176282495e-05,
|
||
|
|
"step": 1940,
|
||
|
|
"valid_targets_mean": 4438.5,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.895748987854251,
|
||
|
|
"grad_norm": 0.033165617768540726,
|
||
|
|
"learning_rate": 2.9816342462827806e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.497471233364195e-05,
|
||
|
|
"step": 1945,
|
||
|
|
"valid_targets_mean": 3977.7,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.910931174089069,
|
||
|
|
"grad_norm": 0.02991343037345141,
|
||
|
|
"learning_rate": 2.9027425655470366e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.823180592618883e-05,
|
||
|
|
"step": 1950,
|
||
|
|
"valid_targets_mean": 3949.1,
|
||
|
|
"valid_targets_min": 2063
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.926113360323887,
|
||
|
|
"grad_norm": 0.026930227849481903,
|
||
|
|
"learning_rate": 2.824826896818036e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.759446791373193e-05,
|
||
|
|
"step": 1955,
|
||
|
|
"valid_targets_mean": 2829.4,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.941295546558704,
|
||
|
|
"grad_norm": 0.014986535617824063,
|
||
|
|
"learning_rate": 2.747891687980384e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.503857391886413e-05,
|
||
|
|
"step": 1960,
|
||
|
|
"valid_targets_mean": 3944.6,
|
||
|
|
"valid_targets_min": 2117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.956477732793522,
|
||
|
|
"grad_norm": 0.021613336947099587,
|
||
|
|
"learning_rate": 2.6719413309482843e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.343465454643592e-05,
|
||
|
|
"step": 1965,
|
||
|
|
"valid_targets_mean": 4094.9,
|
||
|
|
"valid_targets_min": 2032
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.97165991902834,
|
||
|
|
"grad_norm": 0.020689572014764804,
|
||
|
|
"learning_rate": 2.5969801614147838e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.5728814913891256e-05,
|
||
|
|
"step": 1970,
|
||
|
|
"valid_targets_mean": 4200.1,
|
||
|
|
"valid_targets_min": 1904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.9868421052631575,
|
||
|
|
"grad_norm": 0.021252210412381328,
|
||
|
|
"learning_rate": 2.5230124586043016e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.904879748821259e-05,
|
||
|
|
"step": 1975,
|
||
|
|
"valid_targets_mean": 4169.0,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0,
|
||
|
|
"grad_norm": 0.04432635666522882,
|
||
|
|
"learning_rate": 2.45004244502834e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00020038039656355977,
|
||
|
|
"step": 1980,
|
||
|
|
"valid_targets_mean": 3857.6,
|
||
|
|
"valid_targets_min": 1918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.015182186234818,
|
||
|
|
"grad_norm": 0.01975277574955162,
|
||
|
|
"learning_rate": 2.3780742862444205e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.235813922714442e-05,
|
||
|
|
"step": 1985,
|
||
|
|
"valid_targets_mean": 3944.0,
|
||
|
|
"valid_targets_min": 1705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.030364372469635,
|
||
|
|
"grad_norm": 0.020110708432130284,
|
||
|
|
"learning_rate": 2.3071120906183064e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.9585167289478704e-05,
|
||
|
|
"step": 1990,
|
||
|
|
"valid_targets_mean": 3802.8,
|
||
|
|
"valid_targets_min": 2063
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.045546558704453,
|
||
|
|
"grad_norm": 0.02583283592335947,
|
||
|
|
"learning_rate": 2.237159909089468e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.7057870688149706e-05,
|
||
|
|
"step": 1995,
|
||
|
|
"valid_targets_mean": 3118.0,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.060728744939271,
|
||
|
|
"grad_norm": 0.018444254625750108,
|
||
|
|
"learning_rate": 2.168221734939824e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.08332406450063e-05,
|
||
|
|
"step": 2000,
|
||
|
|
"valid_targets_mean": 3951.7,
|
||
|
|
"valid_targets_min": 2027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.075910931174089,
|
||
|
|
"grad_norm": 0.021598760579964522,
|
||
|
|
"learning_rate": 2.1003015035658024e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.67504534451291e-05,
|
||
|
|
"step": 2005,
|
||
|
|
"valid_targets_mean": 3864.8,
|
||
|
|
"valid_targets_min": 1937
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0910931174089065,
|
||
|
|
"grad_norm": 0.012481559526226376,
|
||
|
|
"learning_rate": 2.0334030922536606e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.6400862629525363e-05,
|
||
|
|
"step": 2010,
|
||
|
|
"valid_targets_mean": 4514.0,
|
||
|
|
"valid_targets_min": 1652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1062753036437245,
|
||
|
|
"grad_norm": 0.015753419182275418,
|
||
|
|
"learning_rate": 1.9675303199581554e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.7796263970667496e-05,
|
||
|
|
"step": 2015,
|
||
|
|
"valid_targets_mean": 3784.6,
|
||
|
|
"valid_targets_min": 1889
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1214574898785425,
|
||
|
|
"grad_norm": 0.018773015200713696,
|
||
|
|
"learning_rate": 1.90268694708454e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.000359669793397e-05,
|
||
|
|
"step": 2020,
|
||
|
|
"valid_targets_mean": 4500.7,
|
||
|
|
"valid_targets_min": 2030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.136639676113361,
|
||
|
|
"grad_norm": 0.013912432501782385,
|
||
|
|
"learning_rate": 1.8388766752739017e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.690220157499425e-05,
|
||
|
|
"step": 2025,
|
||
|
|
"valid_targets_mean": 4143.6,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.151821862348178,
|
||
|
|
"grad_norm": 0.012875356411892577,
|
||
|
|
"learning_rate": 1.77610314719183e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.50392251170706e-05,
|
||
|
|
"step": 2030,
|
||
|
|
"valid_targets_mean": 4672.8,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.167004048582996,
|
||
|
|
"grad_norm": 0.01844243244247078,
|
||
|
|
"learning_rate": 1.7143699463204932e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.02084687165916e-05,
|
||
|
|
"step": 2035,
|
||
|
|
"valid_targets_mean": 5044.8,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.182186234817814,
|
||
|
|
"grad_norm": 0.016744112513923014,
|
||
|
|
"learning_rate": 1.6536805967540614e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.032545934431255e-05,
|
||
|
|
"step": 2040,
|
||
|
|
"valid_targets_mean": 4323.5,
|
||
|
|
"valid_targets_min": 2014
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.197368421052632,
|
||
|
|
"grad_norm": 0.017024046540539985,
|
||
|
|
"learning_rate": 1.5940385629975353e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.6848457714077085e-05,
|
||
|
|
"step": 2045,
|
||
|
|
"valid_targets_mean": 3450.3,
|
||
|
|
"valid_targets_min": 1959
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.212550607287449,
|
||
|
|
"grad_norm": 0.023141967689412814,
|
||
|
|
"learning_rate": 1.535447249768971e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 8.416399941779673e-05,
|
||
|
|
"step": 2050,
|
||
|
|
"valid_targets_mean": 3900.4,
|
||
|
|
"valid_targets_min": 1887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.227732793522267,
|
||
|
|
"grad_norm": 0.02329821457809859,
|
||
|
|
"learning_rate": 1.4779100018051118e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.812147669028491e-05,
|
||
|
|
"step": 2055,
|
||
|
|
"valid_targets_mean": 3131.1,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.242914979757085,
|
||
|
|
"grad_norm": 0.020849720868509947,
|
||
|
|
"learning_rate": 1.421430103670456e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.93707586126402e-05,
|
||
|
|
"step": 2060,
|
||
|
|
"valid_targets_mean": 3381.0,
|
||
|
|
"valid_targets_min": 1751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.258097165991903,
|
||
|
|
"grad_norm": 0.015890730879732272,
|
||
|
|
"learning_rate": 1.366010779569764e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.6655844673514366e-05,
|
||
|
|
"step": 2065,
|
||
|
|
"valid_targets_mean": 3341.5,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.27327935222672,
|
||
|
|
"grad_norm": 0.018573044001283956,
|
||
|
|
"learning_rate": 1.3116551931639899e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 7.622496923431754e-05,
|
||
|
|
"step": 2070,
|
||
|
|
"valid_targets_mean": 3206.4,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.288461538461538,
|
||
|
|
"grad_norm": 0.019084154033437873,
|
||
|
|
"learning_rate": 1.258366447389674e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.174696318339556e-05,
|
||
|
|
"step": 2075,
|
||
|
|
"valid_targets_mean": 3648.1,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.303643724696356,
|
||
|
|
"grad_norm": 0.018759893652474203,
|
||
|
|
"learning_rate": 1.2061475842818337e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.5286811402766034e-05,
|
||
|
|
"step": 2080,
|
||
|
|
"valid_targets_mean": 3706.3,
|
||
|
|
"valid_targets_min": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.318825910931174,
|
||
|
|
"grad_norm": 0.017504026291277903,
|
||
|
|
"learning_rate": 1.1550015848002816e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.433221329236403e-05,
|
||
|
|
"step": 2085,
|
||
|
|
"valid_targets_mean": 3627.9,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.334008097165992,
|
||
|
|
"grad_norm": 0.015890358687878792,
|
||
|
|
"learning_rate": 1.1049313686594675e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.451548349810764e-05,
|
||
|
|
"step": 2090,
|
||
|
|
"valid_targets_mean": 3145.2,
|
||
|
|
"valid_targets_min": 1977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.34919028340081,
|
||
|
|
"grad_norm": 0.014715742472841829,
|
||
|
|
"learning_rate": 1.0559397941618022e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.490584979066625e-05,
|
||
|
|
"step": 2095,
|
||
|
|
"valid_targets_mean": 3908.9,
|
||
|
|
"valid_targets_min": 2150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.364372469635628,
|
||
|
|
"grad_norm": 0.016704008750308624,
|
||
|
|
"learning_rate": 1.008029658034484e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.01592255407013e-05,
|
||
|
|
"step": 2100,
|
||
|
|
"valid_targets_mean": 3532.7,
|
||
|
|
"valid_targets_min": 1887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.379554655870446,
|
||
|
|
"grad_norm": 0.019049019436133078,
|
||
|
|
"learning_rate": 9.61203695269859e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.503716263570823e-05,
|
||
|
|
"step": 2105,
|
||
|
|
"valid_targets_mean": 3629.6,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.394736842105263,
|
||
|
|
"grad_norm": 0.016236390707226954,
|
||
|
|
"learning_rate": 9.154645789692718e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.1050959629938e-05,
|
||
|
|
"step": 2110,
|
||
|
|
"valid_targets_mean": 3881.4,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.409919028340081,
|
||
|
|
"grad_norm": 0.015776122254341156,
|
||
|
|
"learning_rate": 8.708149201904814e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.307747076381929e-05,
|
||
|
|
"step": 2115,
|
||
|
|
"valid_targets_mean": 3591.7,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.425101214574899,
|
||
|
|
"grad_norm": 0.018818329670249184,
|
||
|
|
"learning_rate": 8.272572677986001e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.5877233535284176e-05,
|
||
|
|
"step": 2120,
|
||
|
|
"valid_targets_mean": 4000.3,
|
||
|
|
"valid_targets_min": 2177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.440283400809717,
|
||
|
|
"grad_norm": 0.020158590036148442,
|
||
|
|
"learning_rate": 7.847941083206057e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.8522524593863636e-05,
|
||
|
|
"step": 2125,
|
||
|
|
"valid_targets_mean": 4300.4,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.455465587044534,
|
||
|
|
"grad_norm": 0.014896978888022024,
|
||
|
|
"learning_rate": 7.4342786580337e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.4126638133311644e-05,
|
||
|
|
"step": 2130,
|
||
|
|
"valid_targets_mean": 3224.0,
|
||
|
|
"valid_targets_min": 1958
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.470647773279352,
|
||
|
|
"grad_norm": 0.014839706777052217,
|
||
|
|
"learning_rate": 7.031609016753016e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.7936653572833166e-05,
|
||
|
|
"step": 2135,
|
||
|
|
"valid_targets_mean": 4046.5,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.48582995951417,
|
||
|
|
"grad_norm": 0.009215617413266505,
|
||
|
|
"learning_rate": 6.639955146115284e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 3.9694281440461054e-05,
|
||
|
|
"step": 2140,
|
||
|
|
"valid_targets_mean": 4908.4,
|
||
|
|
"valid_targets_min": 1361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.501012145748988,
|
||
|
|
"grad_norm": 0.011920668123484768,
|
||
|
|
"learning_rate": 6.259339404026876e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.9164224037667736e-05,
|
||
|
|
"step": 2145,
|
||
|
|
"valid_targets_mean": 4520.5,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.516194331983805,
|
||
|
|
"grad_norm": 0.026730651850193136,
|
||
|
|
"learning_rate": 5.889783518272785e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.978342960588634e-05,
|
||
|
|
"step": 2150,
|
||
|
|
"valid_targets_mean": 3326.0,
|
||
|
|
"valid_targets_min": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.531376518218623,
|
||
|
|
"grad_norm": 0.014336502341104639,
|
||
|
|
"learning_rate": 5.53130858527644e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.432576628867537e-05,
|
||
|
|
"step": 2155,
|
||
|
|
"valid_targets_mean": 3821.7,
|
||
|
|
"valid_targets_min": 2034
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5465587044534415,
|
||
|
|
"grad_norm": 0.017577023688440895,
|
||
|
|
"learning_rate": 5.183935068895207e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.009461165172979e-05,
|
||
|
|
"step": 2160,
|
||
|
|
"valid_targets_mean": 3329.7,
|
||
|
|
"valid_targets_min": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5617408906882595,
|
||
|
|
"grad_norm": 0.020541542988354578,
|
||
|
|
"learning_rate": 4.847682799252474e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.37130651739426e-05,
|
||
|
|
"step": 2165,
|
||
|
|
"valid_targets_mean": 4126.9,
|
||
|
|
"valid_targets_min": 2010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.576923076923077,
|
||
|
|
"grad_norm": 0.020240683045467427,
|
||
|
|
"learning_rate": 4.522570971605289e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.9133006288902834e-05,
|
||
|
|
"step": 2170,
|
||
|
|
"valid_targets_mean": 3881.9,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.592105263157895,
|
||
|
|
"grad_norm": 0.01514460608680531,
|
||
|
|
"learning_rate": 4.208618145248866e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.101994272787124e-05,
|
||
|
|
"step": 2175,
|
||
|
|
"valid_targets_mean": 3571.2,
|
||
|
|
"valid_targets_min": 1614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.607287449392713,
|
||
|
|
"grad_norm": 0.017177357782435697,
|
||
|
|
"learning_rate": 3.9058422424568923e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.712420923169702e-05,
|
||
|
|
"step": 2180,
|
||
|
|
"valid_targets_mean": 3596.5,
|
||
|
|
"valid_targets_min": 1976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.62246963562753,
|
||
|
|
"grad_norm": 0.013795022267324942,
|
||
|
|
"learning_rate": 3.614260547458659e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.151079701841809e-05,
|
||
|
|
"step": 2185,
|
||
|
|
"valid_targets_mean": 3791.7,
|
||
|
|
"valid_targets_min": 1995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.637651821862348,
|
||
|
|
"grad_norm": 0.014143652777548072,
|
||
|
|
"learning_rate": 3.3338897054521205e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.267373853712343e-05,
|
||
|
|
"step": 2190,
|
||
|
|
"valid_targets_mean": 3306.9,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.652834008097166,
|
||
|
|
"grad_norm": 0.013931734378664924,
|
||
|
|
"learning_rate": 3.0647457216538724e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.983153485227376e-05,
|
||
|
|
"step": 2195,
|
||
|
|
"valid_targets_mean": 3615.7,
|
||
|
|
"valid_targets_min": 1889
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.668016194331984,
|
||
|
|
"grad_norm": 0.015225097461670592,
|
||
|
|
"learning_rate": 2.8068439603853747e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.3125491831451654e-05,
|
||
|
|
"step": 2200,
|
||
|
|
"valid_targets_mean": 3073.3,
|
||
|
|
"valid_targets_min": 1614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.683198380566802,
|
||
|
|
"grad_norm": 0.01410917768851612,
|
||
|
|
"learning_rate": 2.5601991441959407e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.7947123675839975e-05,
|
||
|
|
"step": 2205,
|
||
|
|
"valid_targets_mean": 4357.2,
|
||
|
|
"valid_targets_min": 1931
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.698380566801619,
|
||
|
|
"grad_norm": 0.015979999905700506,
|
||
|
|
"learning_rate": 2.3248253530222753e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.526039265329018e-05,
|
||
|
|
"step": 2210,
|
||
|
|
"valid_targets_mean": 3673.5,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.713562753036437,
|
||
|
|
"grad_norm": 0.015617750908073543,
|
||
|
|
"learning_rate": 2.1007360233846308e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.9317451814422384e-05,
|
||
|
|
"step": 2215,
|
||
|
|
"valid_targets_mean": 2995.4,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.728744939271255,
|
||
|
|
"grad_norm": 0.015979790105217384,
|
||
|
|
"learning_rate": 1.8879439476198636e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.4595937652047724e-05,
|
||
|
|
"step": 2220,
|
||
|
|
"valid_targets_mean": 4247.5,
|
||
|
|
"valid_targets_min": 1991
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.743927125506072,
|
||
|
|
"grad_norm": 0.01645260501929592,
|
||
|
|
"learning_rate": 1.6864612731511298e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.2429153583943844e-05,
|
||
|
|
"step": 2225,
|
||
|
|
"valid_targets_mean": 4480.4,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7591093117408905,
|
||
|
|
"grad_norm": 0.01944942659138858,
|
||
|
|
"learning_rate": 1.4962995017944626e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.788427708670497e-05,
|
||
|
|
"step": 2230,
|
||
|
|
"valid_targets_mean": 2626.6,
|
||
|
|
"valid_targets_min": 1775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7742914979757085,
|
||
|
|
"grad_norm": 0.01742806763308914,
|
||
|
|
"learning_rate": 1.3174694891021188e-07,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.9685724661685526e-05,
|
||
|
|
"step": 2235,
|
||
|
|
"valid_targets_mean": 4857.9,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7894736842105265,
|
||
|
|
"grad_norm": 0.023094080709401707,
|
||
|
|
"learning_rate": 1.1499814437429869e-07,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 3.7925092328805476e-05,
|
||
|
|
"step": 2240,
|
||
|
|
"valid_targets_mean": 5961.4,
|
||
|
|
"valid_targets_min": 2090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.804655870445345,
|
||
|
|
"grad_norm": 0.017307208474793542,
|
||
|
|
"learning_rate": 9.938449269197181e-08,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.75071933073923e-05,
|
||
|
|
"step": 2245,
|
||
|
|
"valid_targets_mean": 3003.5,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.819838056680162,
|
||
|
|
"grad_norm": 0.011662518896251615,
|
||
|
|
"learning_rate": 8.490688518229651e-08,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.439153417479247e-05,
|
||
|
|
"step": 2250,
|
||
|
|
"valid_targets_mean": 4203.2,
|
||
|
|
"valid_targets_min": 1987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.83502024291498,
|
||
|
|
"grad_norm": 0.01475787481920341,
|
||
|
|
"learning_rate": 7.156614831225428e-08,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.45772782061249e-05,
|
||
|
|
"step": 2255,
|
||
|
|
"valid_targets_mean": 4648.8,
|
||
|
|
"valid_targets_min": 1877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.850202429149798,
|
||
|
|
"grad_norm": 0.015715101816166085,
|
||
|
|
"learning_rate": 5.936304364956513e-08,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.87802826683037e-05,
|
||
|
|
"step": 2260,
|
||
|
|
"valid_targets_mean": 4547.4,
|
||
|
|
"valid_targets_min": 1964
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.865384615384615,
|
||
|
|
"grad_norm": 0.014800595262351267,
|
||
|
|
"learning_rate": 4.829826781921343e-08,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.313785029808059e-05,
|
||
|
|
"step": 2265,
|
||
|
|
"valid_targets_mean": 4174.7,
|
||
|
|
"valid_targets_min": 1857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.880566801619433,
|
||
|
|
"grad_norm": 0.014883265746700266,
|
||
|
|
"learning_rate": 3.837245246367749e-08,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.947238969383761e-05,
|
||
|
|
"step": 2270,
|
||
|
|
"valid_targets_mean": 4516.9,
|
||
|
|
"valid_targets_min": 1877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.895748987854251,
|
||
|
|
"grad_norm": 0.01699094794420779,
|
||
|
|
"learning_rate": 2.958616420687177e-08,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.968399298377335e-05,
|
||
|
|
"step": 2275,
|
||
|
|
"valid_targets_mean": 3589.5,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.910931174089069,
|
||
|
|
"grad_norm": 0.015491641111548788,
|
||
|
|
"learning_rate": 2.1939904621806062e-08,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.265330011956394e-05,
|
||
|
|
"step": 2280,
|
||
|
|
"valid_targets_mean": 3934.0,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.926113360323887,
|
||
|
|
"grad_norm": 0.01660058990889945,
|
||
|
|
"learning_rate": 1.5434110201946184e-08,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.100296013755724e-05,
|
||
|
|
"step": 2285,
|
||
|
|
"valid_targets_mean": 3816.3,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.941295546558704,
|
||
|
|
"grad_norm": 0.014706278447656446,
|
||
|
|
"learning_rate": 1.006915233629835e-08,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 4.412983253132552e-05,
|
||
|
|
"step": 2290,
|
||
|
|
"valid_targets_mean": 4423.5,
|
||
|
|
"valid_targets_min": 1983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.956477732793522,
|
||
|
|
"grad_norm": 0.01620369617513515,
|
||
|
|
"learning_rate": 5.845337288210573e-09,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 5.237687946646474e-05,
|
||
|
|
"step": 2295,
|
||
|
|
"valid_targets_mean": 3163.2,
|
||
|
|
"valid_targets_min": 2034
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.97165991902834,
|
||
|
|
"grad_norm": 0.017752424073866235,
|
||
|
|
"learning_rate": 2.7629061778866597e-09,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.215991743374616e-05,
|
||
|
|
"step": 2300,
|
||
|
|
"valid_targets_mean": 3215.0,
|
||
|
|
"valid_targets_min": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.9868421052631575,
|
||
|
|
"grad_norm": 0.01805554399084789,
|
||
|
|
"learning_rate": 8.220349686216545e-10,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.730953464284539e-05,
|
||
|
|
"step": 2305,
|
||
|
|
"valid_targets_mean": 3769.1,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"grad_norm": 0.033637522141938735,
|
||
|
|
"learning_rate": 2.2834456763209944e-11,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00018654789892025292,
|
||
|
|
"step": 2310,
|
||
|
|
"valid_targets_mean": 3597.5,
|
||
|
|
"valid_targets_min": 1775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"step": 2310,
|
||
|
|
"total_flos": 2.978084610098856e+18,
|
||
|
|
"train_loss": 0.0,
|
||
|
|
"train_runtime": 3.5487,
|
||
|
|
"train_samples_per_second": 62332.474,
|
||
|
|
"train_steps_per_second": 650.94
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 2310,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 300,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 2.978084610098856e+18,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|