1606 lines
44 KiB
JSON
1606 lines
44 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 714,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.049019607843137254,
|
||
|
|
"grad_norm": 14.845285962034723,
|
||
|
|
"learning_rate": 2.222222222222222e-06,
|
||
|
|
"loss": 0.517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18225759267807007,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 4948.1,
|
||
|
|
"valid_targets_min": 1347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09803921568627451,
|
||
|
|
"grad_norm": 7.464976124277118,
|
||
|
|
"learning_rate": 5e-06,
|
||
|
|
"loss": 0.4481,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1431998908519745,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 5591.3,
|
||
|
|
"valid_targets_min": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14705882352941177,
|
||
|
|
"grad_norm": 1.813784772450264,
|
||
|
|
"learning_rate": 7.77777777777778e-06,
|
||
|
|
"loss": 0.3912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13079722225666046,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 5941.9,
|
||
|
|
"valid_targets_min": 267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19607843137254902,
|
||
|
|
"grad_norm": 0.9910305112535406,
|
||
|
|
"learning_rate": 1.0555555555555557e-05,
|
||
|
|
"loss": 0.3384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11258930712938309,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 6313.4,
|
||
|
|
"valid_targets_min": 2143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24509803921568626,
|
||
|
|
"grad_norm": 0.7992459746477515,
|
||
|
|
"learning_rate": 1.3333333333333333e-05,
|
||
|
|
"loss": 0.3123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10164578258991241,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 5665.3,
|
||
|
|
"valid_targets_min": 2015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29411764705882354,
|
||
|
|
"grad_norm": 0.47317240306192654,
|
||
|
|
"learning_rate": 1.6111111111111115e-05,
|
||
|
|
"loss": 0.2899,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0981433242559433,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 5768.9,
|
||
|
|
"valid_targets_min": 2274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3431372549019608,
|
||
|
|
"grad_norm": 0.3462324350621174,
|
||
|
|
"learning_rate": 1.888888888888889e-05,
|
||
|
|
"loss": 0.2846,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09364812076091766,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 5493.6,
|
||
|
|
"valid_targets_min": 2334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39215686274509803,
|
||
|
|
"grad_norm": 0.3775315605954148,
|
||
|
|
"learning_rate": 2.1666666666666667e-05,
|
||
|
|
"loss": 0.259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07151000946760178,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 5538.6,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4411764705882353,
|
||
|
|
"grad_norm": 0.23962790083936536,
|
||
|
|
"learning_rate": 2.444444444444445e-05,
|
||
|
|
"loss": 0.2354,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07168256491422653,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 6081.1,
|
||
|
|
"valid_targets_min": 977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49019607843137253,
|
||
|
|
"grad_norm": 0.23549334781671427,
|
||
|
|
"learning_rate": 2.7222222222222226e-05,
|
||
|
|
"loss": 0.2383,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09021559357643127,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 5551.8,
|
||
|
|
"valid_targets_min": 983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5392156862745098,
|
||
|
|
"grad_norm": 0.2061785031390588,
|
||
|
|
"learning_rate": 3.0000000000000004e-05,
|
||
|
|
"loss": 0.2381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07527650892734528,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 6129.4,
|
||
|
|
"valid_targets_min": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5882352941176471,
|
||
|
|
"grad_norm": 0.18844424836907367,
|
||
|
|
"learning_rate": 3.277777777777778e-05,
|
||
|
|
"loss": 0.2054,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07372777163982391,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 6069.8,
|
||
|
|
"valid_targets_min": 256
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6372549019607843,
|
||
|
|
"grad_norm": 0.17246412617765505,
|
||
|
|
"learning_rate": 3.555555555555555e-05,
|
||
|
|
"loss": 0.2053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06430137157440186,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 7087.8,
|
||
|
|
"valid_targets_min": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6862745098039216,
|
||
|
|
"grad_norm": 0.19460946066944204,
|
||
|
|
"learning_rate": 3.833333333333334e-05,
|
||
|
|
"loss": 0.2089,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09138505160808563,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 6163.3,
|
||
|
|
"valid_targets_min": 1620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7352941176470589,
|
||
|
|
"grad_norm": 0.19479248546898312,
|
||
|
|
"learning_rate": 3.9999042174899045e-05,
|
||
|
|
"loss": 0.1986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06743113696575165,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 5578.7,
|
||
|
|
"valid_targets_min": 598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7843137254901961,
|
||
|
|
"grad_norm": 0.2114771866057741,
|
||
|
|
"learning_rate": 3.998826769609533e-05,
|
||
|
|
"loss": 0.1951,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.060611382126808167,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 6215.6,
|
||
|
|
"valid_targets_min": 1322
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8333333333333334,
|
||
|
|
"grad_norm": 0.1910136360390071,
|
||
|
|
"learning_rate": 3.996552792838517e-05,
|
||
|
|
"loss": 0.191,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06359056383371353,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 5589.2,
|
||
|
|
"valid_targets_min": 298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8823529411764706,
|
||
|
|
"grad_norm": 0.19138505864440583,
|
||
|
|
"learning_rate": 3.993083648414832e-05,
|
||
|
|
"loss": 0.1949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07189453393220901,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 5569.2,
|
||
|
|
"valid_targets_min": 751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9313725490196079,
|
||
|
|
"grad_norm": 0.1811083652569276,
|
||
|
|
"learning_rate": 3.988421413022457e-05,
|
||
|
|
"loss": 0.1702,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05381467193365097,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 6246.1,
|
||
|
|
"valid_targets_min": 858
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9803921568627451,
|
||
|
|
"grad_norm": 0.15647945413369116,
|
||
|
|
"learning_rate": 3.982568877548239e-05,
|
||
|
|
"loss": 0.1782,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.051519643515348434,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 6077.8,
|
||
|
|
"valid_targets_min": 367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0294117647058822,
|
||
|
|
"grad_norm": 0.20486438301365698,
|
||
|
|
"learning_rate": 3.975529545411226e-05,
|
||
|
|
"loss": 0.1896,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05946924537420273,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 5859.4,
|
||
|
|
"valid_targets_min": 1382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0784313725490196,
|
||
|
|
"grad_norm": 0.17500723183778927,
|
||
|
|
"learning_rate": 3.967307630465466e-05,
|
||
|
|
"loss": 0.1748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04872163012623787,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 5285.2,
|
||
|
|
"valid_targets_min": 1213
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1274509803921569,
|
||
|
|
"grad_norm": 0.17177261390298737,
|
||
|
|
"learning_rate": 3.957908054477526e-05,
|
||
|
|
"loss": 0.1658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053148724138736725,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 6222.8,
|
||
|
|
"valid_targets_min": 1926
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1764705882352942,
|
||
|
|
"grad_norm": 0.18155686316968658,
|
||
|
|
"learning_rate": 3.9473364441802474e-05,
|
||
|
|
"loss": 0.1772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05831097811460495,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 5672.5,
|
||
|
|
"valid_targets_min": 1670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2254901960784315,
|
||
|
|
"grad_norm": 0.20691680495712458,
|
||
|
|
"learning_rate": 3.9355991279044965e-05,
|
||
|
|
"loss": 0.1645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06711968034505844,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 5509.9,
|
||
|
|
"valid_targets_min": 323
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2745098039215685,
|
||
|
|
"grad_norm": 0.16953565767196252,
|
||
|
|
"learning_rate": 3.922703131790925e-05,
|
||
|
|
"loss": 0.1641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0471009723842144,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 5594.2,
|
||
|
|
"valid_targets_min": 2155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3235294117647058,
|
||
|
|
"grad_norm": 0.19779493712452909,
|
||
|
|
"learning_rate": 3.9086561755840146e-05,
|
||
|
|
"loss": 0.161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05489179864525795,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 4934.2,
|
||
|
|
"valid_targets_min": 654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3725490196078431,
|
||
|
|
"grad_norm": 0.20239218498545397,
|
||
|
|
"learning_rate": 3.893466668010915e-05,
|
||
|
|
"loss": 0.1673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06282714009284973,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 6535.7,
|
||
|
|
"valid_targets_min": 1687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4215686274509804,
|
||
|
|
"grad_norm": 0.17377123597737723,
|
||
|
|
"learning_rate": 3.8771437017478526e-05,
|
||
|
|
"loss": 0.1668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046998970210552216,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 5463.5,
|
||
|
|
"valid_targets_min": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4705882352941178,
|
||
|
|
"grad_norm": 0.17062719138426188,
|
||
|
|
"learning_rate": 3.859697047977108e-05,
|
||
|
|
"loss": 0.1635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05450977012515068,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 5535.4,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5196078431372548,
|
||
|
|
"grad_norm": 0.19369358984546386,
|
||
|
|
"learning_rate": 3.8411371505378356e-05,
|
||
|
|
"loss": 0.1614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05806870013475418,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 6097.0,
|
||
|
|
"valid_targets_min": 915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5686274509803921,
|
||
|
|
"grad_norm": 0.17606741825762348,
|
||
|
|
"learning_rate": 3.8214751196742224e-05,
|
||
|
|
"loss": 0.1763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.049261607229709625,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 5752.1,
|
||
|
|
"valid_targets_min": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6176470588235294,
|
||
|
|
"grad_norm": 0.2117864198467642,
|
||
|
|
"learning_rate": 3.800722725384716e-05,
|
||
|
|
"loss": 0.1549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05296482890844345,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 5688.4,
|
||
|
|
"valid_targets_min": 751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6666666666666665,
|
||
|
|
"grad_norm": 0.19606553816396718,
|
||
|
|
"learning_rate": 3.778892390376323e-05,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06805002689361572,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 5743.2,
|
||
|
|
"valid_targets_min": 2992
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.715686274509804,
|
||
|
|
"grad_norm": 0.2579865850635183,
|
||
|
|
"learning_rate": 3.755997182628185e-05,
|
||
|
|
"loss": 0.1671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.056646402925252914,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 6171.2,
|
||
|
|
"valid_targets_min": 227
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7647058823529411,
|
||
|
|
"grad_norm": 0.15784964961757458,
|
||
|
|
"learning_rate": 3.732050807568878e-05,
|
||
|
|
"loss": 0.1625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047502197325229645,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 5991.4,
|
||
|
|
"valid_targets_min": 1963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8137254901960784,
|
||
|
|
"grad_norm": 0.20767144079577776,
|
||
|
|
"learning_rate": 3.707067599872131e-05,
|
||
|
|
"loss": 0.1665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06044068560004234,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 5098.8,
|
||
|
|
"valid_targets_min": 781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8627450980392157,
|
||
|
|
"grad_norm": 0.1769277041382716,
|
||
|
|
"learning_rate": 3.681062514875868e-05,
|
||
|
|
"loss": 0.1603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06034180149435997,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 5518.4,
|
||
|
|
"valid_targets_min": 297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9117647058823528,
|
||
|
|
"grad_norm": 0.1873324059846003,
|
||
|
|
"learning_rate": 3.6540511196297084e-05,
|
||
|
|
"loss": 0.1657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05551169440150261,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 4605.0,
|
||
|
|
"valid_targets_min": 1450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9607843137254903,
|
||
|
|
"grad_norm": 0.18605503901153028,
|
||
|
|
"learning_rate": 3.6260495835762865e-05,
|
||
|
|
"loss": 0.1581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04336617887020111,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 4599.4,
|
||
|
|
"valid_targets_min": 735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0098039215686274,
|
||
|
|
"grad_norm": 0.185744348058967,
|
||
|
|
"learning_rate": 3.597074668871972e-05,
|
||
|
|
"loss": 0.1637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.051525089889764786,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 6674.3,
|
||
|
|
"valid_targets_min": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0588235294117645,
|
||
|
|
"grad_norm": 0.1852768943197571,
|
||
|
|
"learning_rate": 3.567143720352786e-05,
|
||
|
|
"loss": 0.1538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03533821552991867,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 5553.1,
|
||
|
|
"valid_targets_min": 343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.107843137254902,
|
||
|
|
"grad_norm": 0.1833870940799021,
|
||
|
|
"learning_rate": 3.536274655151502e-05,
|
||
|
|
"loss": 0.1526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04637109115719795,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 5456.4,
|
||
|
|
"valid_targets_min": 894
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.156862745098039,
|
||
|
|
"grad_norm": 0.15680028488123737,
|
||
|
|
"learning_rate": 3.504485951972181e-05,
|
||
|
|
"loss": 0.1472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03762596845626831,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 5934.5,
|
||
|
|
"valid_targets_min": 1149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2058823529411766,
|
||
|
|
"grad_norm": 0.17181083479283146,
|
||
|
|
"learning_rate": 3.4717966400285215e-05,
|
||
|
|
"loss": 0.1542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.039286620914936066,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 6930.2,
|
||
|
|
"valid_targets_min": 1228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2549019607843137,
|
||
|
|
"grad_norm": 0.19508797620673862,
|
||
|
|
"learning_rate": 3.4382262876526845e-05,
|
||
|
|
"loss": 0.1564,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06591647118330002,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 6494.8,
|
||
|
|
"valid_targets_min": 2458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.303921568627451,
|
||
|
|
"grad_norm": 0.19945856709801285,
|
||
|
|
"learning_rate": 3.403794990581377e-05,
|
||
|
|
"loss": 0.16,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05911063775420189,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 5858.9,
|
||
|
|
"valid_targets_min": 1700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3529411764705883,
|
||
|
|
"grad_norm": 0.20200795155761367,
|
||
|
|
"learning_rate": 3.368523359926234e-05,
|
||
|
|
"loss": 0.1492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05478104576468468,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 5153.1,
|
||
|
|
"valid_targets_min": 2065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4019607843137254,
|
||
|
|
"grad_norm": 0.19519853478938234,
|
||
|
|
"learning_rate": 3.332432509835687e-05,
|
||
|
|
"loss": 0.1549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04512668773531914,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 5347.1,
|
||
|
|
"valid_targets_min": 2068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.450980392156863,
|
||
|
|
"grad_norm": 0.18550082917348132,
|
||
|
|
"learning_rate": 3.2955440448556986e-05,
|
||
|
|
"loss": 0.1548,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05476547032594681,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 5342.8,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5,
|
||
|
|
"grad_norm": 0.17389768733214,
|
||
|
|
"learning_rate": 3.257880046996954e-05,
|
||
|
|
"loss": 0.1482,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044103942811489105,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 5376.1,
|
||
|
|
"valid_targets_min": 188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.549019607843137,
|
||
|
|
"grad_norm": 0.18960557879045112,
|
||
|
|
"learning_rate": 3.219463062516218e-05,
|
||
|
|
"loss": 0.1472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044259026646614075,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 6247.7,
|
||
|
|
"valid_targets_min": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5980392156862746,
|
||
|
|
"grad_norm": 0.18057320660204854,
|
||
|
|
"learning_rate": 3.180316088419794e-05,
|
||
|
|
"loss": 0.1504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04895833134651184,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 5481.7,
|
||
|
|
"valid_targets_min": 682
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6470588235294117,
|
||
|
|
"grad_norm": 0.16850152487348163,
|
||
|
|
"learning_rate": 3.14046255869716e-05,
|
||
|
|
"loss": 0.153,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05481772869825363,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 5729.1,
|
||
|
|
"valid_targets_min": 1495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.696078431372549,
|
||
|
|
"grad_norm": 0.1972977593451679,
|
||
|
|
"learning_rate": 3.099926330293017e-05,
|
||
|
|
"loss": 0.1654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0604429729282856,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 4838.8,
|
||
|
|
"valid_targets_min": 1164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7450980392156863,
|
||
|
|
"grad_norm": 0.1779532546747506,
|
||
|
|
"learning_rate": 3.058731668826147e-05,
|
||
|
|
"loss": 0.1611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0506039559841156,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 5173.8,
|
||
|
|
"valid_targets_min": 966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7941176470588234,
|
||
|
|
"grad_norm": 0.16859117358751705,
|
||
|
|
"learning_rate": 3.0169032340636363e-05,
|
||
|
|
"loss": 0.1562,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06492990255355835,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 6798.8,
|
||
|
|
"valid_targets_min": 2298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.843137254901961,
|
||
|
|
"grad_norm": 0.17047341359032425,
|
||
|
|
"learning_rate": 2.9744660651591544e-05,
|
||
|
|
"loss": 0.1611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04040013998746872,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 5656.5,
|
||
|
|
"valid_targets_min": 1994
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.892156862745098,
|
||
|
|
"grad_norm": 0.1691917059677243,
|
||
|
|
"learning_rate": 2.9314455656641275e-05,
|
||
|
|
"loss": 0.1442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05659858137369156,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 6168.2,
|
||
|
|
"valid_targets_min": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9411764705882355,
|
||
|
|
"grad_norm": 0.18686162719495658,
|
||
|
|
"learning_rate": 2.8878674883207726e-05,
|
||
|
|
"loss": 0.1525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05041201412677765,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 6669.2,
|
||
|
|
"valid_targets_min": 1463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9901960784313726,
|
||
|
|
"grad_norm": 0.17069829336335143,
|
||
|
|
"learning_rate": 2.8437579196461072e-05,
|
||
|
|
"loss": 0.1396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044972144067287445,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 6637.5,
|
||
|
|
"valid_targets_min": 969
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0392156862745097,
|
||
|
|
"grad_norm": 0.15999237456048723,
|
||
|
|
"learning_rate": 2.799143264316152e-05,
|
||
|
|
"loss": 0.148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0373958945274353,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 5989.3,
|
||
|
|
"valid_targets_min": 337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.088235294117647,
|
||
|
|
"grad_norm": 0.1592160126728059,
|
||
|
|
"learning_rate": 2.7540502293596802e-05,
|
||
|
|
"loss": 0.1396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04034284129738808,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 5127.5,
|
||
|
|
"valid_targets_min": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1372549019607843,
|
||
|
|
"grad_norm": 0.1769980777310915,
|
||
|
|
"learning_rate": 2.708505808170973e-05,
|
||
|
|
"loss": 0.1537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044640615582466125,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 5812.9,
|
||
|
|
"valid_targets_min": 277
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.186274509803922,
|
||
|
|
"grad_norm": 0.15257116830142609,
|
||
|
|
"learning_rate": 2.662537264351152e-05,
|
||
|
|
"loss": 0.1404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05242038518190384,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 5841.9,
|
||
|
|
"valid_targets_min": 983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.235294117647059,
|
||
|
|
"grad_norm": 0.16404975625042978,
|
||
|
|
"learning_rate": 2.6161721153877658e-05,
|
||
|
|
"loss": 0.1519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048479050397872925,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 5749.8,
|
||
|
|
"valid_targets_min": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.284313725490196,
|
||
|
|
"grad_norm": 0.16196200510111014,
|
||
|
|
"learning_rate": 2.5694381161823883e-05,
|
||
|
|
"loss": 0.1393,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05674157291650772,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 6188.4,
|
||
|
|
"valid_targets_min": 2150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3333333333333335,
|
||
|
|
"grad_norm": 0.1715568446927489,
|
||
|
|
"learning_rate": 2.522363242436102e-05,
|
||
|
|
"loss": 0.1461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0405210517346859,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 5543.5,
|
||
|
|
"valid_targets_min": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3823529411764706,
|
||
|
|
"grad_norm": 0.18625399468743448,
|
||
|
|
"learning_rate": 2.47497567390281e-05,
|
||
|
|
"loss": 0.1581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05725814402103424,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 6666.8,
|
||
|
|
"valid_targets_min": 1783
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.431372549019608,
|
||
|
|
"grad_norm": 0.16710741980403304,
|
||
|
|
"learning_rate": 2.4273037775203924e-05,
|
||
|
|
"loss": 0.1434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047255054116249084,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 5879.6,
|
||
|
|
"valid_targets_min": 2429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.480392156862745,
|
||
|
|
"grad_norm": 0.18714960345056877,
|
||
|
|
"learning_rate": 2.3793760904298154e-05,
|
||
|
|
"loss": 0.1444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04799136519432068,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 5505.2,
|
||
|
|
"valid_targets_min": 178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5294117647058822,
|
||
|
|
"grad_norm": 0.195485882837898,
|
||
|
|
"learning_rate": 2.3312213028923572e-05,
|
||
|
|
"loss": 0.1458,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05502048134803772,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 6203.2,
|
||
|
|
"valid_targets_min": 915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5784313725490198,
|
||
|
|
"grad_norm": 0.1977460248140161,
|
||
|
|
"learning_rate": 2.2828682411151634e-05,
|
||
|
|
"loss": 0.142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04789289832115173,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 5283.4,
|
||
|
|
"valid_targets_min": 1751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.627450980392157,
|
||
|
|
"grad_norm": 0.17712548108134565,
|
||
|
|
"learning_rate": 2.2343458499954342e-05,
|
||
|
|
"loss": 0.1438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05372612178325653,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 5116.2,
|
||
|
|
"valid_targets_min": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6764705882352944,
|
||
|
|
"grad_norm": 0.17787145700828857,
|
||
|
|
"learning_rate": 2.1856831757935563e-05,
|
||
|
|
"loss": 0.1459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052830249071121216,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 6221.8,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7254901960784315,
|
||
|
|
"grad_norm": 0.1403375945900038,
|
||
|
|
"learning_rate": 2.136909348745558e-05,
|
||
|
|
"loss": 0.1445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05467274412512779,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 7338.7,
|
||
|
|
"valid_targets_min": 267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7745098039215685,
|
||
|
|
"grad_norm": 0.17890476249652149,
|
||
|
|
"learning_rate": 2.0880535656252955e-05,
|
||
|
|
"loss": 0.1559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04823530465364456,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 5682.3,
|
||
|
|
"valid_targets_min": 1113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8235294117647056,
|
||
|
|
"grad_norm": 0.1675345935224821,
|
||
|
|
"learning_rate": 2.0391450722668096e-05,
|
||
|
|
"loss": 0.1422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04629891738295555,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 5964.7,
|
||
|
|
"valid_targets_min": 906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.872549019607843,
|
||
|
|
"grad_norm": 0.16849306980232315,
|
||
|
|
"learning_rate": 1.9902131460573106e-05,
|
||
|
|
"loss": 0.1412,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.045284684747457504,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 4922.6,
|
||
|
|
"valid_targets_min": 233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9215686274509802,
|
||
|
|
"grad_norm": 0.1684953006314063,
|
||
|
|
"learning_rate": 1.941287078411279e-05,
|
||
|
|
"loss": 0.1419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053950127214193344,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 6029.8,
|
||
|
|
"valid_targets_min": 1074
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9705882352941178,
|
||
|
|
"grad_norm": 0.17222377503381056,
|
||
|
|
"learning_rate": 1.8923961572361688e-05,
|
||
|
|
"loss": 0.1448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046407949179410934,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 6019.1,
|
||
|
|
"valid_targets_min": 293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.019607843137255,
|
||
|
|
"grad_norm": 0.15984187499464808,
|
||
|
|
"learning_rate": 1.8435696494002076e-05,
|
||
|
|
"loss": 0.1359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04338710010051727,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 5504.8,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.068627450980392,
|
||
|
|
"grad_norm": 0.16765151368961928,
|
||
|
|
"learning_rate": 1.7948367832127934e-05,
|
||
|
|
"loss": 0.148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.051340095698833466,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 6667.2,
|
||
|
|
"valid_targets_min": 1060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.117647058823529,
|
||
|
|
"grad_norm": 0.15792491122980973,
|
||
|
|
"learning_rate": 1.7462267309279722e-05,
|
||
|
|
"loss": 0.1436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05365137755870819,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 6270.2,
|
||
|
|
"valid_targets_min": 1126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.166666666666667,
|
||
|
|
"grad_norm": 0.1692968124966608,
|
||
|
|
"learning_rate": 1.6977685912814723e-05,
|
||
|
|
"loss": 0.1386,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04327564314007759,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 4881.8,
|
||
|
|
"valid_targets_min": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.215686274509804,
|
||
|
|
"grad_norm": 0.16802626120360126,
|
||
|
|
"learning_rate": 1.649491372071745e-05,
|
||
|
|
"loss": 0.1348,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04639950394630432,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 6071.3,
|
||
|
|
"valid_targets_min": 1608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.264705882352941,
|
||
|
|
"grad_norm": 0.1739251489205305,
|
||
|
|
"learning_rate": 1.601423972795448e-05,
|
||
|
|
"loss": 0.1466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04997720196843147,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 5071.2,
|
||
|
|
"valid_targets_min": 1901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.313725490196078,
|
||
|
|
"grad_norm": 0.16846274854669305,
|
||
|
|
"learning_rate": 1.5535951673477493e-05,
|
||
|
|
"loss": 0.1443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.036386433988809586,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 6193.9,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.362745098039215,
|
||
|
|
"grad_norm": 0.17845219785128927,
|
||
|
|
"learning_rate": 1.5060335867978322e-05,
|
||
|
|
"loss": 0.1491,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.049667488783597946,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 5358.5,
|
||
|
|
"valid_targets_min": 906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.411764705882353,
|
||
|
|
"grad_norm": 0.16638712672136335,
|
||
|
|
"learning_rate": 1.4587677022498845e-05,
|
||
|
|
"loss": 0.1492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04850779101252556,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 6082.3,
|
||
|
|
"valid_targets_min": 1213
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.46078431372549,
|
||
|
|
"grad_norm": 0.1509181659878023,
|
||
|
|
"learning_rate": 1.4118258077998563e-05,
|
||
|
|
"loss": 0.1347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.037120066583156586,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 7179.8,
|
||
|
|
"valid_targets_min": 1754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.509803921568627,
|
||
|
|
"grad_norm": 0.17919038805552429,
|
||
|
|
"learning_rate": 1.3652360035981657e-05,
|
||
|
|
"loss": 0.1352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04360215738415718,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 5704.8,
|
||
|
|
"valid_targets_min": 1074
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5588235294117645,
|
||
|
|
"grad_norm": 0.17444385735623882,
|
||
|
|
"learning_rate": 1.3190261790285202e-05,
|
||
|
|
"loss": 0.1537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046840980648994446,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 6333.6,
|
||
|
|
"valid_targets_min": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.607843137254902,
|
||
|
|
"grad_norm": 0.17400376107735924,
|
||
|
|
"learning_rate": 1.2732239960128854e-05,
|
||
|
|
"loss": 0.1403,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04668301343917847,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 6577.3,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6568627450980395,
|
||
|
|
"grad_norm": 0.1642690410502086,
|
||
|
|
"learning_rate": 1.227856872452637e-05,
|
||
|
|
"loss": 0.1363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04167339950799942,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 6613.6,
|
||
|
|
"valid_targets_min": 1274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.705882352941177,
|
||
|
|
"grad_norm": 0.1665608183925791,
|
||
|
|
"learning_rate": 1.1829519658157706e-05,
|
||
|
|
"loss": 0.1349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04697566106915474,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 5399.3,
|
||
|
|
"valid_targets_min": 343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.754901960784314,
|
||
|
|
"grad_norm": 0.15109027745291384,
|
||
|
|
"learning_rate": 1.1385361568800205e-05,
|
||
|
|
"loss": 0.1409,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04550229385495186,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 7463.3,
|
||
|
|
"valid_targets_min": 2613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.803921568627451,
|
||
|
|
"grad_norm": 0.16283398624058726,
|
||
|
|
"learning_rate": 1.0946360336416041e-05,
|
||
|
|
"loss": 0.145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04752221703529358,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 5754.3,
|
||
|
|
"valid_targets_min": 1679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.852941176470588,
|
||
|
|
"grad_norm": 0.15690476742993395,
|
||
|
|
"learning_rate": 1.0512778753992384e-05,
|
||
|
|
"loss": 0.1391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.050478532910346985,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 6343.4,
|
||
|
|
"valid_targets_min": 1164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.901960784313726,
|
||
|
|
"grad_norm": 0.1547493986508796,
|
||
|
|
"learning_rate": 1.0084876370229346e-05,
|
||
|
|
"loss": 0.1361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.037191685289144516,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 5394.4,
|
||
|
|
"valid_targets_min": 2151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.950980392156863,
|
||
|
|
"grad_norm": 0.1546433674602096,
|
||
|
|
"learning_rate": 9.662909334170119e-06,
|
||
|
|
"loss": 0.1398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0633089691400528,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 6511.4,
|
||
|
|
"valid_targets_min": 2728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.14864264452615442,
|
||
|
|
"learning_rate": 9.247130241866162e-06,
|
||
|
|
"loss": 0.1293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0435556136071682,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 6029.2,
|
||
|
|
"valid_targets_min": 826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.049019607843137,
|
||
|
|
"grad_norm": 0.17351132595970922,
|
||
|
|
"learning_rate": 8.837787985169248e-06,
|
||
|
|
"loss": 0.1389,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04400845244526863,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 4938.3,
|
||
|
|
"valid_targets_min": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.098039215686274,
|
||
|
|
"grad_norm": 0.16525532968859072,
|
||
|
|
"learning_rate": 8.435127602740931e-06,
|
||
|
|
"loss": 0.136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04021601378917694,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 4886.9,
|
||
|
|
"valid_targets_min": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.147058823529412,
|
||
|
|
"grad_norm": 0.17565072755854125,
|
||
|
|
"learning_rate": 8.03939013336857e-06,
|
||
|
|
"loss": 0.1486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0686882734298706,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 6319.9,
|
||
|
|
"valid_targets_min": 2372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.196078431372549,
|
||
|
|
"grad_norm": 0.16546346883164598,
|
||
|
|
"learning_rate": 7.650812471675752e-06,
|
||
|
|
"loss": 0.1418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048375021666288376,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 5439.3,
|
||
|
|
"valid_targets_min": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.245098039215686,
|
||
|
|
"grad_norm": 0.16730821511271163,
|
||
|
|
"learning_rate": 7.269627226313507e-06,
|
||
|
|
"loss": 0.1373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.050091587007045746,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 5765.1,
|
||
|
|
"valid_targets_min": 942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.294117647058823,
|
||
|
|
"grad_norm": 0.15886005158900465,
|
||
|
|
"learning_rate": 6.896062580717056e-06,
|
||
|
|
"loss": 0.1333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052998557686805725,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 5913.3,
|
||
|
|
"valid_targets_min": 1181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.3431372549019605,
|
||
|
|
"grad_norm": 0.18046601157388797,
|
||
|
|
"learning_rate": 6.5303421565117595e-06,
|
||
|
|
"loss": 0.1359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04307221621274948,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 4173.9,
|
||
|
|
"valid_targets_min": 651
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.392156862745098,
|
||
|
|
"grad_norm": 0.14559092733051696,
|
||
|
|
"learning_rate": 6.172684879649613e-06,
|
||
|
|
"loss": 0.1284,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04021957516670227,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 5618.3,
|
||
|
|
"valid_targets_min": 386
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4411764705882355,
|
||
|
|
"grad_norm": 0.15063235370677366,
|
||
|
|
"learning_rate": 5.82330484935685e-06,
|
||
|
|
"loss": 0.1361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05188259109854698,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 6229.9,
|
||
|
|
"valid_targets_min": 2368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.490196078431373,
|
||
|
|
"grad_norm": 0.15416766663654213,
|
||
|
|
"learning_rate": 5.482411209970742e-06,
|
||
|
|
"loss": 0.1324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04332681745290756,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 5592.3,
|
||
|
|
"valid_targets_min": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.53921568627451,
|
||
|
|
"grad_norm": 0.16407271932187248,
|
||
|
|
"learning_rate": 5.15020802574256e-06,
|
||
|
|
"loss": 0.1401,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.055699240416288376,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 5802.0,
|
||
|
|
"valid_targets_min": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.588235294117647,
|
||
|
|
"grad_norm": 0.16225757678922273,
|
||
|
|
"learning_rate": 4.8268941586815275e-06,
|
||
|
|
"loss": 0.1384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05296599119901657,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 5673.7,
|
||
|
|
"valid_targets_min": 1561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.637254901960784,
|
||
|
|
"grad_norm": 0.1783066013238609,
|
||
|
|
"learning_rate": 4.512663149512915e-06,
|
||
|
|
"loss": 0.1404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05072878301143646,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 4909.6,
|
||
|
|
"valid_targets_min": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.686274509803922,
|
||
|
|
"grad_norm": 0.1635156014972583,
|
||
|
|
"learning_rate": 4.207703101821547e-06,
|
||
|
|
"loss": 0.1452,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04394640773534775,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 6057.6,
|
||
|
|
"valid_targets_min": 724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.735294117647059,
|
||
|
|
"grad_norm": 0.1530197110100291,
|
||
|
|
"learning_rate": 3.912196569450062e-06,
|
||
|
|
"loss": 0.1365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04203708469867706,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 6321.2,
|
||
|
|
"valid_targets_min": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.784313725490196,
|
||
|
|
"grad_norm": 0.15884170707784306,
|
||
|
|
"learning_rate": 3.626320447219325e-06,
|
||
|
|
"loss": 0.1422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04695093259215355,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 5222.4,
|
||
|
|
"valid_targets_min": 825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.833333333333333,
|
||
|
|
"grad_norm": 0.1630943874822207,
|
||
|
|
"learning_rate": 3.350245865036439e-06,
|
||
|
|
"loss": 0.1399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.043691154569387436,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 6610.5,
|
||
|
|
"valid_targets_min": 924
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.882352941176471,
|
||
|
|
"grad_norm": 0.14034024802523662,
|
||
|
|
"learning_rate": 3.0841380854536986e-06,
|
||
|
|
"loss": 0.1374,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03624898940324783,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 7330.9,
|
||
|
|
"valid_targets_min": 2730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.931372549019608,
|
||
|
|
"grad_norm": 0.14334753955970883,
|
||
|
|
"learning_rate": 2.828156404739879e-06,
|
||
|
|
"loss": 0.1353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.041707608848810196,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 6577.7,
|
||
|
|
"valid_targets_min": 977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.980392156862745,
|
||
|
|
"grad_norm": 0.14232337944292772,
|
||
|
|
"learning_rate": 2.5824540575229475e-06,
|
||
|
|
"loss": 0.1297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.033248208463191986,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 5936.8,
|
||
|
|
"valid_targets_min": 1856
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.029411764705882,
|
||
|
|
"grad_norm": 0.1528237861523893,
|
||
|
|
"learning_rate": 2.3471781250614932e-06,
|
||
|
|
"loss": 0.1409,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04537317529320717,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 5408.1,
|
||
|
|
"valid_targets_min": 2357
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.078431372549019,
|
||
|
|
"grad_norm": 0.15536078711039872,
|
||
|
|
"learning_rate": 2.122469447199529e-06,
|
||
|
|
"loss": 0.1444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04748551920056343,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 6326.7,
|
||
|
|
"valid_targets_min": 1518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.127450980392156,
|
||
|
|
"grad_norm": 0.15104775706307438,
|
||
|
|
"learning_rate": 1.908462538057607e-06,
|
||
|
|
"loss": 0.1356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044153109192848206,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 5396.5,
|
||
|
|
"valid_targets_min": 1338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.176470588235294,
|
||
|
|
"grad_norm": 0.1532713977989426,
|
||
|
|
"learning_rate": 1.7052855055105477e-06,
|
||
|
|
"loss": 0.1403,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04733777046203613,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 6734.2,
|
||
|
|
"valid_targets_min": 2804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2254901960784315,
|
||
|
|
"grad_norm": 0.15270643758494024,
|
||
|
|
"learning_rate": 1.5130599745000663e-06,
|
||
|
|
"loss": 0.135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.040537070482969284,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 5465.8,
|
||
|
|
"valid_targets_min": 1202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2745098039215685,
|
||
|
|
"grad_norm": 0.1478234433976078,
|
||
|
|
"learning_rate": 1.331901014228192e-06,
|
||
|
|
"loss": 0.129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05076032876968384,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 5307.6,
|
||
|
|
"valid_targets_min": 974
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.323529411764706,
|
||
|
|
"grad_norm": 0.14756205953677878,
|
||
|
|
"learning_rate": 1.161917069275047e-06,
|
||
|
|
"loss": 0.1436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04347331076860428,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 5066.8,
|
||
|
|
"valid_targets_min": 727
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.372549019607844,
|
||
|
|
"grad_norm": 0.15417081876853794,
|
||
|
|
"learning_rate": 1.0032098946822244e-06,
|
||
|
|
"loss": 0.1402,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.041828703135252,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 6169.8,
|
||
|
|
"valid_targets_min": 1408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.421568627450981,
|
||
|
|
"grad_norm": 0.16730842517730218,
|
||
|
|
"learning_rate": 8.558744950406361e-07,
|
||
|
|
"loss": 0.1344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052832819521427155,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 6030.4,
|
||
|
|
"valid_targets_min": 916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.470588235294118,
|
||
|
|
"grad_norm": 0.14840505399338527,
|
||
|
|
"learning_rate": 7.199990676192836e-07,
|
||
|
|
"loss": 0.133,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.042389050126075745,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 6102.2,
|
||
|
|
"valid_targets_min": 1060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.519607843137255,
|
||
|
|
"grad_norm": 0.1464931099328798,
|
||
|
|
"learning_rate": 5.956649495689992e-07,
|
||
|
|
"loss": 0.1328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052474960684776306,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 5826.2,
|
||
|
|
"valid_targets_min": 1276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.568627450980392,
|
||
|
|
"grad_norm": 0.14130389260204995,
|
||
|
|
"learning_rate": 4.829465692327429e-07,
|
||
|
|
"loss": 0.1367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04665312170982361,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 6207.3,
|
||
|
|
"valid_targets_min": 1636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.617647058823529,
|
||
|
|
"grad_norm": 0.14337796217921622,
|
||
|
|
"learning_rate": 3.819114015916614e-07,
|
||
|
|
"loss": 0.1474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05262928083539009,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 6180.5,
|
||
|
|
"valid_targets_min": 1432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.666666666666667,
|
||
|
|
"grad_norm": 0.14706066206331941,
|
||
|
|
"learning_rate": 2.9261992787347873e-07,
|
||
|
|
"loss": 0.1413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04194498062133789,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 6359.8,
|
||
|
|
"valid_targets_min": 1625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.715686274509804,
|
||
|
|
"grad_norm": 0.14057051185988517,
|
||
|
|
"learning_rate": 2.151255993475254e-07,
|
||
|
|
"loss": 0.1398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0460154190659523,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 6352.1,
|
||
|
|
"valid_targets_min": 1706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.764705882352941,
|
||
|
|
"grad_norm": 0.12402153861336833,
|
||
|
|
"learning_rate": 1.4947480532794489e-07,
|
||
|
|
"loss": 0.1311,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03990158438682556,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 7494.4,
|
||
|
|
"valid_targets_min": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.813725490196078,
|
||
|
|
"grad_norm": 0.1599205562983089,
|
||
|
|
"learning_rate": 9.570684540434638e-08,
|
||
|
|
"loss": 0.133,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05221497267484665,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 5782.2,
|
||
|
|
"valid_targets_min": 977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.862745098039216,
|
||
|
|
"grad_norm": 0.1479123937146945,
|
||
|
|
"learning_rate": 5.3853905916443347e-08,
|
||
|
|
"loss": 0.1307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04318413883447647,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 5034.2,
|
||
|
|
"valid_targets_min": 1045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.911764705882353,
|
||
|
|
"grad_norm": 0.1465353782553542,
|
||
|
|
"learning_rate": 2.3941040686816796e-08,
|
||
|
|
"loss": 0.1387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.040560588240623474,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 6557.7,
|
||
|
|
"valid_targets_min": 2578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.96078431372549,
|
||
|
|
"grad_norm": 0.13650641166527105,
|
||
|
|
"learning_rate": 5.986156023303214e-09,
|
||
|
|
"loss": 0.1264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.043502289801836014,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 6664.9,
|
||
|
|
"valid_targets_min": 1868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"step": 714,
|
||
|
|
"total_flos": 2.800887118061109e+18,
|
||
|
|
"train_loss": 0.0,
|
||
|
|
"train_runtime": 3.2172,
|
||
|
|
"train_samples_per_second": 21305.35,
|
||
|
|
"train_steps_per_second": 221.931
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 714,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 300,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 2.800887118061109e+18,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|