Files
sft__Kimi-2-5-inferredbugs-…/trainer_state.json

1606 lines
44 KiB
JSON
Raw Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 714,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.049019607843137254,
"grad_norm": 14.845285962034723,
"learning_rate": 2.222222222222222e-06,
"loss": 0.517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18225759267807007,
"step": 5,
"valid_targets_mean": 4948.1,
"valid_targets_min": 1347
},
{
"epoch": 0.09803921568627451,
"grad_norm": 7.464976124277118,
"learning_rate": 5e-06,
"loss": 0.4481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1431998908519745,
"step": 10,
"valid_targets_mean": 5591.3,
"valid_targets_min": 680
},
{
"epoch": 0.14705882352941177,
"grad_norm": 1.813784772450264,
"learning_rate": 7.77777777777778e-06,
"loss": 0.3912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13079722225666046,
"step": 15,
"valid_targets_mean": 5941.9,
"valid_targets_min": 267
},
{
"epoch": 0.19607843137254902,
"grad_norm": 0.9910305112535406,
"learning_rate": 1.0555555555555557e-05,
"loss": 0.3384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11258930712938309,
"step": 20,
"valid_targets_mean": 6313.4,
"valid_targets_min": 2143
},
{
"epoch": 0.24509803921568626,
"grad_norm": 0.7992459746477515,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.3123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10164578258991241,
"step": 25,
"valid_targets_mean": 5665.3,
"valid_targets_min": 2015
},
{
"epoch": 0.29411764705882354,
"grad_norm": 0.47317240306192654,
"learning_rate": 1.6111111111111115e-05,
"loss": 0.2899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0981433242559433,
"step": 30,
"valid_targets_mean": 5768.9,
"valid_targets_min": 2274
},
{
"epoch": 0.3431372549019608,
"grad_norm": 0.3462324350621174,
"learning_rate": 1.888888888888889e-05,
"loss": 0.2846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09364812076091766,
"step": 35,
"valid_targets_mean": 5493.6,
"valid_targets_min": 2334
},
{
"epoch": 0.39215686274509803,
"grad_norm": 0.3775315605954148,
"learning_rate": 2.1666666666666667e-05,
"loss": 0.259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07151000946760178,
"step": 40,
"valid_targets_mean": 5538.6,
"valid_targets_min": 489
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.23962790083936536,
"learning_rate": 2.444444444444445e-05,
"loss": 0.2354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07168256491422653,
"step": 45,
"valid_targets_mean": 6081.1,
"valid_targets_min": 977
},
{
"epoch": 0.49019607843137253,
"grad_norm": 0.23549334781671427,
"learning_rate": 2.7222222222222226e-05,
"loss": 0.2383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09021559357643127,
"step": 50,
"valid_targets_mean": 5551.8,
"valid_targets_min": 983
},
{
"epoch": 0.5392156862745098,
"grad_norm": 0.2061785031390588,
"learning_rate": 3.0000000000000004e-05,
"loss": 0.2381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07527650892734528,
"step": 55,
"valid_targets_mean": 6129.4,
"valid_targets_min": 780
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.18844424836907367,
"learning_rate": 3.277777777777778e-05,
"loss": 0.2054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07372777163982391,
"step": 60,
"valid_targets_mean": 6069.8,
"valid_targets_min": 256
},
{
"epoch": 0.6372549019607843,
"grad_norm": 0.17246412617765505,
"learning_rate": 3.555555555555555e-05,
"loss": 0.2053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06430137157440186,
"step": 65,
"valid_targets_mean": 7087.8,
"valid_targets_min": 400
},
{
"epoch": 0.6862745098039216,
"grad_norm": 0.19460946066944204,
"learning_rate": 3.833333333333334e-05,
"loss": 0.2089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09138505160808563,
"step": 70,
"valid_targets_mean": 6163.3,
"valid_targets_min": 1620
},
{
"epoch": 0.7352941176470589,
"grad_norm": 0.19479248546898312,
"learning_rate": 3.9999042174899045e-05,
"loss": 0.1986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06743113696575165,
"step": 75,
"valid_targets_mean": 5578.7,
"valid_targets_min": 598
},
{
"epoch": 0.7843137254901961,
"grad_norm": 0.2114771866057741,
"learning_rate": 3.998826769609533e-05,
"loss": 0.1951,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.060611382126808167,
"step": 80,
"valid_targets_mean": 6215.6,
"valid_targets_min": 1322
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.1910136360390071,
"learning_rate": 3.996552792838517e-05,
"loss": 0.191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06359056383371353,
"step": 85,
"valid_targets_mean": 5589.2,
"valid_targets_min": 298
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.19138505864440583,
"learning_rate": 3.993083648414832e-05,
"loss": 0.1949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07189453393220901,
"step": 90,
"valid_targets_mean": 5569.2,
"valid_targets_min": 751
},
{
"epoch": 0.9313725490196079,
"grad_norm": 0.1811083652569276,
"learning_rate": 3.988421413022457e-05,
"loss": 0.1702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05381467193365097,
"step": 95,
"valid_targets_mean": 6246.1,
"valid_targets_min": 858
},
{
"epoch": 0.9803921568627451,
"grad_norm": 0.15647945413369116,
"learning_rate": 3.982568877548239e-05,
"loss": 0.1782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.051519643515348434,
"step": 100,
"valid_targets_mean": 6077.8,
"valid_targets_min": 367
},
{
"epoch": 1.0294117647058822,
"grad_norm": 0.20486438301365698,
"learning_rate": 3.975529545411226e-05,
"loss": 0.1896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05946924537420273,
"step": 105,
"valid_targets_mean": 5859.4,
"valid_targets_min": 1382
},
{
"epoch": 1.0784313725490196,
"grad_norm": 0.17500723183778927,
"learning_rate": 3.967307630465466e-05,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04872163012623787,
"step": 110,
"valid_targets_mean": 5285.2,
"valid_targets_min": 1213
},
{
"epoch": 1.1274509803921569,
"grad_norm": 0.17177261390298737,
"learning_rate": 3.957908054477526e-05,
"loss": 0.1658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053148724138736725,
"step": 115,
"valid_targets_mean": 6222.8,
"valid_targets_min": 1926
},
{
"epoch": 1.1764705882352942,
"grad_norm": 0.18155686316968658,
"learning_rate": 3.9473364441802474e-05,
"loss": 0.1772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05831097811460495,
"step": 120,
"valid_targets_mean": 5672.5,
"valid_targets_min": 1670
},
{
"epoch": 1.2254901960784315,
"grad_norm": 0.20691680495712458,
"learning_rate": 3.9355991279044965e-05,
"loss": 0.1645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06711968034505844,
"step": 125,
"valid_targets_mean": 5509.9,
"valid_targets_min": 323
},
{
"epoch": 1.2745098039215685,
"grad_norm": 0.16953565767196252,
"learning_rate": 3.922703131790925e-05,
"loss": 0.1641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0471009723842144,
"step": 130,
"valid_targets_mean": 5594.2,
"valid_targets_min": 2155
},
{
"epoch": 1.3235294117647058,
"grad_norm": 0.19779493712452909,
"learning_rate": 3.9086561755840146e-05,
"loss": 0.161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05489179864525795,
"step": 135,
"valid_targets_mean": 4934.2,
"valid_targets_min": 654
},
{
"epoch": 1.3725490196078431,
"grad_norm": 0.20239218498545397,
"learning_rate": 3.893466668010915e-05,
"loss": 0.1673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06282714009284973,
"step": 140,
"valid_targets_mean": 6535.7,
"valid_targets_min": 1687
},
{
"epoch": 1.4215686274509804,
"grad_norm": 0.17377123597737723,
"learning_rate": 3.8771437017478526e-05,
"loss": 0.1668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046998970210552216,
"step": 145,
"valid_targets_mean": 5463.5,
"valid_targets_min": 390
},
{
"epoch": 1.4705882352941178,
"grad_norm": 0.17062719138426188,
"learning_rate": 3.859697047977108e-05,
"loss": 0.1635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05450977012515068,
"step": 150,
"valid_targets_mean": 5535.4,
"valid_targets_min": 804
},
{
"epoch": 1.5196078431372548,
"grad_norm": 0.19369358984546386,
"learning_rate": 3.8411371505378356e-05,
"loss": 0.1614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05806870013475418,
"step": 155,
"valid_targets_mean": 6097.0,
"valid_targets_min": 915
},
{
"epoch": 1.5686274509803921,
"grad_norm": 0.17606741825762348,
"learning_rate": 3.8214751196742224e-05,
"loss": 0.1763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.049261607229709625,
"step": 160,
"valid_targets_mean": 5752.1,
"valid_targets_min": 578
},
{
"epoch": 1.6176470588235294,
"grad_norm": 0.2117864198467642,
"learning_rate": 3.800722725384716e-05,
"loss": 0.1549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05296482890844345,
"step": 165,
"valid_targets_mean": 5688.4,
"valid_targets_min": 751
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.19606553816396718,
"learning_rate": 3.778892390376323e-05,
"loss": 0.1689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06805002689361572,
"step": 170,
"valid_targets_mean": 5743.2,
"valid_targets_min": 2992
},
{
"epoch": 1.715686274509804,
"grad_norm": 0.2579865850635183,
"learning_rate": 3.755997182628185e-05,
"loss": 0.1671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.056646402925252914,
"step": 175,
"valid_targets_mean": 6171.2,
"valid_targets_min": 227
},
{
"epoch": 1.7647058823529411,
"grad_norm": 0.15784964961757458,
"learning_rate": 3.732050807568878e-05,
"loss": 0.1625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047502197325229645,
"step": 180,
"valid_targets_mean": 5991.4,
"valid_targets_min": 1963
},
{
"epoch": 1.8137254901960784,
"grad_norm": 0.20767144079577776,
"learning_rate": 3.707067599872131e-05,
"loss": 0.1665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06044068560004234,
"step": 185,
"valid_targets_mean": 5098.8,
"valid_targets_min": 781
},
{
"epoch": 1.8627450980392157,
"grad_norm": 0.1769277041382716,
"learning_rate": 3.681062514875868e-05,
"loss": 0.1603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06034180149435997,
"step": 190,
"valid_targets_mean": 5518.4,
"valid_targets_min": 297
},
{
"epoch": 1.9117647058823528,
"grad_norm": 0.1873324059846003,
"learning_rate": 3.6540511196297084e-05,
"loss": 0.1657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05551169440150261,
"step": 195,
"valid_targets_mean": 4605.0,
"valid_targets_min": 1450
},
{
"epoch": 1.9607843137254903,
"grad_norm": 0.18605503901153028,
"learning_rate": 3.6260495835762865e-05,
"loss": 0.1581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04336617887020111,
"step": 200,
"valid_targets_mean": 4599.4,
"valid_targets_min": 735
},
{
"epoch": 2.0098039215686274,
"grad_norm": 0.185744348058967,
"learning_rate": 3.597074668871972e-05,
"loss": 0.1637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.051525089889764786,
"step": 205,
"valid_targets_mean": 6674.3,
"valid_targets_min": 420
},
{
"epoch": 2.0588235294117645,
"grad_norm": 0.1852768943197571,
"learning_rate": 3.567143720352786e-05,
"loss": 0.1538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03533821552991867,
"step": 210,
"valid_targets_mean": 5553.1,
"valid_targets_min": 343
},
{
"epoch": 2.107843137254902,
"grad_norm": 0.1833870940799021,
"learning_rate": 3.536274655151502e-05,
"loss": 0.1526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04637109115719795,
"step": 215,
"valid_targets_mean": 5456.4,
"valid_targets_min": 894
},
{
"epoch": 2.156862745098039,
"grad_norm": 0.15680028488123737,
"learning_rate": 3.504485951972181e-05,
"loss": 0.1472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03762596845626831,
"step": 220,
"valid_targets_mean": 5934.5,
"valid_targets_min": 1149
},
{
"epoch": 2.2058823529411766,
"grad_norm": 0.17181083479283146,
"learning_rate": 3.4717966400285215e-05,
"loss": 0.1542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.039286620914936066,
"step": 225,
"valid_targets_mean": 6930.2,
"valid_targets_min": 1228
},
{
"epoch": 2.2549019607843137,
"grad_norm": 0.19508797620673862,
"learning_rate": 3.4382262876526845e-05,
"loss": 0.1564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06591647118330002,
"step": 230,
"valid_targets_mean": 6494.8,
"valid_targets_min": 2458
},
{
"epoch": 2.303921568627451,
"grad_norm": 0.19945856709801285,
"learning_rate": 3.403794990581377e-05,
"loss": 0.16,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05911063775420189,
"step": 235,
"valid_targets_mean": 5858.9,
"valid_targets_min": 1700
},
{
"epoch": 2.3529411764705883,
"grad_norm": 0.20200795155761367,
"learning_rate": 3.368523359926234e-05,
"loss": 0.1492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05478104576468468,
"step": 240,
"valid_targets_mean": 5153.1,
"valid_targets_min": 2065
},
{
"epoch": 2.4019607843137254,
"grad_norm": 0.19519853478938234,
"learning_rate": 3.332432509835687e-05,
"loss": 0.1549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04512668773531914,
"step": 245,
"valid_targets_mean": 5347.1,
"valid_targets_min": 2068
},
{
"epoch": 2.450980392156863,
"grad_norm": 0.18550082917348132,
"learning_rate": 3.2955440448556986e-05,
"loss": 0.1548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05476547032594681,
"step": 250,
"valid_targets_mean": 5342.8,
"valid_targets_min": 527
},
{
"epoch": 2.5,
"grad_norm": 0.17389768733214,
"learning_rate": 3.257880046996954e-05,
"loss": 0.1482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044103942811489105,
"step": 255,
"valid_targets_mean": 5376.1,
"valid_targets_min": 188
},
{
"epoch": 2.549019607843137,
"grad_norm": 0.18960557879045112,
"learning_rate": 3.219463062516218e-05,
"loss": 0.1472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044259026646614075,
"step": 260,
"valid_targets_mean": 6247.7,
"valid_targets_min": 120
},
{
"epoch": 2.5980392156862746,
"grad_norm": 0.18057320660204854,
"learning_rate": 3.180316088419794e-05,
"loss": 0.1504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04895833134651184,
"step": 265,
"valid_targets_mean": 5481.7,
"valid_targets_min": 682
},
{
"epoch": 2.6470588235294117,
"grad_norm": 0.16850152487348163,
"learning_rate": 3.14046255869716e-05,
"loss": 0.153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05481772869825363,
"step": 270,
"valid_targets_mean": 5729.1,
"valid_targets_min": 1495
},
{
"epoch": 2.696078431372549,
"grad_norm": 0.1972977593451679,
"learning_rate": 3.099926330293017e-05,
"loss": 0.1654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0604429729282856,
"step": 275,
"valid_targets_mean": 4838.8,
"valid_targets_min": 1164
},
{
"epoch": 2.7450980392156863,
"grad_norm": 0.1779532546747506,
"learning_rate": 3.058731668826147e-05,
"loss": 0.1611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0506039559841156,
"step": 280,
"valid_targets_mean": 5173.8,
"valid_targets_min": 966
},
{
"epoch": 2.7941176470588234,
"grad_norm": 0.16859117358751705,
"learning_rate": 3.0169032340636363e-05,
"loss": 0.1562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06492990255355835,
"step": 285,
"valid_targets_mean": 6798.8,
"valid_targets_min": 2298
},
{
"epoch": 2.843137254901961,
"grad_norm": 0.17047341359032425,
"learning_rate": 2.9744660651591544e-05,
"loss": 0.1611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04040013998746872,
"step": 290,
"valid_targets_mean": 5656.5,
"valid_targets_min": 1994
},
{
"epoch": 2.892156862745098,
"grad_norm": 0.1691917059677243,
"learning_rate": 2.9314455656641275e-05,
"loss": 0.1442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05659858137369156,
"step": 295,
"valid_targets_mean": 6168.2,
"valid_targets_min": 255
},
{
"epoch": 2.9411764705882355,
"grad_norm": 0.18686162719495658,
"learning_rate": 2.8878674883207726e-05,
"loss": 0.1525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05041201412677765,
"step": 300,
"valid_targets_mean": 6669.2,
"valid_targets_min": 1463
},
{
"epoch": 2.9901960784313726,
"grad_norm": 0.17069829336335143,
"learning_rate": 2.8437579196461072e-05,
"loss": 0.1396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044972144067287445,
"step": 305,
"valid_targets_mean": 6637.5,
"valid_targets_min": 969
},
{
"epoch": 3.0392156862745097,
"grad_norm": 0.15999237456048723,
"learning_rate": 2.799143264316152e-05,
"loss": 0.148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0373958945274353,
"step": 310,
"valid_targets_mean": 5989.3,
"valid_targets_min": 337
},
{
"epoch": 3.088235294117647,
"grad_norm": 0.1592160126728059,
"learning_rate": 2.7540502293596802e-05,
"loss": 0.1396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04034284129738808,
"step": 315,
"valid_targets_mean": 5127.5,
"valid_targets_min": 695
},
{
"epoch": 3.1372549019607843,
"grad_norm": 0.1769980777310915,
"learning_rate": 2.708505808170973e-05,
"loss": 0.1537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044640615582466125,
"step": 320,
"valid_targets_mean": 5812.9,
"valid_targets_min": 277
},
{
"epoch": 3.186274509803922,
"grad_norm": 0.15257116830142609,
"learning_rate": 2.662537264351152e-05,
"loss": 0.1404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05242038518190384,
"step": 325,
"valid_targets_mean": 5841.9,
"valid_targets_min": 983
},
{
"epoch": 3.235294117647059,
"grad_norm": 0.16404975625042978,
"learning_rate": 2.6161721153877658e-05,
"loss": 0.1519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048479050397872925,
"step": 330,
"valid_targets_mean": 5749.8,
"valid_targets_min": 349
},
{
"epoch": 3.284313725490196,
"grad_norm": 0.16196200510111014,
"learning_rate": 2.5694381161823883e-05,
"loss": 0.1393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05674157291650772,
"step": 335,
"valid_targets_mean": 6188.4,
"valid_targets_min": 2150
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.1715568446927489,
"learning_rate": 2.522363242436102e-05,
"loss": 0.1461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0405210517346859,
"step": 340,
"valid_targets_mean": 5543.5,
"valid_targets_min": 220
},
{
"epoch": 3.3823529411764706,
"grad_norm": 0.18625399468743448,
"learning_rate": 2.47497567390281e-05,
"loss": 0.1581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05725814402103424,
"step": 345,
"valid_targets_mean": 6666.8,
"valid_targets_min": 1783
},
{
"epoch": 3.431372549019608,
"grad_norm": 0.16710741980403304,
"learning_rate": 2.4273037775203924e-05,
"loss": 0.1434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047255054116249084,
"step": 350,
"valid_targets_mean": 5879.6,
"valid_targets_min": 2429
},
{
"epoch": 3.480392156862745,
"grad_norm": 0.18714960345056877,
"learning_rate": 2.3793760904298154e-05,
"loss": 0.1444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04799136519432068,
"step": 355,
"valid_targets_mean": 5505.2,
"valid_targets_min": 178
},
{
"epoch": 3.5294117647058822,
"grad_norm": 0.195485882837898,
"learning_rate": 2.3312213028923572e-05,
"loss": 0.1458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05502048134803772,
"step": 360,
"valid_targets_mean": 6203.2,
"valid_targets_min": 915
},
{
"epoch": 3.5784313725490198,
"grad_norm": 0.1977460248140161,
"learning_rate": 2.2828682411151634e-05,
"loss": 0.142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04789289832115173,
"step": 365,
"valid_targets_mean": 5283.4,
"valid_targets_min": 1751
},
{
"epoch": 3.627450980392157,
"grad_norm": 0.17712548108134565,
"learning_rate": 2.2343458499954342e-05,
"loss": 0.1438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05372612178325653,
"step": 370,
"valid_targets_mean": 5116.2,
"valid_targets_min": 745
},
{
"epoch": 3.6764705882352944,
"grad_norm": 0.17787145700828857,
"learning_rate": 2.1856831757935563e-05,
"loss": 0.1459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052830249071121216,
"step": 375,
"valid_targets_mean": 6221.8,
"valid_targets_min": 489
},
{
"epoch": 3.7254901960784315,
"grad_norm": 0.1403375945900038,
"learning_rate": 2.136909348745558e-05,
"loss": 0.1445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05467274412512779,
"step": 380,
"valid_targets_mean": 7338.7,
"valid_targets_min": 267
},
{
"epoch": 3.7745098039215685,
"grad_norm": 0.17890476249652149,
"learning_rate": 2.0880535656252955e-05,
"loss": 0.1559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04823530465364456,
"step": 385,
"valid_targets_mean": 5682.3,
"valid_targets_min": 1113
},
{
"epoch": 3.8235294117647056,
"grad_norm": 0.1675345935224821,
"learning_rate": 2.0391450722668096e-05,
"loss": 0.1422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04629891738295555,
"step": 390,
"valid_targets_mean": 5964.7,
"valid_targets_min": 906
},
{
"epoch": 3.872549019607843,
"grad_norm": 0.16849306980232315,
"learning_rate": 1.9902131460573106e-05,
"loss": 0.1412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.045284684747457504,
"step": 395,
"valid_targets_mean": 4922.6,
"valid_targets_min": 233
},
{
"epoch": 3.9215686274509802,
"grad_norm": 0.1684953006314063,
"learning_rate": 1.941287078411279e-05,
"loss": 0.1419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053950127214193344,
"step": 400,
"valid_targets_mean": 6029.8,
"valid_targets_min": 1074
},
{
"epoch": 3.9705882352941178,
"grad_norm": 0.17222377503381056,
"learning_rate": 1.8923961572361688e-05,
"loss": 0.1448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046407949179410934,
"step": 405,
"valid_targets_mean": 6019.1,
"valid_targets_min": 293
},
{
"epoch": 4.019607843137255,
"grad_norm": 0.15984187499464808,
"learning_rate": 1.8435696494002076e-05,
"loss": 0.1359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04338710010051727,
"step": 410,
"valid_targets_mean": 5504.8,
"valid_targets_min": 321
},
{
"epoch": 4.068627450980392,
"grad_norm": 0.16765151368961928,
"learning_rate": 1.7948367832127934e-05,
"loss": 0.148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.051340095698833466,
"step": 415,
"valid_targets_mean": 6667.2,
"valid_targets_min": 1060
},
{
"epoch": 4.117647058823529,
"grad_norm": 0.15792491122980973,
"learning_rate": 1.7462267309279722e-05,
"loss": 0.1436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05365137755870819,
"step": 420,
"valid_targets_mean": 6270.2,
"valid_targets_min": 1126
},
{
"epoch": 4.166666666666667,
"grad_norm": 0.1692968124966608,
"learning_rate": 1.6977685912814723e-05,
"loss": 0.1386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04327564314007759,
"step": 425,
"valid_targets_mean": 4881.8,
"valid_targets_min": 280
},
{
"epoch": 4.215686274509804,
"grad_norm": 0.16802626120360126,
"learning_rate": 1.649491372071745e-05,
"loss": 0.1348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04639950394630432,
"step": 430,
"valid_targets_mean": 6071.3,
"valid_targets_min": 1608
},
{
"epoch": 4.264705882352941,
"grad_norm": 0.1739251489205305,
"learning_rate": 1.601423972795448e-05,
"loss": 0.1466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04997720196843147,
"step": 435,
"valid_targets_mean": 5071.2,
"valid_targets_min": 1901
},
{
"epoch": 4.313725490196078,
"grad_norm": 0.16846274854669305,
"learning_rate": 1.5535951673477493e-05,
"loss": 0.1443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.036386433988809586,
"step": 440,
"valid_targets_mean": 6193.9,
"valid_targets_min": 804
},
{
"epoch": 4.362745098039215,
"grad_norm": 0.17845219785128927,
"learning_rate": 1.5060335867978322e-05,
"loss": 0.1491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.049667488783597946,
"step": 445,
"valid_targets_mean": 5358.5,
"valid_targets_min": 906
},
{
"epoch": 4.411764705882353,
"grad_norm": 0.16638712672136335,
"learning_rate": 1.4587677022498845e-05,
"loss": 0.1492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04850779101252556,
"step": 450,
"valid_targets_mean": 6082.3,
"valid_targets_min": 1213
},
{
"epoch": 4.46078431372549,
"grad_norm": 0.1509181659878023,
"learning_rate": 1.4118258077998563e-05,
"loss": 0.1347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.037120066583156586,
"step": 455,
"valid_targets_mean": 7179.8,
"valid_targets_min": 1754
},
{
"epoch": 4.509803921568627,
"grad_norm": 0.17919038805552429,
"learning_rate": 1.3652360035981657e-05,
"loss": 0.1352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04360215738415718,
"step": 460,
"valid_targets_mean": 5704.8,
"valid_targets_min": 1074
},
{
"epoch": 4.5588235294117645,
"grad_norm": 0.17444385735623882,
"learning_rate": 1.3190261790285202e-05,
"loss": 0.1537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046840980648994446,
"step": 465,
"valid_targets_mean": 6333.6,
"valid_targets_min": 990
},
{
"epoch": 4.607843137254902,
"grad_norm": 0.17400376107735924,
"learning_rate": 1.2732239960128854e-05,
"loss": 0.1403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04668301343917847,
"step": 470,
"valid_targets_mean": 6577.3,
"valid_targets_min": 804
},
{
"epoch": 4.6568627450980395,
"grad_norm": 0.1642690410502086,
"learning_rate": 1.227856872452637e-05,
"loss": 0.1363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04167339950799942,
"step": 475,
"valid_targets_mean": 6613.6,
"valid_targets_min": 1274
},
{
"epoch": 4.705882352941177,
"grad_norm": 0.1665608183925791,
"learning_rate": 1.1829519658157706e-05,
"loss": 0.1349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04697566106915474,
"step": 480,
"valid_targets_mean": 5399.3,
"valid_targets_min": 343
},
{
"epoch": 4.754901960784314,
"grad_norm": 0.15109027745291384,
"learning_rate": 1.1385361568800205e-05,
"loss": 0.1409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04550229385495186,
"step": 485,
"valid_targets_mean": 7463.3,
"valid_targets_min": 2613
},
{
"epoch": 4.803921568627451,
"grad_norm": 0.16283398624058726,
"learning_rate": 1.0946360336416041e-05,
"loss": 0.145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04752221703529358,
"step": 490,
"valid_targets_mean": 5754.3,
"valid_targets_min": 1679
},
{
"epoch": 4.852941176470588,
"grad_norm": 0.15690476742993395,
"learning_rate": 1.0512778753992384e-05,
"loss": 0.1391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050478532910346985,
"step": 495,
"valid_targets_mean": 6343.4,
"valid_targets_min": 1164
},
{
"epoch": 4.901960784313726,
"grad_norm": 0.1547493986508796,
"learning_rate": 1.0084876370229346e-05,
"loss": 0.1361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.037191685289144516,
"step": 500,
"valid_targets_mean": 5394.4,
"valid_targets_min": 2151
},
{
"epoch": 4.950980392156863,
"grad_norm": 0.1546433674602096,
"learning_rate": 9.662909334170119e-06,
"loss": 0.1398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0633089691400528,
"step": 505,
"valid_targets_mean": 6511.4,
"valid_targets_min": 2728
},
{
"epoch": 5.0,
"grad_norm": 0.14864264452615442,
"learning_rate": 9.247130241866162e-06,
"loss": 0.1293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0435556136071682,
"step": 510,
"valid_targets_mean": 6029.2,
"valid_targets_min": 826
},
{
"epoch": 5.049019607843137,
"grad_norm": 0.17351132595970922,
"learning_rate": 8.837787985169248e-06,
"loss": 0.1389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04400845244526863,
"step": 515,
"valid_targets_mean": 4938.3,
"valid_targets_min": 578
},
{
"epoch": 5.098039215686274,
"grad_norm": 0.16525532968859072,
"learning_rate": 8.435127602740931e-06,
"loss": 0.136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04021601378917694,
"step": 520,
"valid_targets_mean": 4886.9,
"valid_targets_min": 245
},
{
"epoch": 5.147058823529412,
"grad_norm": 0.17565072755854125,
"learning_rate": 8.03939013336857e-06,
"loss": 0.1486,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0686882734298706,
"step": 525,
"valid_targets_mean": 6319.9,
"valid_targets_min": 2372
},
{
"epoch": 5.196078431372549,
"grad_norm": 0.16546346883164598,
"learning_rate": 7.650812471675752e-06,
"loss": 0.1418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048375021666288376,
"step": 530,
"valid_targets_mean": 5439.3,
"valid_targets_min": 401
},
{
"epoch": 5.245098039215686,
"grad_norm": 0.16730821511271163,
"learning_rate": 7.269627226313507e-06,
"loss": 0.1373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050091587007045746,
"step": 535,
"valid_targets_mean": 5765.1,
"valid_targets_min": 942
},
{
"epoch": 5.294117647058823,
"grad_norm": 0.15886005158900465,
"learning_rate": 6.896062580717056e-06,
"loss": 0.1333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052998557686805725,
"step": 540,
"valid_targets_mean": 5913.3,
"valid_targets_min": 1181
},
{
"epoch": 5.3431372549019605,
"grad_norm": 0.18046601157388797,
"learning_rate": 6.5303421565117595e-06,
"loss": 0.1359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04307221621274948,
"step": 545,
"valid_targets_mean": 4173.9,
"valid_targets_min": 651
},
{
"epoch": 5.392156862745098,
"grad_norm": 0.14559092733051696,
"learning_rate": 6.172684879649613e-06,
"loss": 0.1284,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04021957516670227,
"step": 550,
"valid_targets_mean": 5618.3,
"valid_targets_min": 386
},
{
"epoch": 5.4411764705882355,
"grad_norm": 0.15063235370677366,
"learning_rate": 5.82330484935685e-06,
"loss": 0.1361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05188259109854698,
"step": 555,
"valid_targets_mean": 6229.9,
"valid_targets_min": 2368
},
{
"epoch": 5.490196078431373,
"grad_norm": 0.15416766663654213,
"learning_rate": 5.482411209970742e-06,
"loss": 0.1324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04332681745290756,
"step": 560,
"valid_targets_mean": 5592.3,
"valid_targets_min": 420
},
{
"epoch": 5.53921568627451,
"grad_norm": 0.16407271932187248,
"learning_rate": 5.15020802574256e-06,
"loss": 0.1401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055699240416288376,
"step": 565,
"valid_targets_mean": 5802.0,
"valid_targets_min": 776
},
{
"epoch": 5.588235294117647,
"grad_norm": 0.16225757678922273,
"learning_rate": 4.8268941586815275e-06,
"loss": 0.1384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05296599119901657,
"step": 570,
"valid_targets_mean": 5673.7,
"valid_targets_min": 1561
},
{
"epoch": 5.637254901960784,
"grad_norm": 0.1783066013238609,
"learning_rate": 4.512663149512915e-06,
"loss": 0.1404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05072878301143646,
"step": 575,
"valid_targets_mean": 4909.6,
"valid_targets_min": 270
},
{
"epoch": 5.686274509803922,
"grad_norm": 0.1635156014972583,
"learning_rate": 4.207703101821547e-06,
"loss": 0.1452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04394640773534775,
"step": 580,
"valid_targets_mean": 6057.6,
"valid_targets_min": 724
},
{
"epoch": 5.735294117647059,
"grad_norm": 0.1530197110100291,
"learning_rate": 3.912196569450062e-06,
"loss": 0.1365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04203708469867706,
"step": 585,
"valid_targets_mean": 6321.2,
"valid_targets_min": 332
},
{
"epoch": 5.784313725490196,
"grad_norm": 0.15884170707784306,
"learning_rate": 3.626320447219325e-06,
"loss": 0.1422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04695093259215355,
"step": 590,
"valid_targets_mean": 5222.4,
"valid_targets_min": 825
},
{
"epoch": 5.833333333333333,
"grad_norm": 0.1630943874822207,
"learning_rate": 3.350245865036439e-06,
"loss": 0.1399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.043691154569387436,
"step": 595,
"valid_targets_mean": 6610.5,
"valid_targets_min": 924
},
{
"epoch": 5.882352941176471,
"grad_norm": 0.14034024802523662,
"learning_rate": 3.0841380854536986e-06,
"loss": 0.1374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03624898940324783,
"step": 600,
"valid_targets_mean": 7330.9,
"valid_targets_min": 2730
},
{
"epoch": 5.931372549019608,
"grad_norm": 0.14334753955970883,
"learning_rate": 2.828156404739879e-06,
"loss": 0.1353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.041707608848810196,
"step": 605,
"valid_targets_mean": 6577.7,
"valid_targets_min": 977
},
{
"epoch": 5.980392156862745,
"grad_norm": 0.14232337944292772,
"learning_rate": 2.5824540575229475e-06,
"loss": 0.1297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.033248208463191986,
"step": 610,
"valid_targets_mean": 5936.8,
"valid_targets_min": 1856
},
{
"epoch": 6.029411764705882,
"grad_norm": 0.1528237861523893,
"learning_rate": 2.3471781250614932e-06,
"loss": 0.1409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04537317529320717,
"step": 615,
"valid_targets_mean": 5408.1,
"valid_targets_min": 2357
},
{
"epoch": 6.078431372549019,
"grad_norm": 0.15536078711039872,
"learning_rate": 2.122469447199529e-06,
"loss": 0.1444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04748551920056343,
"step": 620,
"valid_targets_mean": 6326.7,
"valid_targets_min": 1518
},
{
"epoch": 6.127450980392156,
"grad_norm": 0.15104775706307438,
"learning_rate": 1.908462538057607e-06,
"loss": 0.1356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044153109192848206,
"step": 625,
"valid_targets_mean": 5396.5,
"valid_targets_min": 1338
},
{
"epoch": 6.176470588235294,
"grad_norm": 0.1532713977989426,
"learning_rate": 1.7052855055105477e-06,
"loss": 0.1403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04733777046203613,
"step": 630,
"valid_targets_mean": 6734.2,
"valid_targets_min": 2804
},
{
"epoch": 6.2254901960784315,
"grad_norm": 0.15270643758494024,
"learning_rate": 1.5130599745000663e-06,
"loss": 0.135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.040537070482969284,
"step": 635,
"valid_targets_mean": 5465.8,
"valid_targets_min": 1202
},
{
"epoch": 6.2745098039215685,
"grad_norm": 0.1478234433976078,
"learning_rate": 1.331901014228192e-06,
"loss": 0.129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05076032876968384,
"step": 640,
"valid_targets_mean": 5307.6,
"valid_targets_min": 974
},
{
"epoch": 6.323529411764706,
"grad_norm": 0.14756205953677878,
"learning_rate": 1.161917069275047e-06,
"loss": 0.1436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04347331076860428,
"step": 645,
"valid_targets_mean": 5066.8,
"valid_targets_min": 727
},
{
"epoch": 6.372549019607844,
"grad_norm": 0.15417081876853794,
"learning_rate": 1.0032098946822244e-06,
"loss": 0.1402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.041828703135252,
"step": 650,
"valid_targets_mean": 6169.8,
"valid_targets_min": 1408
},
{
"epoch": 6.421568627450981,
"grad_norm": 0.16730842517730218,
"learning_rate": 8.558744950406361e-07,
"loss": 0.1344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052832819521427155,
"step": 655,
"valid_targets_mean": 6030.4,
"valid_targets_min": 916
},
{
"epoch": 6.470588235294118,
"grad_norm": 0.14840505399338527,
"learning_rate": 7.199990676192836e-07,
"loss": 0.133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.042389050126075745,
"step": 660,
"valid_targets_mean": 6102.2,
"valid_targets_min": 1060
},
{
"epoch": 6.519607843137255,
"grad_norm": 0.1464931099328798,
"learning_rate": 5.956649495689992e-07,
"loss": 0.1328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052474960684776306,
"step": 665,
"valid_targets_mean": 5826.2,
"valid_targets_min": 1276
},
{
"epoch": 6.568627450980392,
"grad_norm": 0.14130389260204995,
"learning_rate": 4.829465692327429e-07,
"loss": 0.1367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04665312170982361,
"step": 670,
"valid_targets_mean": 6207.3,
"valid_targets_min": 1636
},
{
"epoch": 6.617647058823529,
"grad_norm": 0.14337796217921622,
"learning_rate": 3.819114015916614e-07,
"loss": 0.1474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05262928083539009,
"step": 675,
"valid_targets_mean": 6180.5,
"valid_targets_min": 1432
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.14706066206331941,
"learning_rate": 2.9261992787347873e-07,
"loss": 0.1413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04194498062133789,
"step": 680,
"valid_targets_mean": 6359.8,
"valid_targets_min": 1625
},
{
"epoch": 6.715686274509804,
"grad_norm": 0.14057051185988517,
"learning_rate": 2.151255993475254e-07,
"loss": 0.1398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0460154190659523,
"step": 685,
"valid_targets_mean": 6352.1,
"valid_targets_min": 1706
},
{
"epoch": 6.764705882352941,
"grad_norm": 0.12402153861336833,
"learning_rate": 1.4947480532794489e-07,
"loss": 0.1311,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03990158438682556,
"step": 690,
"valid_targets_mean": 7494.4,
"valid_targets_min": 270
},
{
"epoch": 6.813725490196078,
"grad_norm": 0.1599205562983089,
"learning_rate": 9.570684540434638e-08,
"loss": 0.133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05221497267484665,
"step": 695,
"valid_targets_mean": 5782.2,
"valid_targets_min": 977
},
{
"epoch": 6.862745098039216,
"grad_norm": 0.1479123937146945,
"learning_rate": 5.3853905916443347e-08,
"loss": 0.1307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04318413883447647,
"step": 700,
"valid_targets_mean": 5034.2,
"valid_targets_min": 1045
},
{
"epoch": 6.911764705882353,
"grad_norm": 0.1465353782553542,
"learning_rate": 2.3941040686816796e-08,
"loss": 0.1387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.040560588240623474,
"step": 705,
"valid_targets_mean": 6557.7,
"valid_targets_min": 2578
},
{
"epoch": 6.96078431372549,
"grad_norm": 0.13650641166527105,
"learning_rate": 5.986156023303214e-09,
"loss": 0.1264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.043502289801836014,
"step": 710,
"valid_targets_mean": 6664.9,
"valid_targets_min": 1868
},
{
"epoch": 7.0,
"step": 714,
"total_flos": 2.800887118061109e+18,
"train_loss": 0.0,
"train_runtime": 3.2172,
"train_samples_per_second": 21305.35,
"train_steps_per_second": 221.931
}
],
"logging_steps": 5,
"max_steps": 714,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.800887118061109e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}