Files
mix760_3step_bc760/trainer_state.json

1511 lines
42 KiB
JSON
Raw Permalink Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 665,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05263157894736842,
"grad_norm": 12.553807503106103,
"learning_rate": 2.3880597014925373e-06,
"loss": 0.6006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11287848651409149,
"step": 5,
"valid_targets_mean": 673.5,
"valid_targets_min": 369
},
{
"epoch": 0.10526315789473684,
"grad_norm": 7.035434903432055,
"learning_rate": 5.37313432835821e-06,
"loss": 0.6424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10525691509246826,
"step": 10,
"valid_targets_mean": 3003.0,
"valid_targets_min": 529
},
{
"epoch": 0.15789473684210525,
"grad_norm": 5.198775023562205,
"learning_rate": 8.35820895522388e-06,
"loss": 0.4501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0761948674917221,
"step": 15,
"valid_targets_mean": 1735.2,
"valid_targets_min": 610
},
{
"epoch": 0.21052631578947367,
"grad_norm": 2.0741815557089174,
"learning_rate": 1.1343283582089553e-05,
"loss": 0.3755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10525241494178772,
"step": 20,
"valid_targets_mean": 1774.2,
"valid_targets_min": 632
},
{
"epoch": 0.2631578947368421,
"grad_norm": 1.850608693504522,
"learning_rate": 1.4328358208955224e-05,
"loss": 0.3576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2170872539281845,
"step": 25,
"valid_targets_mean": 1434.5,
"valid_targets_min": 537
},
{
"epoch": 0.3157894736842105,
"grad_norm": 0.7064366027469681,
"learning_rate": 1.7313432835820894e-05,
"loss": 0.2887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04902719706296921,
"step": 30,
"valid_targets_mean": 1492.2,
"valid_targets_min": 337
},
{
"epoch": 0.3684210526315789,
"grad_norm": 0.8492931103555103,
"learning_rate": 2.029850746268657e-05,
"loss": 0.3099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11869967728853226,
"step": 35,
"valid_targets_mean": 2878.5,
"valid_targets_min": 574
},
{
"epoch": 0.42105263157894735,
"grad_norm": 0.9128923904158283,
"learning_rate": 2.3283582089552242e-05,
"loss": 0.2594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06856397539377213,
"step": 40,
"valid_targets_mean": 2244.8,
"valid_targets_min": 478
},
{
"epoch": 0.47368421052631576,
"grad_norm": 0.7721603884282439,
"learning_rate": 2.6268656716417913e-05,
"loss": 0.2576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07116584479808807,
"step": 45,
"valid_targets_mean": 1934.0,
"valid_targets_min": 567
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.815787876318566,
"learning_rate": 2.9253731343283584e-05,
"loss": 0.291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13275116682052612,
"step": 50,
"valid_targets_mean": 2430.8,
"valid_targets_min": 1481
},
{
"epoch": 0.5789473684210527,
"grad_norm": 0.7820305833709643,
"learning_rate": 3.2238805970149255e-05,
"loss": 0.2378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06999962031841278,
"step": 55,
"valid_targets_mean": 1782.0,
"valid_targets_min": 459
},
{
"epoch": 0.631578947368421,
"grad_norm": 0.921532705829729,
"learning_rate": 3.522388059701493e-05,
"loss": 0.2227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04323829710483551,
"step": 60,
"valid_targets_mean": 758.5,
"valid_targets_min": 591
},
{
"epoch": 0.6842105263157895,
"grad_norm": 0.7092700459991306,
"learning_rate": 3.8208955223880596e-05,
"loss": 0.2336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.037662893533706665,
"step": 65,
"valid_targets_mean": 730.0,
"valid_targets_min": 436
},
{
"epoch": 0.7368421052631579,
"grad_norm": 0.7461432563225519,
"learning_rate": 3.9998896039909675e-05,
"loss": 0.1988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02792029082775116,
"step": 70,
"valid_targets_mean": 497.0,
"valid_targets_min": 461
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.7023900039706257,
"learning_rate": 3.998647788848384e-05,
"loss": 0.2185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03114963322877884,
"step": 75,
"valid_targets_mean": 1628.8,
"valid_targets_min": 382
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.7055640361791583,
"learning_rate": 3.996027023188427e-05,
"loss": 0.2002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026642896234989166,
"step": 80,
"valid_targets_mean": 570.2,
"valid_targets_min": 453
},
{
"epoch": 0.8947368421052632,
"grad_norm": 0.8834619078292679,
"learning_rate": 3.9920291151866977e-05,
"loss": 0.2278,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08590055257081985,
"step": 85,
"valid_targets_mean": 2245.0,
"valid_targets_min": 564
},
{
"epoch": 0.9473684210526315,
"grad_norm": 0.8048346765640612,
"learning_rate": 3.986656823166766e-05,
"loss": 0.2334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08790746331214905,
"step": 90,
"valid_targets_mean": 3408.2,
"valid_targets_min": 1208
},
{
"epoch": 1.0,
"grad_norm": 0.7721245809969578,
"learning_rate": 3.979913853697095e-05,
"loss": 0.1963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05376936122775078,
"step": 95,
"valid_targets_mean": 1591.8,
"valid_targets_min": 804
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.6211663045932956,
"learning_rate": 3.9718048590337186e-05,
"loss": 0.1857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0645800530910492,
"step": 100,
"valid_targets_mean": 2262.0,
"valid_targets_min": 1426
},
{
"epoch": 1.1052631578947367,
"grad_norm": 0.7785104666209682,
"learning_rate": 3.962335433910463e-05,
"loss": 0.1824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025534367188811302,
"step": 105,
"valid_targets_mean": 679.8,
"valid_targets_min": 514
},
{
"epoch": 1.1578947368421053,
"grad_norm": 0.8004929865701785,
"learning_rate": 3.9515121116788985e-05,
"loss": 0.205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10348161309957504,
"step": 110,
"valid_targets_mean": 2548.2,
"valid_targets_min": 1644
},
{
"epoch": 1.2105263157894737,
"grad_norm": 0.8442132787033475,
"learning_rate": 3.939342359800714e-05,
"loss": 0.1878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046689361333847046,
"step": 115,
"valid_targets_mean": 1556.2,
"valid_targets_min": 361
},
{
"epoch": 1.263157894736842,
"grad_norm": 0.8100669921933641,
"learning_rate": 3.925834574695599e-05,
"loss": 0.1719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0551832839846611,
"step": 120,
"valid_targets_mean": 2410.0,
"valid_targets_min": 523
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.7871886106834695,
"learning_rate": 3.910998075948207e-05,
"loss": 0.1911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05720491334795952,
"step": 125,
"valid_targets_mean": 2352.5,
"valid_targets_min": 532
},
{
"epoch": 1.368421052631579,
"grad_norm": 0.7323699198683509,
"learning_rate": 3.8948430998781824e-05,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0170910581946373,
"step": 130,
"valid_targets_mean": 1404.5,
"valid_targets_min": 575
},
{
"epoch": 1.4210526315789473,
"grad_norm": 1.0920789638236659,
"learning_rate": 3.8773807924776976e-05,
"loss": 0.1912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06749822944402695,
"step": 135,
"valid_targets_mean": 929.8,
"valid_targets_min": 369
},
{
"epoch": 1.4736842105263157,
"grad_norm": 0.7229550949497463,
"learning_rate": 3.8586232017213675e-05,
"loss": 0.1831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03690353408455849,
"step": 140,
"valid_targets_mean": 1208.2,
"valid_targets_min": 861
},
{
"epoch": 1.526315789473684,
"grad_norm": 0.7657486783018452,
"learning_rate": 3.83858326925385e-05,
"loss": 0.1812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.037817131727933884,
"step": 145,
"valid_targets_mean": 1573.8,
"valid_targets_min": 524
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.7208446240323184,
"learning_rate": 3.8172748214608624e-05,
"loss": 0.1665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06162188947200775,
"step": 150,
"valid_targets_mean": 1708.5,
"valid_targets_min": 480
},
{
"epoch": 1.631578947368421,
"grad_norm": 0.7918543247788804,
"learning_rate": 3.7947125599297856e-05,
"loss": 0.1776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04460986703634262,
"step": 155,
"valid_targets_mean": 2118.2,
"valid_targets_min": 451
},
{
"epoch": 1.6842105263157894,
"grad_norm": 0.7001163120437345,
"learning_rate": 3.7709120513064196e-05,
"loss": 0.1856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03856482356786728,
"step": 160,
"valid_targets_mean": 1108.5,
"valid_targets_min": 478
},
{
"epoch": 1.736842105263158,
"grad_norm": 0.6906214593751792,
"learning_rate": 3.745889716554912e-05,
"loss": 0.1873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.040963269770145416,
"step": 165,
"valid_targets_mean": 1622.5,
"valid_targets_min": 717
},
{
"epoch": 1.7894736842105263,
"grad_norm": 0.798273215978815,
"learning_rate": 3.7196628196282415e-05,
"loss": 0.1787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.020104659721255302,
"step": 170,
"valid_targets_mean": 551.8,
"valid_targets_min": 482
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.6391796891465947,
"learning_rate": 3.692249455557103e-05,
"loss": 0.1619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025837548077106476,
"step": 175,
"valid_targets_mean": 3984.0,
"valid_targets_min": 252
},
{
"epoch": 1.8947368421052633,
"grad_norm": 0.5174143688291576,
"learning_rate": 3.6636685379653875e-05,
"loss": 0.1677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023429114371538162,
"step": 180,
"valid_targets_mean": 763.5,
"valid_targets_min": 520
},
{
"epoch": 1.9473684210526314,
"grad_norm": 0.8576835506620597,
"learning_rate": 3.633939786020884e-05,
"loss": 0.1675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03479863703250885,
"step": 185,
"valid_targets_mean": 1539.5,
"valid_targets_min": 452
},
{
"epoch": 2.0,
"grad_norm": 0.7568710245152749,
"learning_rate": 3.603083710830205e-05,
"loss": 0.1779,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06252141296863556,
"step": 190,
"valid_targets_mean": 2105.8,
"valid_targets_min": 477
},
{
"epoch": 2.0526315789473686,
"grad_norm": 0.6843664313982926,
"learning_rate": 3.5711216012873114e-05,
"loss": 0.1443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05131068080663681,
"step": 195,
"valid_targets_mean": 3221.2,
"valid_targets_min": 766
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.6549037149587634,
"learning_rate": 3.538075509385427e-05,
"loss": 0.1406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.035776007920503616,
"step": 200,
"valid_targets_mean": 2162.2,
"valid_targets_min": 564
},
{
"epoch": 2.1578947368421053,
"grad_norm": 0.6798810404078721,
"learning_rate": 3.503968235002437e-05,
"loss": 0.136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01746884360909462,
"step": 205,
"valid_targets_mean": 1817.8,
"valid_targets_min": 482
},
{
"epoch": 2.2105263157894735,
"grad_norm": 0.7040977825128967,
"learning_rate": 3.468823310170309e-05,
"loss": 0.1594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03740096092224121,
"step": 210,
"valid_targets_mean": 1947.5,
"valid_targets_min": 567
},
{
"epoch": 2.263157894736842,
"grad_norm": 0.7823204339158937,
"learning_rate": 3.4326649828393565e-05,
"loss": 0.1399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.033209312707185745,
"step": 215,
"valid_targets_mean": 1384.5,
"valid_targets_min": 591
},
{
"epoch": 2.3157894736842106,
"grad_norm": 0.7781824943502823,
"learning_rate": 3.395518200148571e-05,
"loss": 0.1494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0761878490447998,
"step": 220,
"valid_targets_mean": 1668.5,
"valid_targets_min": 677
},
{
"epoch": 2.3684210526315788,
"grad_norm": 0.6417846187297783,
"learning_rate": 3.357408591213544e-05,
"loss": 0.149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02233138121664524,
"step": 225,
"valid_targets_mean": 869.8,
"valid_targets_min": 452
},
{
"epoch": 2.4210526315789473,
"grad_norm": 1.2326477223555594,
"learning_rate": 3.318362449443876e-05,
"loss": 0.1446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.058247171342372894,
"step": 230,
"valid_targets_mean": 1183.0,
"valid_targets_min": 406
},
{
"epoch": 2.473684210526316,
"grad_norm": 0.8902988410602314,
"learning_rate": 3.278406714402253e-05,
"loss": 0.1496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.019801612943410873,
"step": 235,
"valid_targets_mean": 810.2,
"valid_targets_min": 436
},
{
"epoch": 2.526315789473684,
"grad_norm": 0.7722716265711358,
"learning_rate": 3.237568953217717e-05,
"loss": 0.1357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023706065490841866,
"step": 240,
"valid_targets_mean": 1874.5,
"valid_targets_min": 487
},
{
"epoch": 2.5789473684210527,
"grad_norm": 0.7505590432900965,
"learning_rate": 3.195877341565958e-05,
"loss": 0.1605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04506827890872955,
"step": 245,
"valid_targets_mean": 2340.8,
"valid_targets_min": 1596
},
{
"epoch": 2.6315789473684212,
"grad_norm": 1.078393610170121,
"learning_rate": 3.153360644229735e-05,
"loss": 0.1365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03983701393008232,
"step": 250,
"valid_targets_mean": 1252.5,
"valid_targets_min": 418
},
{
"epoch": 2.6842105263157894,
"grad_norm": 1.0240506694449787,
"learning_rate": 3.110048195252851e-05,
"loss": 0.1763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.042883411049842834,
"step": 255,
"valid_targets_mean": 972.8,
"valid_targets_min": 416
},
{
"epoch": 2.736842105263158,
"grad_norm": 0.88391634666274,
"learning_rate": 3.065969877701378e-05,
"loss": 0.1496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03341054543852806,
"step": 260,
"valid_targets_mean": 1375.5,
"valid_targets_min": 468
},
{
"epoch": 2.7894736842105265,
"grad_norm": 0.9935905952495295,
"learning_rate": 3.0211561030460755e-05,
"loss": 0.1611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04604099690914154,
"step": 265,
"valid_targets_mean": 1171.0,
"valid_targets_min": 572
},
{
"epoch": 2.8421052631578947,
"grad_norm": 0.7349039809089655,
"learning_rate": 2.975637790180255e-05,
"loss": 0.1212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01938222162425518,
"step": 270,
"valid_targets_mean": 813.5,
"valid_targets_min": 523
},
{
"epoch": 2.8947368421052633,
"grad_norm": 0.6446524265967656,
"learning_rate": 2.9294463440875375e-05,
"loss": 0.1461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.032505787909030914,
"step": 275,
"valid_targets_mean": 2418.0,
"valid_targets_min": 791
},
{
"epoch": 2.9473684210526314,
"grad_norm": 0.8168387870454594,
"learning_rate": 2.8826136341742504e-05,
"loss": 0.1415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.041891537606716156,
"step": 280,
"valid_targets_mean": 1029.5,
"valid_targets_min": 487
},
{
"epoch": 3.0,
"grad_norm": 0.6795412956632005,
"learning_rate": 2.8351719722813933e-05,
"loss": 0.1587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03285399451851845,
"step": 285,
"valid_targets_mean": 1979.0,
"valid_targets_min": 917
},
{
"epoch": 3.0526315789473686,
"grad_norm": 0.8429893002470394,
"learning_rate": 2.7871540903913465e-05,
"loss": 0.1186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.051500104367733,
"step": 290,
"valid_targets_mean": 1433.0,
"valid_targets_min": 407
},
{
"epoch": 3.1052631578947367,
"grad_norm": 0.7288845482984847,
"learning_rate": 2.7385931180447145e-05,
"loss": 0.1252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.031095707789063454,
"step": 295,
"valid_targets_mean": 2574.0,
"valid_targets_min": 597
},
{
"epoch": 3.1578947368421053,
"grad_norm": 0.8785679932821717,
"learning_rate": 2.6895225594828743e-05,
"loss": 0.1249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.020273303613066673,
"step": 300,
"valid_targets_mean": 1149.5,
"valid_targets_min": 600
},
{
"epoch": 3.2105263157894735,
"grad_norm": 0.5964839392422349,
"learning_rate": 2.639976270531996e-05,
"loss": 0.1096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02001447230577469,
"step": 305,
"valid_targets_mean": 1876.8,
"valid_targets_min": 463
},
{
"epoch": 3.263157894736842,
"grad_norm": 1.0058384602350647,
"learning_rate": 2.5899884352444994e-05,
"loss": 0.1292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03967369720339775,
"step": 310,
"valid_targets_mean": 1636.5,
"valid_targets_min": 1320
},
{
"epoch": 3.3157894736842106,
"grad_norm": 0.7379249353528501,
"learning_rate": 2.5395935423140487e-05,
"loss": 0.1148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023941613733768463,
"step": 315,
"valid_targets_mean": 1889.2,
"valid_targets_min": 456
},
{
"epoch": 3.3684210526315788,
"grad_norm": 0.6320511316986958,
"learning_rate": 2.4888263612803637e-05,
"loss": 0.111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.015035606920719147,
"step": 320,
"valid_targets_mean": 2843.8,
"valid_targets_min": 579
},
{
"epoch": 3.4210526315789473,
"grad_norm": 0.7888755202797919,
"learning_rate": 2.4377219185402613e-05,
"loss": 0.1091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.028542907908558846,
"step": 325,
"valid_targets_mean": 1003.2,
"valid_targets_min": 499
},
{
"epoch": 3.473684210526316,
"grad_norm": 1.129509556464978,
"learning_rate": 2.3863154731814867e-05,
"loss": 0.1241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05130193009972572,
"step": 330,
"valid_targets_mean": 1287.8,
"valid_targets_min": 560
},
{
"epoch": 3.526315789473684,
"grad_norm": 0.6395207589130021,
"learning_rate": 2.3346424926559935e-05,
"loss": 0.1313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.019851651042699814,
"step": 335,
"valid_targets_mean": 2567.2,
"valid_targets_min": 454
},
{
"epoch": 3.5789473684210527,
"grad_norm": 0.5599961293630993,
"learning_rate": 2.2827386283094707e-05,
"loss": 0.1229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.015991318970918655,
"step": 340,
"valid_targets_mean": 1478.5,
"valid_targets_min": 457
},
{
"epoch": 3.6315789473684212,
"grad_norm": 0.8348704783982805,
"learning_rate": 2.2306396907839883e-05,
"loss": 0.121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.014956638216972351,
"step": 345,
"valid_targets_mean": 1313.2,
"valid_targets_min": 489
},
{
"epoch": 3.6842105263157894,
"grad_norm": 0.8193826114929279,
"learning_rate": 2.178381625310748e-05,
"loss": 0.1168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.016708550974726677,
"step": 350,
"valid_targets_mean": 1944.0,
"valid_targets_min": 549
},
{
"epoch": 3.736842105263158,
"grad_norm": 0.9430505757719317,
"learning_rate": 2.1260004869099583e-05,
"loss": 0.1054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.018935151398181915,
"step": 355,
"valid_targets_mean": 575.2,
"valid_targets_min": 360
},
{
"epoch": 3.7894736842105265,
"grad_norm": 0.6631035172044142,
"learning_rate": 2.0735324155149795e-05,
"loss": 0.135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.014331744983792305,
"step": 360,
"valid_targets_mean": 934.5,
"valid_targets_min": 524
},
{
"epoch": 3.8421052631578947,
"grad_norm": 0.8657436402850743,
"learning_rate": 2.021013611037873e-05,
"loss": 0.1331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03150756284594536,
"step": 365,
"valid_targets_mean": 1456.0,
"valid_targets_min": 468
},
{
"epoch": 3.8947368421052633,
"grad_norm": 0.7075879023467191,
"learning_rate": 1.9684803083935676e-05,
"loss": 0.1389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02251843735575676,
"step": 370,
"valid_targets_mean": 915.5,
"valid_targets_min": 433
},
{
"epoch": 3.9473684210526314,
"grad_norm": 0.5419112299991141,
"learning_rate": 1.915968752499886e-05,
"loss": 0.115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.015261461958289146,
"step": 375,
"valid_targets_mean": 1871.0,
"valid_targets_min": 446
},
{
"epoch": 4.0,
"grad_norm": 0.7993441162916212,
"learning_rate": 1.8635151732706586e-05,
"loss": 0.1336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022348973900079727,
"step": 380,
"valid_targets_mean": 848.2,
"valid_targets_min": 518
},
{
"epoch": 4.052631578947368,
"grad_norm": 0.6631125816107435,
"learning_rate": 1.8111557606191946e-05,
"loss": 0.1049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02011416107416153,
"step": 385,
"valid_targets_mean": 2040.8,
"valid_targets_min": 575
},
{
"epoch": 4.105263157894737,
"grad_norm": 1.040065523528812,
"learning_rate": 1.758926639489354e-05,
"loss": 0.1107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02567300572991371,
"step": 390,
"valid_targets_mean": 1236.0,
"valid_targets_min": 281
},
{
"epoch": 4.157894736842105,
"grad_norm": 0.8157880749784518,
"learning_rate": 1.7068638449314365e-05,
"loss": 0.1135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02425241470336914,
"step": 395,
"valid_targets_mean": 1700.0,
"valid_targets_min": 436
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.6407113553475756,
"learning_rate": 1.6550032972400996e-05,
"loss": 0.1007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.015315159223973751,
"step": 400,
"valid_targets_mean": 2811.0,
"valid_targets_min": 655
},
{
"epoch": 4.2631578947368425,
"grad_norm": 0.792271856227501,
"learning_rate": 1.6033807771714464e-05,
"loss": 0.0856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0246428269892931,
"step": 405,
"valid_targets_mean": 1821.0,
"valid_targets_min": 453
},
{
"epoch": 4.315789473684211,
"grad_norm": 0.7488624643430496,
"learning_rate": 1.552031901256391e-05,
"loss": 0.1061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.012473690323531628,
"step": 410,
"valid_targets_mean": 744.2,
"valid_targets_min": 501
},
{
"epoch": 4.368421052631579,
"grad_norm": 0.7373911618925023,
"learning_rate": 1.5009920972273255e-05,
"loss": 0.1026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024960853159427643,
"step": 415,
"valid_targets_mean": 1795.8,
"valid_targets_min": 537
},
{
"epoch": 4.421052631578947,
"grad_norm": 0.8368699116001358,
"learning_rate": 1.4502965795750487e-05,
"loss": 0.1084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024426810443401337,
"step": 420,
"valid_targets_mean": 1879.0,
"valid_targets_min": 477
},
{
"epoch": 4.473684210526316,
"grad_norm": 1.035900030611362,
"learning_rate": 1.399980325252823e-05,
"loss": 0.1041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02010546252131462,
"step": 425,
"valid_targets_mean": 870.0,
"valid_targets_min": 588
},
{
"epoch": 4.526315789473684,
"grad_norm": 0.8454908753679591,
"learning_rate": 1.3500780495443098e-05,
"loss": 0.0967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0443265363574028,
"step": 430,
"valid_targets_mean": 2228.5,
"valid_targets_min": 382
},
{
"epoch": 4.578947368421053,
"grad_norm": 0.9079677228427804,
"learning_rate": 1.3006241821120483e-05,
"loss": 0.0967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.020769363269209862,
"step": 435,
"valid_targets_mean": 1294.0,
"valid_targets_min": 469
},
{
"epoch": 4.631578947368421,
"grad_norm": 0.697471073230868,
"learning_rate": 1.2516528432429955e-05,
"loss": 0.0912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.017268797382712364,
"step": 440,
"valid_targets_mean": 1621.0,
"valid_targets_min": 440
},
{
"epoch": 4.684210526315789,
"grad_norm": 0.6926037557955551,
"learning_rate": 1.2031978203075172e-05,
"loss": 0.0969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.041803259402513504,
"step": 445,
"valid_targets_mean": 1963.0,
"valid_targets_min": 524
},
{
"epoch": 4.7368421052631575,
"grad_norm": 0.7472368581347761,
"learning_rate": 1.1552925444480674e-05,
"loss": 0.0975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01823119819164276,
"step": 450,
"valid_targets_mean": 1856.8,
"valid_targets_min": 461
},
{
"epoch": 4.7894736842105265,
"grad_norm": 0.6588601167548072,
"learning_rate": 1.1079700675136506e-05,
"loss": 0.096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024438511580228806,
"step": 455,
"valid_targets_mean": 3060.0,
"valid_targets_min": 605
},
{
"epoch": 4.842105263157895,
"grad_norm": 1.0306521791067318,
"learning_rate": 1.0612630392559728e-05,
"loss": 0.098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08015666902065277,
"step": 460,
"valid_targets_mean": 2960.8,
"valid_targets_min": 509
},
{
"epoch": 4.894736842105263,
"grad_norm": 0.5351687335944878,
"learning_rate": 1.015203684803013e-05,
"loss": 0.0822,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.015991540625691414,
"step": 465,
"valid_targets_mean": 4341.2,
"valid_targets_min": 1093
},
{
"epoch": 4.947368421052632,
"grad_norm": 0.7874517868903579,
"learning_rate": 9.698237824255634e-06,
"loss": 0.1013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02613271400332451,
"step": 470,
"valid_targets_mean": 1760.5,
"valid_targets_min": 415
},
{
"epoch": 5.0,
"grad_norm": 0.7529240572345405,
"learning_rate": 9.251546416120756e-06,
"loss": 0.0948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.038777366280555725,
"step": 475,
"valid_targets_mean": 2599.2,
"valid_targets_min": 360
},
{
"epoch": 5.052631578947368,
"grad_norm": 0.8391592840372387,
"learning_rate": 8.812270814669338e-06,
"loss": 0.0991,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.012826068326830864,
"step": 480,
"valid_targets_mean": 827.0,
"valid_targets_min": 549
},
{
"epoch": 5.105263157894737,
"grad_norm": 0.898064625511165,
"learning_rate": 8.38071409447074e-06,
"loss": 0.0798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.013072891160845757,
"step": 485,
"valid_targets_mean": 2060.5,
"valid_targets_min": 526
},
{
"epoch": 5.157894736842105,
"grad_norm": 0.7414990110957742,
"learning_rate": 7.957174004516015e-06,
"loss": 0.0807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.017101336270570755,
"step": 490,
"valid_targets_mean": 2430.8,
"valid_targets_min": 418
},
{
"epoch": 5.2105263157894735,
"grad_norm": 0.8541483835659059,
"learning_rate": 7.5419427627884586e-06,
"loss": 0.0903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.016206396743655205,
"step": 495,
"valid_targets_mean": 2121.8,
"valid_targets_min": 451
},
{
"epoch": 5.2631578947368425,
"grad_norm": 0.9520445173297055,
"learning_rate": 7.1353068546502144e-06,
"loss": 0.0876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0290323905646801,
"step": 500,
"valid_targets_mean": 2227.2,
"valid_targets_min": 520
},
{
"epoch": 5.315789473684211,
"grad_norm": 0.49442257527886013,
"learning_rate": 6.737546835184101e-06,
"loss": 0.0681,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.005898091942071915,
"step": 505,
"valid_targets_mean": 3989.0,
"valid_targets_min": 441
},
{
"epoch": 5.368421052631579,
"grad_norm": 0.6172028111736194,
"learning_rate": 6.348937135626922e-06,
"loss": 0.0795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02028818055987358,
"step": 510,
"valid_targets_mean": 2356.5,
"valid_targets_min": 515
},
{
"epoch": 5.421052631578947,
"grad_norm": 1.0863613555570175,
"learning_rate": 5.9697458740279165e-06,
"loss": 0.095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02723700925707817,
"step": 515,
"valid_targets_mean": 1796.5,
"valid_targets_min": 542
},
{
"epoch": 5.473684210526316,
"grad_norm": 0.9127541216790402,
"learning_rate": 5.600234670262925e-06,
"loss": 0.0657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02121194452047348,
"step": 520,
"valid_targets_mean": 1008.2,
"valid_targets_min": 458
},
{
"epoch": 5.526315789473684,
"grad_norm": 0.7939548918919143,
"learning_rate": 5.240658465531914e-06,
"loss": 0.0907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.020264407619833946,
"step": 525,
"valid_targets_mean": 2318.5,
"valid_targets_min": 1029
},
{
"epoch": 5.578947368421053,
"grad_norm": 0.7958309073175084,
"learning_rate": 4.891265346464416e-06,
"loss": 0.0811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.016854919493198395,
"step": 530,
"valid_targets_mean": 1867.5,
"valid_targets_min": 505
},
{
"epoch": 5.631578947368421,
"grad_norm": 0.964741333422222,
"learning_rate": 4.552296373954194e-06,
"loss": 0.0916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02305922657251358,
"step": 535,
"valid_targets_mean": 1494.0,
"valid_targets_min": 501
},
{
"epoch": 5.684210526315789,
"grad_norm": 0.6953130743902943,
"learning_rate": 4.223985416841292e-06,
"loss": 0.0858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.006064994726330042,
"step": 540,
"valid_targets_mean": 2224.8,
"valid_targets_min": 436
},
{
"epoch": 5.7368421052631575,
"grad_norm": 0.8426129883534313,
"learning_rate": 3.906558990556126e-06,
"loss": 0.0867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.015257779508829117,
"step": 545,
"valid_targets_mean": 1543.0,
"valid_targets_min": 398
},
{
"epoch": 5.7894736842105265,
"grad_norm": 0.8466455484780093,
"learning_rate": 3.6002361008370802e-06,
"loss": 0.0782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.017657935619354248,
"step": 550,
"valid_targets_mean": 1370.5,
"valid_targets_min": 796
},
{
"epoch": 5.842105263157895,
"grad_norm": 0.7810621379380235,
"learning_rate": 3.3052280926292802e-06,
"loss": 0.0857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.017286978662014008,
"step": 555,
"valid_targets_mean": 1197.5,
"valid_targets_min": 482
},
{
"epoch": 5.894736842105263,
"grad_norm": 0.9429542983678016,
"learning_rate": 3.021738504268905e-06,
"loss": 0.0803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.019310947507619858,
"step": 560,
"valid_targets_mean": 740.2,
"valid_targets_min": 405
},
{
"epoch": 5.947368421052632,
"grad_norm": 0.848754138173841,
"learning_rate": 2.7499629270535954e-06,
"loss": 0.0795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.036632854491472244,
"step": 565,
"valid_targets_mean": 2569.2,
"valid_targets_min": 1078
},
{
"epoch": 6.0,
"grad_norm": 0.8474574025573199,
"learning_rate": 2.490088870295839e-06,
"loss": 0.0748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.013190139085054398,
"step": 570,
"valid_targets_mean": 1706.0,
"valid_targets_min": 539
},
{
"epoch": 6.052631578947368,
"grad_norm": 1.277717890049751,
"learning_rate": 2.242295631952496e-06,
"loss": 0.0854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02643488347530365,
"step": 575,
"valid_targets_mean": 1145.2,
"valid_targets_min": 561
},
{
"epoch": 6.105263157894737,
"grad_norm": 1.002374231512521,
"learning_rate": 2.0067541749196453e-06,
"loss": 0.0711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.021946445107460022,
"step": 580,
"valid_targets_mean": 784.0,
"valid_targets_min": 421
},
{
"epoch": 6.157894736842105,
"grad_norm": 0.5901745775554788,
"learning_rate": 1.783627009078137e-06,
"loss": 0.0829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.014920946210622787,
"step": 585,
"valid_targets_mean": 2335.2,
"valid_targets_min": 485
},
{
"epoch": 6.2105263157894735,
"grad_norm": 0.8805052231905595,
"learning_rate": 1.573068079171265e-06,
"loss": 0.0813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03274039924144745,
"step": 590,
"valid_targets_mean": 2157.8,
"valid_targets_min": 529
},
{
"epoch": 6.2631578947368425,
"grad_norm": 0.932027933585667,
"learning_rate": 1.3752226585918416e-06,
"loss": 0.0719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.014052268117666245,
"step": 595,
"valid_targets_mean": 994.8,
"valid_targets_min": 513
},
{
"epoch": 6.315789473684211,
"grad_norm": 0.8541046370873381,
"learning_rate": 1.1902272491520362e-06,
"loss": 0.0653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.00850268267095089,
"step": 600,
"valid_targets_mean": 1679.0,
"valid_targets_min": 525
},
{
"epoch": 6.368421052631579,
"grad_norm": 0.9133472628588594,
"learning_rate": 1.0182094869050796e-06,
"loss": 0.0788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02191070280969143,
"step": 605,
"valid_targets_mean": 1648.2,
"valid_targets_min": 415
},
{
"epoch": 6.421052631578947,
"grad_norm": 0.8210144567874685,
"learning_rate": 8.592880540838111e-07,
"loss": 0.066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.014700721949338913,
"step": 610,
"valid_targets_mean": 868.8,
"valid_targets_min": 471
},
{
"epoch": 6.473684210526316,
"grad_norm": 0.6702334373473492,
"learning_rate": 7.135725972168694e-07,
"loss": 0.085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.014351149089634418,
"step": 615,
"valid_targets_mean": 2272.8,
"valid_targets_min": 546
},
{
"epoch": 6.526315789473684,
"grad_norm": 0.6787505389804506,
"learning_rate": 5.811636514789598e-07,
"loss": 0.0628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.011107168160378933,
"step": 620,
"valid_targets_mean": 1480.2,
"valid_targets_min": 415
},
{
"epoch": 6.578947368421053,
"grad_norm": 0.7692352383890824,
"learning_rate": 4.621525713274588e-07,
"loss": 0.0716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.017746414989233017,
"step": 625,
"valid_targets_mean": 2110.8,
"valid_targets_min": 504
},
{
"epoch": 6.631578947368421,
"grad_norm": 0.7906146884753448,
"learning_rate": 3.5662146747315054e-07,
"loss": 0.0882,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022792555391788483,
"step": 630,
"valid_targets_mean": 2510.0,
"valid_targets_min": 433
},
{
"epoch": 6.684210526315789,
"grad_norm": 1.1576408782365855,
"learning_rate": 2.6464315022861844e-07,
"loss": 0.0855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024924524128437042,
"step": 635,
"valid_targets_mean": 1319.0,
"valid_targets_min": 413
},
{
"epoch": 6.7368421052631575,
"grad_norm": 0.7614682584594898,
"learning_rate": 1.862810792733849e-07,
"loss": 0.0731,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025046605616807938,
"step": 640,
"valid_targets_mean": 1614.5,
"valid_targets_min": 344
},
{
"epoch": 6.7894736842105265,
"grad_norm": 0.8926016666095682,
"learning_rate": 1.2158931987041877e-07,
"loss": 0.0746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01438556332141161,
"step": 645,
"valid_targets_mean": 1287.2,
"valid_targets_min": 524
},
{
"epoch": 6.842105263157895,
"grad_norm": 0.906919830054555,
"learning_rate": 7.06125055642537e-08,
"loss": 0.0677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.008928455412387848,
"step": 650,
"valid_targets_mean": 464.8,
"valid_targets_min": 411
},
{
"epoch": 6.894736842105263,
"grad_norm": 0.7154592892609424,
"learning_rate": 3.3385807386456804e-08,
"loss": 0.0664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03817162662744522,
"step": 655,
"valid_targets_mean": 2167.2,
"valid_targets_min": 407
},
{
"epoch": 6.947368421052632,
"grad_norm": 0.9354297577789874,
"learning_rate": 9.934909589646157e-09,
"loss": 0.0767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01679036021232605,
"step": 660,
"valid_targets_mean": 1597.2,
"valid_targets_min": 505
},
{
"epoch": 7.0,
"grad_norm": 0.9200047562223783,
"learning_rate": 2.759919268702227e-10,
"loss": 0.0788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.028918465599417686,
"step": 665,
"valid_targets_mean": 1713.0,
"valid_targets_min": 479
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.028918465599417686,
"step": 665,
"total_flos": 1.8900557354971955e+17,
"train_loss": 0.14472091395155828,
"train_runtime": 29358.9617,
"train_samples_per_second": 0.362,
"train_steps_per_second": 0.023,
"valid_targets_mean": 1713.0,
"valid_targets_min": 479
}
],
"logging_steps": 5,
"max_steps": 665,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8900557354971955e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}