1511 lines
42 KiB
JSON
1511 lines
42 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 665,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.05263157894736842,
|
||
|
|
"grad_norm": 12.553807503106103,
|
||
|
|
"learning_rate": 2.3880597014925373e-06,
|
||
|
|
"loss": 0.6006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11287848651409149,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 673.5,
|
||
|
|
"valid_targets_min": 369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10526315789473684,
|
||
|
|
"grad_norm": 7.035434903432055,
|
||
|
|
"learning_rate": 5.37313432835821e-06,
|
||
|
|
"loss": 0.6424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10525691509246826,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 3003.0,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15789473684210525,
|
||
|
|
"grad_norm": 5.198775023562205,
|
||
|
|
"learning_rate": 8.35820895522388e-06,
|
||
|
|
"loss": 0.4501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0761948674917221,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 1735.2,
|
||
|
|
"valid_targets_min": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21052631578947367,
|
||
|
|
"grad_norm": 2.0741815557089174,
|
||
|
|
"learning_rate": 1.1343283582089553e-05,
|
||
|
|
"loss": 0.3755,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10525241494178772,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 1774.2,
|
||
|
|
"valid_targets_min": 632
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2631578947368421,
|
||
|
|
"grad_norm": 1.850608693504522,
|
||
|
|
"learning_rate": 1.4328358208955224e-05,
|
||
|
|
"loss": 0.3576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2170872539281845,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 1434.5,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3157894736842105,
|
||
|
|
"grad_norm": 0.7064366027469681,
|
||
|
|
"learning_rate": 1.7313432835820894e-05,
|
||
|
|
"loss": 0.2887,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04902719706296921,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 1492.2,
|
||
|
|
"valid_targets_min": 337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3684210526315789,
|
||
|
|
"grad_norm": 0.8492931103555103,
|
||
|
|
"learning_rate": 2.029850746268657e-05,
|
||
|
|
"loss": 0.3099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11869967728853226,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 2878.5,
|
||
|
|
"valid_targets_min": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42105263157894735,
|
||
|
|
"grad_norm": 0.9128923904158283,
|
||
|
|
"learning_rate": 2.3283582089552242e-05,
|
||
|
|
"loss": 0.2594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06856397539377213,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 2244.8,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47368421052631576,
|
||
|
|
"grad_norm": 0.7721603884282439,
|
||
|
|
"learning_rate": 2.6268656716417913e-05,
|
||
|
|
"loss": 0.2576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07116584479808807,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 1934.0,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5263157894736842,
|
||
|
|
"grad_norm": 0.815787876318566,
|
||
|
|
"learning_rate": 2.9253731343283584e-05,
|
||
|
|
"loss": 0.291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13275116682052612,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 2430.8,
|
||
|
|
"valid_targets_min": 1481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5789473684210527,
|
||
|
|
"grad_norm": 0.7820305833709643,
|
||
|
|
"learning_rate": 3.2238805970149255e-05,
|
||
|
|
"loss": 0.2378,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06999962031841278,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 1782.0,
|
||
|
|
"valid_targets_min": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.631578947368421,
|
||
|
|
"grad_norm": 0.921532705829729,
|
||
|
|
"learning_rate": 3.522388059701493e-05,
|
||
|
|
"loss": 0.2227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04323829710483551,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 758.5,
|
||
|
|
"valid_targets_min": 591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6842105263157895,
|
||
|
|
"grad_norm": 0.7092700459991306,
|
||
|
|
"learning_rate": 3.8208955223880596e-05,
|
||
|
|
"loss": 0.2336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.037662893533706665,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 730.0,
|
||
|
|
"valid_targets_min": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7368421052631579,
|
||
|
|
"grad_norm": 0.7461432563225519,
|
||
|
|
"learning_rate": 3.9998896039909675e-05,
|
||
|
|
"loss": 0.1988,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02792029082775116,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 497.0,
|
||
|
|
"valid_targets_min": 461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7894736842105263,
|
||
|
|
"grad_norm": 0.7023900039706257,
|
||
|
|
"learning_rate": 3.998647788848384e-05,
|
||
|
|
"loss": 0.2185,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03114963322877884,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 1628.8,
|
||
|
|
"valid_targets_min": 382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8421052631578947,
|
||
|
|
"grad_norm": 0.7055640361791583,
|
||
|
|
"learning_rate": 3.996027023188427e-05,
|
||
|
|
"loss": 0.2002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.026642896234989166,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 570.2,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8947368421052632,
|
||
|
|
"grad_norm": 0.8834619078292679,
|
||
|
|
"learning_rate": 3.9920291151866977e-05,
|
||
|
|
"loss": 0.2278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08590055257081985,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 2245.0,
|
||
|
|
"valid_targets_min": 564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9473684210526315,
|
||
|
|
"grad_norm": 0.8048346765640612,
|
||
|
|
"learning_rate": 3.986656823166766e-05,
|
||
|
|
"loss": 0.2334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08790746331214905,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 3408.2,
|
||
|
|
"valid_targets_min": 1208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.7721245809969578,
|
||
|
|
"learning_rate": 3.979913853697095e-05,
|
||
|
|
"loss": 0.1963,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05376936122775078,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 1591.8,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0526315789473684,
|
||
|
|
"grad_norm": 0.6211663045932956,
|
||
|
|
"learning_rate": 3.9718048590337186e-05,
|
||
|
|
"loss": 0.1857,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0645800530910492,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 2262.0,
|
||
|
|
"valid_targets_min": 1426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1052631578947367,
|
||
|
|
"grad_norm": 0.7785104666209682,
|
||
|
|
"learning_rate": 3.962335433910463e-05,
|
||
|
|
"loss": 0.1824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.025534367188811302,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 679.8,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1578947368421053,
|
||
|
|
"grad_norm": 0.8004929865701785,
|
||
|
|
"learning_rate": 3.9515121116788985e-05,
|
||
|
|
"loss": 0.205,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10348161309957504,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 2548.2,
|
||
|
|
"valid_targets_min": 1644
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2105263157894737,
|
||
|
|
"grad_norm": 0.8442132787033475,
|
||
|
|
"learning_rate": 3.939342359800714e-05,
|
||
|
|
"loss": 0.1878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046689361333847046,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 1556.2,
|
||
|
|
"valid_targets_min": 361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.263157894736842,
|
||
|
|
"grad_norm": 0.8100669921933641,
|
||
|
|
"learning_rate": 3.925834574695599e-05,
|
||
|
|
"loss": 0.1719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0551832839846611,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 2410.0,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3157894736842106,
|
||
|
|
"grad_norm": 0.7871886106834695,
|
||
|
|
"learning_rate": 3.910998075948207e-05,
|
||
|
|
"loss": 0.1911,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05720491334795952,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 2352.5,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.368421052631579,
|
||
|
|
"grad_norm": 0.7323699198683509,
|
||
|
|
"learning_rate": 3.8948430998781824e-05,
|
||
|
|
"loss": 0.1753,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0170910581946373,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 1404.5,
|
||
|
|
"valid_targets_min": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4210526315789473,
|
||
|
|
"grad_norm": 1.0920789638236659,
|
||
|
|
"learning_rate": 3.8773807924776976e-05,
|
||
|
|
"loss": 0.1912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06749822944402695,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 929.8,
|
||
|
|
"valid_targets_min": 369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4736842105263157,
|
||
|
|
"grad_norm": 0.7229550949497463,
|
||
|
|
"learning_rate": 3.8586232017213675e-05,
|
||
|
|
"loss": 0.1831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03690353408455849,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 1208.2,
|
||
|
|
"valid_targets_min": 861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.526315789473684,
|
||
|
|
"grad_norm": 0.7657486783018452,
|
||
|
|
"learning_rate": 3.83858326925385e-05,
|
||
|
|
"loss": 0.1812,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.037817131727933884,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 1573.8,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5789473684210527,
|
||
|
|
"grad_norm": 0.7208446240323184,
|
||
|
|
"learning_rate": 3.8172748214608624e-05,
|
||
|
|
"loss": 0.1665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06162188947200775,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 1708.5,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.631578947368421,
|
||
|
|
"grad_norm": 0.7918543247788804,
|
||
|
|
"learning_rate": 3.7947125599297856e-05,
|
||
|
|
"loss": 0.1776,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04460986703634262,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 2118.2,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6842105263157894,
|
||
|
|
"grad_norm": 0.7001163120437345,
|
||
|
|
"learning_rate": 3.7709120513064196e-05,
|
||
|
|
"loss": 0.1856,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03856482356786728,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 1108.5,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.736842105263158,
|
||
|
|
"grad_norm": 0.6906214593751792,
|
||
|
|
"learning_rate": 3.745889716554912e-05,
|
||
|
|
"loss": 0.1873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.040963269770145416,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 1622.5,
|
||
|
|
"valid_targets_min": 717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7894736842105263,
|
||
|
|
"grad_norm": 0.798273215978815,
|
||
|
|
"learning_rate": 3.7196628196282415e-05,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020104659721255302,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 551.8,
|
||
|
|
"valid_targets_min": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8421052631578947,
|
||
|
|
"grad_norm": 0.6391796891465947,
|
||
|
|
"learning_rate": 3.692249455557103e-05,
|
||
|
|
"loss": 0.1619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.025837548077106476,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 3984.0,
|
||
|
|
"valid_targets_min": 252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8947368421052633,
|
||
|
|
"grad_norm": 0.5174143688291576,
|
||
|
|
"learning_rate": 3.6636685379653875e-05,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.023429114371538162,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 763.5,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9473684210526314,
|
||
|
|
"grad_norm": 0.8576835506620597,
|
||
|
|
"learning_rate": 3.633939786020884e-05,
|
||
|
|
"loss": 0.1675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03479863703250885,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 1539.5,
|
||
|
|
"valid_targets_min": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.7568710245152749,
|
||
|
|
"learning_rate": 3.603083710830205e-05,
|
||
|
|
"loss": 0.1779,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06252141296863556,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 2105.8,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0526315789473686,
|
||
|
|
"grad_norm": 0.6843664313982926,
|
||
|
|
"learning_rate": 3.5711216012873114e-05,
|
||
|
|
"loss": 0.1443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05131068080663681,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 3221.2,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1052631578947367,
|
||
|
|
"grad_norm": 0.6549037149587634,
|
||
|
|
"learning_rate": 3.538075509385427e-05,
|
||
|
|
"loss": 0.1406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.035776007920503616,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 2162.2,
|
||
|
|
"valid_targets_min": 564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1578947368421053,
|
||
|
|
"grad_norm": 0.6798810404078721,
|
||
|
|
"learning_rate": 3.503968235002437e-05,
|
||
|
|
"loss": 0.136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01746884360909462,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 1817.8,
|
||
|
|
"valid_targets_min": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2105263157894735,
|
||
|
|
"grad_norm": 0.7040977825128967,
|
||
|
|
"learning_rate": 3.468823310170309e-05,
|
||
|
|
"loss": 0.1594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03740096092224121,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 1947.5,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.263157894736842,
|
||
|
|
"grad_norm": 0.7823204339158937,
|
||
|
|
"learning_rate": 3.4326649828393565e-05,
|
||
|
|
"loss": 0.1399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.033209312707185745,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 1384.5,
|
||
|
|
"valid_targets_min": 591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3157894736842106,
|
||
|
|
"grad_norm": 0.7781824943502823,
|
||
|
|
"learning_rate": 3.395518200148571e-05,
|
||
|
|
"loss": 0.1494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0761878490447998,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 1668.5,
|
||
|
|
"valid_targets_min": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3684210526315788,
|
||
|
|
"grad_norm": 0.6417846187297783,
|
||
|
|
"learning_rate": 3.357408591213544e-05,
|
||
|
|
"loss": 0.149,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02233138121664524,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 869.8,
|
||
|
|
"valid_targets_min": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4210526315789473,
|
||
|
|
"grad_norm": 1.2326477223555594,
|
||
|
|
"learning_rate": 3.318362449443876e-05,
|
||
|
|
"loss": 0.1446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058247171342372894,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 1183.0,
|
||
|
|
"valid_targets_min": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.473684210526316,
|
||
|
|
"grad_norm": 0.8902988410602314,
|
||
|
|
"learning_rate": 3.278406714402253e-05,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.019801612943410873,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 810.2,
|
||
|
|
"valid_targets_min": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.526315789473684,
|
||
|
|
"grad_norm": 0.7722716265711358,
|
||
|
|
"learning_rate": 3.237568953217717e-05,
|
||
|
|
"loss": 0.1357,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.023706065490841866,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 1874.5,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5789473684210527,
|
||
|
|
"grad_norm": 0.7505590432900965,
|
||
|
|
"learning_rate": 3.195877341565958e-05,
|
||
|
|
"loss": 0.1605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04506827890872955,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 2340.8,
|
||
|
|
"valid_targets_min": 1596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6315789473684212,
|
||
|
|
"grad_norm": 1.078393610170121,
|
||
|
|
"learning_rate": 3.153360644229735e-05,
|
||
|
|
"loss": 0.1365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03983701393008232,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 1252.5,
|
||
|
|
"valid_targets_min": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6842105263157894,
|
||
|
|
"grad_norm": 1.0240506694449787,
|
||
|
|
"learning_rate": 3.110048195252851e-05,
|
||
|
|
"loss": 0.1763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.042883411049842834,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 972.8,
|
||
|
|
"valid_targets_min": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.736842105263158,
|
||
|
|
"grad_norm": 0.88391634666274,
|
||
|
|
"learning_rate": 3.065969877701378e-05,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03341054543852806,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 1375.5,
|
||
|
|
"valid_targets_min": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7894736842105265,
|
||
|
|
"grad_norm": 0.9935905952495295,
|
||
|
|
"learning_rate": 3.0211561030460755e-05,
|
||
|
|
"loss": 0.1611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04604099690914154,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 1171.0,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8421052631578947,
|
||
|
|
"grad_norm": 0.7349039809089655,
|
||
|
|
"learning_rate": 2.975637790180255e-05,
|
||
|
|
"loss": 0.1212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01938222162425518,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 813.5,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8947368421052633,
|
||
|
|
"grad_norm": 0.6446524265967656,
|
||
|
|
"learning_rate": 2.9294463440875375e-05,
|
||
|
|
"loss": 0.1461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.032505787909030914,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 2418.0,
|
||
|
|
"valid_targets_min": 791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9473684210526314,
|
||
|
|
"grad_norm": 0.8168387870454594,
|
||
|
|
"learning_rate": 2.8826136341742504e-05,
|
||
|
|
"loss": 0.1415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.041891537606716156,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 1029.5,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.6795412956632005,
|
||
|
|
"learning_rate": 2.8351719722813933e-05,
|
||
|
|
"loss": 0.1587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03285399451851845,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 1979.0,
|
||
|
|
"valid_targets_min": 917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0526315789473686,
|
||
|
|
"grad_norm": 0.8429893002470394,
|
||
|
|
"learning_rate": 2.7871540903913465e-05,
|
||
|
|
"loss": 0.1186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.051500104367733,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 1433.0,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1052631578947367,
|
||
|
|
"grad_norm": 0.7288845482984847,
|
||
|
|
"learning_rate": 2.7385931180447145e-05,
|
||
|
|
"loss": 0.1252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.031095707789063454,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 2574.0,
|
||
|
|
"valid_targets_min": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1578947368421053,
|
||
|
|
"grad_norm": 0.8785679932821717,
|
||
|
|
"learning_rate": 2.6895225594828743e-05,
|
||
|
|
"loss": 0.1249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020273303613066673,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 1149.5,
|
||
|
|
"valid_targets_min": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2105263157894735,
|
||
|
|
"grad_norm": 0.5964839392422349,
|
||
|
|
"learning_rate": 2.639976270531996e-05,
|
||
|
|
"loss": 0.1096,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02001447230577469,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 1876.8,
|
||
|
|
"valid_targets_min": 463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.263157894736842,
|
||
|
|
"grad_norm": 1.0058384602350647,
|
||
|
|
"learning_rate": 2.5899884352444994e-05,
|
||
|
|
"loss": 0.1292,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03967369720339775,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 1636.5,
|
||
|
|
"valid_targets_min": 1320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3157894736842106,
|
||
|
|
"grad_norm": 0.7379249353528501,
|
||
|
|
"learning_rate": 2.5395935423140487e-05,
|
||
|
|
"loss": 0.1148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.023941613733768463,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 1889.2,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3684210526315788,
|
||
|
|
"grad_norm": 0.6320511316986958,
|
||
|
|
"learning_rate": 2.4888263612803637e-05,
|
||
|
|
"loss": 0.111,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015035606920719147,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 2843.8,
|
||
|
|
"valid_targets_min": 579
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4210526315789473,
|
||
|
|
"grad_norm": 0.7888755202797919,
|
||
|
|
"learning_rate": 2.4377219185402613e-05,
|
||
|
|
"loss": 0.1091,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028542907908558846,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 1003.2,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.473684210526316,
|
||
|
|
"grad_norm": 1.129509556464978,
|
||
|
|
"learning_rate": 2.3863154731814867e-05,
|
||
|
|
"loss": 0.1241,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05130193009972572,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 1287.8,
|
||
|
|
"valid_targets_min": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.526315789473684,
|
||
|
|
"grad_norm": 0.6395207589130021,
|
||
|
|
"learning_rate": 2.3346424926559935e-05,
|
||
|
|
"loss": 0.1313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.019851651042699814,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 2567.2,
|
||
|
|
"valid_targets_min": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5789473684210527,
|
||
|
|
"grad_norm": 0.5599961293630993,
|
||
|
|
"learning_rate": 2.2827386283094707e-05,
|
||
|
|
"loss": 0.1229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015991318970918655,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 1478.5,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6315789473684212,
|
||
|
|
"grad_norm": 0.8348704783982805,
|
||
|
|
"learning_rate": 2.2306396907839883e-05,
|
||
|
|
"loss": 0.121,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014956638216972351,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 1313.2,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6842105263157894,
|
||
|
|
"grad_norm": 0.8193826114929279,
|
||
|
|
"learning_rate": 2.178381625310748e-05,
|
||
|
|
"loss": 0.1168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.016708550974726677,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 1944.0,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.736842105263158,
|
||
|
|
"grad_norm": 0.9430505757719317,
|
||
|
|
"learning_rate": 2.1260004869099583e-05,
|
||
|
|
"loss": 0.1054,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.018935151398181915,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 575.2,
|
||
|
|
"valid_targets_min": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7894736842105265,
|
||
|
|
"grad_norm": 0.6631035172044142,
|
||
|
|
"learning_rate": 2.0735324155149795e-05,
|
||
|
|
"loss": 0.135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014331744983792305,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 934.5,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8421052631578947,
|
||
|
|
"grad_norm": 0.8657436402850743,
|
||
|
|
"learning_rate": 2.021013611037873e-05,
|
||
|
|
"loss": 0.1331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03150756284594536,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 1456.0,
|
||
|
|
"valid_targets_min": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8947368421052633,
|
||
|
|
"grad_norm": 0.7075879023467191,
|
||
|
|
"learning_rate": 1.9684803083935676e-05,
|
||
|
|
"loss": 0.1389,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02251843735575676,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 915.5,
|
||
|
|
"valid_targets_min": 433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9473684210526314,
|
||
|
|
"grad_norm": 0.5419112299991141,
|
||
|
|
"learning_rate": 1.915968752499886e-05,
|
||
|
|
"loss": 0.115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015261461958289146,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 1871.0,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0,
|
||
|
|
"grad_norm": 0.7993441162916212,
|
||
|
|
"learning_rate": 1.8635151732706586e-05,
|
||
|
|
"loss": 0.1336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.022348973900079727,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 848.2,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.052631578947368,
|
||
|
|
"grad_norm": 0.6631125816107435,
|
||
|
|
"learning_rate": 1.8111557606191946e-05,
|
||
|
|
"loss": 0.1049,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02011416107416153,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 2040.8,
|
||
|
|
"valid_targets_min": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.105263157894737,
|
||
|
|
"grad_norm": 1.040065523528812,
|
||
|
|
"learning_rate": 1.758926639489354e-05,
|
||
|
|
"loss": 0.1107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02567300572991371,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 1236.0,
|
||
|
|
"valid_targets_min": 281
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.157894736842105,
|
||
|
|
"grad_norm": 0.8157880749784518,
|
||
|
|
"learning_rate": 1.7068638449314365e-05,
|
||
|
|
"loss": 0.1135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02425241470336914,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 1700.0,
|
||
|
|
"valid_targets_min": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2105263157894735,
|
||
|
|
"grad_norm": 0.6407113553475756,
|
||
|
|
"learning_rate": 1.6550032972400996e-05,
|
||
|
|
"loss": 0.1007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015315159223973751,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 2811.0,
|
||
|
|
"valid_targets_min": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2631578947368425,
|
||
|
|
"grad_norm": 0.792271856227501,
|
||
|
|
"learning_rate": 1.6033807771714464e-05,
|
||
|
|
"loss": 0.0856,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0246428269892931,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 1821.0,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.315789473684211,
|
||
|
|
"grad_norm": 0.7488624643430496,
|
||
|
|
"learning_rate": 1.552031901256391e-05,
|
||
|
|
"loss": 0.1061,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012473690323531628,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 744.2,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.368421052631579,
|
||
|
|
"grad_norm": 0.7373911618925023,
|
||
|
|
"learning_rate": 1.5009920972273255e-05,
|
||
|
|
"loss": 0.1026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.024960853159427643,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 1795.8,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.421052631578947,
|
||
|
|
"grad_norm": 0.8368699116001358,
|
||
|
|
"learning_rate": 1.4502965795750487e-05,
|
||
|
|
"loss": 0.1084,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.024426810443401337,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 1879.0,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.473684210526316,
|
||
|
|
"grad_norm": 1.035900030611362,
|
||
|
|
"learning_rate": 1.399980325252823e-05,
|
||
|
|
"loss": 0.1041,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02010546252131462,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 870.0,
|
||
|
|
"valid_targets_min": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.526315789473684,
|
||
|
|
"grad_norm": 0.8454908753679591,
|
||
|
|
"learning_rate": 1.3500780495443098e-05,
|
||
|
|
"loss": 0.0967,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0443265363574028,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 2228.5,
|
||
|
|
"valid_targets_min": 382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.578947368421053,
|
||
|
|
"grad_norm": 0.9079677228427804,
|
||
|
|
"learning_rate": 1.3006241821120483e-05,
|
||
|
|
"loss": 0.0967,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020769363269209862,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 1294.0,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.631578947368421,
|
||
|
|
"grad_norm": 0.697471073230868,
|
||
|
|
"learning_rate": 1.2516528432429955e-05,
|
||
|
|
"loss": 0.0912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017268797382712364,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 1621.0,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.684210526315789,
|
||
|
|
"grad_norm": 0.6926037557955551,
|
||
|
|
"learning_rate": 1.2031978203075172e-05,
|
||
|
|
"loss": 0.0969,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.041803259402513504,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 1963.0,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7368421052631575,
|
||
|
|
"grad_norm": 0.7472368581347761,
|
||
|
|
"learning_rate": 1.1552925444480674e-05,
|
||
|
|
"loss": 0.0975,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01823119819164276,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 1856.8,
|
||
|
|
"valid_targets_min": 461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7894736842105265,
|
||
|
|
"grad_norm": 0.6588601167548072,
|
||
|
|
"learning_rate": 1.1079700675136506e-05,
|
||
|
|
"loss": 0.096,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.024438511580228806,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 3060.0,
|
||
|
|
"valid_targets_min": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.842105263157895,
|
||
|
|
"grad_norm": 1.0306521791067318,
|
||
|
|
"learning_rate": 1.0612630392559728e-05,
|
||
|
|
"loss": 0.098,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08015666902065277,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 2960.8,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.894736842105263,
|
||
|
|
"grad_norm": 0.5351687335944878,
|
||
|
|
"learning_rate": 1.015203684803013e-05,
|
||
|
|
"loss": 0.0822,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015991540625691414,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 4341.2,
|
||
|
|
"valid_targets_min": 1093
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.947368421052632,
|
||
|
|
"grad_norm": 0.7874517868903579,
|
||
|
|
"learning_rate": 9.698237824255634e-06,
|
||
|
|
"loss": 0.1013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02613271400332451,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 1760.5,
|
||
|
|
"valid_targets_min": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.7529240572345405,
|
||
|
|
"learning_rate": 9.251546416120756e-06,
|
||
|
|
"loss": 0.0948,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.038777366280555725,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 2599.2,
|
||
|
|
"valid_targets_min": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.052631578947368,
|
||
|
|
"grad_norm": 0.8391592840372387,
|
||
|
|
"learning_rate": 8.812270814669338e-06,
|
||
|
|
"loss": 0.0991,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012826068326830864,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 827.0,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.105263157894737,
|
||
|
|
"grad_norm": 0.898064625511165,
|
||
|
|
"learning_rate": 8.38071409447074e-06,
|
||
|
|
"loss": 0.0798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013072891160845757,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 2060.5,
|
||
|
|
"valid_targets_min": 526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.157894736842105,
|
||
|
|
"grad_norm": 0.7414990110957742,
|
||
|
|
"learning_rate": 7.957174004516015e-06,
|
||
|
|
"loss": 0.0807,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017101336270570755,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 2430.8,
|
||
|
|
"valid_targets_min": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2105263157894735,
|
||
|
|
"grad_norm": 0.8541483835659059,
|
||
|
|
"learning_rate": 7.5419427627884586e-06,
|
||
|
|
"loss": 0.0903,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.016206396743655205,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 2121.8,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2631578947368425,
|
||
|
|
"grad_norm": 0.9520445173297055,
|
||
|
|
"learning_rate": 7.1353068546502144e-06,
|
||
|
|
"loss": 0.0876,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0290323905646801,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 2227.2,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.315789473684211,
|
||
|
|
"grad_norm": 0.49442257527886013,
|
||
|
|
"learning_rate": 6.737546835184101e-06,
|
||
|
|
"loss": 0.0681,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005898091942071915,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 3989.0,
|
||
|
|
"valid_targets_min": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.368421052631579,
|
||
|
|
"grad_norm": 0.6172028111736194,
|
||
|
|
"learning_rate": 6.348937135626922e-06,
|
||
|
|
"loss": 0.0795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02028818055987358,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 2356.5,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.421052631578947,
|
||
|
|
"grad_norm": 1.0863613555570175,
|
||
|
|
"learning_rate": 5.9697458740279165e-06,
|
||
|
|
"loss": 0.095,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02723700925707817,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 1796.5,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.473684210526316,
|
||
|
|
"grad_norm": 0.9127541216790402,
|
||
|
|
"learning_rate": 5.600234670262925e-06,
|
||
|
|
"loss": 0.0657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02121194452047348,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 1008.2,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.526315789473684,
|
||
|
|
"grad_norm": 0.7939548918919143,
|
||
|
|
"learning_rate": 5.240658465531914e-06,
|
||
|
|
"loss": 0.0907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020264407619833946,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 2318.5,
|
||
|
|
"valid_targets_min": 1029
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.578947368421053,
|
||
|
|
"grad_norm": 0.7958309073175084,
|
||
|
|
"learning_rate": 4.891265346464416e-06,
|
||
|
|
"loss": 0.0811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.016854919493198395,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 1867.5,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.631578947368421,
|
||
|
|
"grad_norm": 0.964741333422222,
|
||
|
|
"learning_rate": 4.552296373954194e-06,
|
||
|
|
"loss": 0.0916,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02305922657251358,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 1494.0,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.684210526315789,
|
||
|
|
"grad_norm": 0.6953130743902943,
|
||
|
|
"learning_rate": 4.223985416841292e-06,
|
||
|
|
"loss": 0.0858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006064994726330042,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 2224.8,
|
||
|
|
"valid_targets_min": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7368421052631575,
|
||
|
|
"grad_norm": 0.8426129883534313,
|
||
|
|
"learning_rate": 3.906558990556126e-06,
|
||
|
|
"loss": 0.0867,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015257779508829117,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 1543.0,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7894736842105265,
|
||
|
|
"grad_norm": 0.8466455484780093,
|
||
|
|
"learning_rate": 3.6002361008370802e-06,
|
||
|
|
"loss": 0.0782,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017657935619354248,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 1370.5,
|
||
|
|
"valid_targets_min": 796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.842105263157895,
|
||
|
|
"grad_norm": 0.7810621379380235,
|
||
|
|
"learning_rate": 3.3052280926292802e-06,
|
||
|
|
"loss": 0.0857,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017286978662014008,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 1197.5,
|
||
|
|
"valid_targets_min": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.894736842105263,
|
||
|
|
"grad_norm": 0.9429542983678016,
|
||
|
|
"learning_rate": 3.021738504268905e-06,
|
||
|
|
"loss": 0.0803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.019310947507619858,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 740.2,
|
||
|
|
"valid_targets_min": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.947368421052632,
|
||
|
|
"grad_norm": 0.848754138173841,
|
||
|
|
"learning_rate": 2.7499629270535954e-06,
|
||
|
|
"loss": 0.0795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.036632854491472244,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 2569.2,
|
||
|
|
"valid_targets_min": 1078
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0,
|
||
|
|
"grad_norm": 0.8474574025573199,
|
||
|
|
"learning_rate": 2.490088870295839e-06,
|
||
|
|
"loss": 0.0748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013190139085054398,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 1706.0,
|
||
|
|
"valid_targets_min": 539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.052631578947368,
|
||
|
|
"grad_norm": 1.277717890049751,
|
||
|
|
"learning_rate": 2.242295631952496e-06,
|
||
|
|
"loss": 0.0854,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02643488347530365,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 1145.2,
|
||
|
|
"valid_targets_min": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.105263157894737,
|
||
|
|
"grad_norm": 1.002374231512521,
|
||
|
|
"learning_rate": 2.0067541749196453e-06,
|
||
|
|
"loss": 0.0711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.021946445107460022,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 784.0,
|
||
|
|
"valid_targets_min": 421
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.157894736842105,
|
||
|
|
"grad_norm": 0.5901745775554788,
|
||
|
|
"learning_rate": 1.783627009078137e-06,
|
||
|
|
"loss": 0.0829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014920946210622787,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 2335.2,
|
||
|
|
"valid_targets_min": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2105263157894735,
|
||
|
|
"grad_norm": 0.8805052231905595,
|
||
|
|
"learning_rate": 1.573068079171265e-06,
|
||
|
|
"loss": 0.0813,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03274039924144745,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 2157.8,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2631578947368425,
|
||
|
|
"grad_norm": 0.932027933585667,
|
||
|
|
"learning_rate": 1.3752226585918416e-06,
|
||
|
|
"loss": 0.0719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014052268117666245,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 994.8,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.315789473684211,
|
||
|
|
"grad_norm": 0.8541046370873381,
|
||
|
|
"learning_rate": 1.1902272491520362e-06,
|
||
|
|
"loss": 0.0653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00850268267095089,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 1679.0,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.368421052631579,
|
||
|
|
"grad_norm": 0.9133472628588594,
|
||
|
|
"learning_rate": 1.0182094869050796e-06,
|
||
|
|
"loss": 0.0788,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02191070280969143,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 1648.2,
|
||
|
|
"valid_targets_min": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.421052631578947,
|
||
|
|
"grad_norm": 0.8210144567874685,
|
||
|
|
"learning_rate": 8.592880540838111e-07,
|
||
|
|
"loss": 0.066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014700721949338913,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 868.8,
|
||
|
|
"valid_targets_min": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.473684210526316,
|
||
|
|
"grad_norm": 0.6702334373473492,
|
||
|
|
"learning_rate": 7.135725972168694e-07,
|
||
|
|
"loss": 0.085,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014351149089634418,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 2272.8,
|
||
|
|
"valid_targets_min": 546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.526315789473684,
|
||
|
|
"grad_norm": 0.6787505389804506,
|
||
|
|
"learning_rate": 5.811636514789598e-07,
|
||
|
|
"loss": 0.0628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011107168160378933,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 1480.2,
|
||
|
|
"valid_targets_min": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.578947368421053,
|
||
|
|
"grad_norm": 0.7692352383890824,
|
||
|
|
"learning_rate": 4.621525713274588e-07,
|
||
|
|
"loss": 0.0716,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017746414989233017,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 2110.8,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.631578947368421,
|
||
|
|
"grad_norm": 0.7906146884753448,
|
||
|
|
"learning_rate": 3.5662146747315054e-07,
|
||
|
|
"loss": 0.0882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.022792555391788483,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 2510.0,
|
||
|
|
"valid_targets_min": 433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.684210526315789,
|
||
|
|
"grad_norm": 1.1576408782365855,
|
||
|
|
"learning_rate": 2.6464315022861844e-07,
|
||
|
|
"loss": 0.0855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.024924524128437042,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 1319.0,
|
||
|
|
"valid_targets_min": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7368421052631575,
|
||
|
|
"grad_norm": 0.7614682584594898,
|
||
|
|
"learning_rate": 1.862810792733849e-07,
|
||
|
|
"loss": 0.0731,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.025046605616807938,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 1614.5,
|
||
|
|
"valid_targets_min": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7894736842105265,
|
||
|
|
"grad_norm": 0.8926016666095682,
|
||
|
|
"learning_rate": 1.2158931987041877e-07,
|
||
|
|
"loss": 0.0746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01438556332141161,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 1287.2,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.842105263157895,
|
||
|
|
"grad_norm": 0.906919830054555,
|
||
|
|
"learning_rate": 7.06125055642537e-08,
|
||
|
|
"loss": 0.0677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008928455412387848,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 464.8,
|
||
|
|
"valid_targets_min": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.894736842105263,
|
||
|
|
"grad_norm": 0.7154592892609424,
|
||
|
|
"learning_rate": 3.3385807386456804e-08,
|
||
|
|
"loss": 0.0664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03817162662744522,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 2167.2,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.947368421052632,
|
||
|
|
"grad_norm": 0.9354297577789874,
|
||
|
|
"learning_rate": 9.934909589646157e-09,
|
||
|
|
"loss": 0.0767,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01679036021232605,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 1597.2,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"grad_norm": 0.9200047562223783,
|
||
|
|
"learning_rate": 2.759919268702227e-10,
|
||
|
|
"loss": 0.0788,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028918465599417686,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 1713.0,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028918465599417686,
|
||
|
|
"step": 665,
|
||
|
|
"total_flos": 1.8900557354971955e+17,
|
||
|
|
"train_loss": 0.14472091395155828,
|
||
|
|
"train_runtime": 29358.9617,
|
||
|
|
"train_samples_per_second": 0.362,
|
||
|
|
"train_steps_per_second": 0.023,
|
||
|
|
"valid_targets_mean": 1713.0,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 665,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 1500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 1.8900557354971955e+17,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|