{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 665, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05263157894736842, "grad_norm": 12.553807503106103, "learning_rate": 2.3880597014925373e-06, "loss": 0.6006, "loss_nan_ranks": 0, "loss_rank_avg": 0.11287848651409149, "step": 5, "valid_targets_mean": 673.5, "valid_targets_min": 369 }, { "epoch": 0.10526315789473684, "grad_norm": 7.035434903432055, "learning_rate": 5.37313432835821e-06, "loss": 0.6424, "loss_nan_ranks": 0, "loss_rank_avg": 0.10525691509246826, "step": 10, "valid_targets_mean": 3003.0, "valid_targets_min": 529 }, { "epoch": 0.15789473684210525, "grad_norm": 5.198775023562205, "learning_rate": 8.35820895522388e-06, "loss": 0.4501, "loss_nan_ranks": 0, "loss_rank_avg": 0.0761948674917221, "step": 15, "valid_targets_mean": 1735.2, "valid_targets_min": 610 }, { "epoch": 0.21052631578947367, "grad_norm": 2.0741815557089174, "learning_rate": 1.1343283582089553e-05, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.10525241494178772, "step": 20, "valid_targets_mean": 1774.2, "valid_targets_min": 632 }, { "epoch": 0.2631578947368421, "grad_norm": 1.850608693504522, "learning_rate": 1.4328358208955224e-05, "loss": 0.3576, "loss_nan_ranks": 0, "loss_rank_avg": 0.2170872539281845, "step": 25, "valid_targets_mean": 1434.5, "valid_targets_min": 537 }, { "epoch": 0.3157894736842105, "grad_norm": 0.7064366027469681, "learning_rate": 1.7313432835820894e-05, "loss": 0.2887, "loss_nan_ranks": 0, "loss_rank_avg": 0.04902719706296921, "step": 30, "valid_targets_mean": 1492.2, "valid_targets_min": 337 }, { "epoch": 0.3684210526315789, "grad_norm": 0.8492931103555103, "learning_rate": 2.029850746268657e-05, "loss": 0.3099, "loss_nan_ranks": 0, "loss_rank_avg": 0.11869967728853226, "step": 35, "valid_targets_mean": 2878.5, "valid_targets_min": 574 }, { "epoch": 0.42105263157894735, "grad_norm": 0.9128923904158283, "learning_rate": 2.3283582089552242e-05, "loss": 0.2594, "loss_nan_ranks": 0, "loss_rank_avg": 0.06856397539377213, "step": 40, "valid_targets_mean": 2244.8, "valid_targets_min": 478 }, { "epoch": 0.47368421052631576, "grad_norm": 0.7721603884282439, "learning_rate": 2.6268656716417913e-05, "loss": 0.2576, "loss_nan_ranks": 0, "loss_rank_avg": 0.07116584479808807, "step": 45, "valid_targets_mean": 1934.0, "valid_targets_min": 567 }, { "epoch": 0.5263157894736842, "grad_norm": 0.815787876318566, "learning_rate": 2.9253731343283584e-05, "loss": 0.291, "loss_nan_ranks": 0, "loss_rank_avg": 0.13275116682052612, "step": 50, "valid_targets_mean": 2430.8, "valid_targets_min": 1481 }, { "epoch": 0.5789473684210527, "grad_norm": 0.7820305833709643, "learning_rate": 3.2238805970149255e-05, "loss": 0.2378, "loss_nan_ranks": 0, "loss_rank_avg": 0.06999962031841278, "step": 55, "valid_targets_mean": 1782.0, "valid_targets_min": 459 }, { "epoch": 0.631578947368421, "grad_norm": 0.921532705829729, "learning_rate": 3.522388059701493e-05, "loss": 0.2227, "loss_nan_ranks": 0, "loss_rank_avg": 0.04323829710483551, "step": 60, "valid_targets_mean": 758.5, "valid_targets_min": 591 }, { "epoch": 0.6842105263157895, "grad_norm": 0.7092700459991306, "learning_rate": 3.8208955223880596e-05, "loss": 0.2336, "loss_nan_ranks": 0, "loss_rank_avg": 0.037662893533706665, "step": 65, "valid_targets_mean": 730.0, "valid_targets_min": 436 }, { "epoch": 0.7368421052631579, "grad_norm": 0.7461432563225519, "learning_rate": 3.9998896039909675e-05, "loss": 0.1988, "loss_nan_ranks": 0, "loss_rank_avg": 0.02792029082775116, "step": 70, "valid_targets_mean": 497.0, "valid_targets_min": 461 }, { "epoch": 0.7894736842105263, "grad_norm": 0.7023900039706257, "learning_rate": 3.998647788848384e-05, "loss": 0.2185, "loss_nan_ranks": 0, "loss_rank_avg": 0.03114963322877884, "step": 75, "valid_targets_mean": 1628.8, "valid_targets_min": 382 }, { "epoch": 0.8421052631578947, "grad_norm": 0.7055640361791583, "learning_rate": 3.996027023188427e-05, "loss": 0.2002, "loss_nan_ranks": 0, "loss_rank_avg": 0.026642896234989166, "step": 80, "valid_targets_mean": 570.2, "valid_targets_min": 453 }, { "epoch": 0.8947368421052632, "grad_norm": 0.8834619078292679, "learning_rate": 3.9920291151866977e-05, "loss": 0.2278, "loss_nan_ranks": 0, "loss_rank_avg": 0.08590055257081985, "step": 85, "valid_targets_mean": 2245.0, "valid_targets_min": 564 }, { "epoch": 0.9473684210526315, "grad_norm": 0.8048346765640612, "learning_rate": 3.986656823166766e-05, "loss": 0.2334, "loss_nan_ranks": 0, "loss_rank_avg": 0.08790746331214905, "step": 90, "valid_targets_mean": 3408.2, "valid_targets_min": 1208 }, { "epoch": 1.0, "grad_norm": 0.7721245809969578, "learning_rate": 3.979913853697095e-05, "loss": 0.1963, "loss_nan_ranks": 0, "loss_rank_avg": 0.05376936122775078, "step": 95, "valid_targets_mean": 1591.8, "valid_targets_min": 804 }, { "epoch": 1.0526315789473684, "grad_norm": 0.6211663045932956, "learning_rate": 3.9718048590337186e-05, "loss": 0.1857, "loss_nan_ranks": 0, "loss_rank_avg": 0.0645800530910492, "step": 100, "valid_targets_mean": 2262.0, "valid_targets_min": 1426 }, { "epoch": 1.1052631578947367, "grad_norm": 0.7785104666209682, "learning_rate": 3.962335433910463e-05, "loss": 0.1824, "loss_nan_ranks": 0, "loss_rank_avg": 0.025534367188811302, "step": 105, "valid_targets_mean": 679.8, "valid_targets_min": 514 }, { "epoch": 1.1578947368421053, "grad_norm": 0.8004929865701785, "learning_rate": 3.9515121116788985e-05, "loss": 0.205, "loss_nan_ranks": 0, "loss_rank_avg": 0.10348161309957504, "step": 110, "valid_targets_mean": 2548.2, "valid_targets_min": 1644 }, { "epoch": 1.2105263157894737, "grad_norm": 0.8442132787033475, "learning_rate": 3.939342359800714e-05, "loss": 0.1878, "loss_nan_ranks": 0, "loss_rank_avg": 0.046689361333847046, "step": 115, "valid_targets_mean": 1556.2, "valid_targets_min": 361 }, { "epoch": 1.263157894736842, "grad_norm": 0.8100669921933641, "learning_rate": 3.925834574695599e-05, "loss": 0.1719, "loss_nan_ranks": 0, "loss_rank_avg": 0.0551832839846611, "step": 120, "valid_targets_mean": 2410.0, "valid_targets_min": 523 }, { "epoch": 1.3157894736842106, "grad_norm": 0.7871886106834695, "learning_rate": 3.910998075948207e-05, "loss": 0.1911, "loss_nan_ranks": 0, "loss_rank_avg": 0.05720491334795952, "step": 125, "valid_targets_mean": 2352.5, "valid_targets_min": 532 }, { "epoch": 1.368421052631579, "grad_norm": 0.7323699198683509, "learning_rate": 3.8948430998781824e-05, "loss": 0.1753, "loss_nan_ranks": 0, "loss_rank_avg": 0.0170910581946373, "step": 130, "valid_targets_mean": 1404.5, "valid_targets_min": 575 }, { "epoch": 1.4210526315789473, "grad_norm": 1.0920789638236659, "learning_rate": 3.8773807924776976e-05, "loss": 0.1912, "loss_nan_ranks": 0, "loss_rank_avg": 0.06749822944402695, "step": 135, "valid_targets_mean": 929.8, "valid_targets_min": 369 }, { "epoch": 1.4736842105263157, "grad_norm": 0.7229550949497463, "learning_rate": 3.8586232017213675e-05, "loss": 0.1831, "loss_nan_ranks": 0, "loss_rank_avg": 0.03690353408455849, "step": 140, "valid_targets_mean": 1208.2, "valid_targets_min": 861 }, { "epoch": 1.526315789473684, "grad_norm": 0.7657486783018452, "learning_rate": 3.83858326925385e-05, "loss": 0.1812, "loss_nan_ranks": 0, "loss_rank_avg": 0.037817131727933884, "step": 145, "valid_targets_mean": 1573.8, "valid_targets_min": 524 }, { "epoch": 1.5789473684210527, "grad_norm": 0.7208446240323184, "learning_rate": 3.8172748214608624e-05, "loss": 0.1665, "loss_nan_ranks": 0, "loss_rank_avg": 0.06162188947200775, "step": 150, "valid_targets_mean": 1708.5, "valid_targets_min": 480 }, { "epoch": 1.631578947368421, "grad_norm": 0.7918543247788804, "learning_rate": 3.7947125599297856e-05, "loss": 0.1776, "loss_nan_ranks": 0, "loss_rank_avg": 0.04460986703634262, "step": 155, "valid_targets_mean": 2118.2, "valid_targets_min": 451 }, { "epoch": 1.6842105263157894, "grad_norm": 0.7001163120437345, "learning_rate": 3.7709120513064196e-05, "loss": 0.1856, "loss_nan_ranks": 0, "loss_rank_avg": 0.03856482356786728, "step": 160, "valid_targets_mean": 1108.5, "valid_targets_min": 478 }, { "epoch": 1.736842105263158, "grad_norm": 0.6906214593751792, "learning_rate": 3.745889716554912e-05, "loss": 0.1873, "loss_nan_ranks": 0, "loss_rank_avg": 0.040963269770145416, "step": 165, "valid_targets_mean": 1622.5, "valid_targets_min": 717 }, { "epoch": 1.7894736842105263, "grad_norm": 0.798273215978815, "learning_rate": 3.7196628196282415e-05, "loss": 0.1787, "loss_nan_ranks": 0, "loss_rank_avg": 0.020104659721255302, "step": 170, "valid_targets_mean": 551.8, "valid_targets_min": 482 }, { "epoch": 1.8421052631578947, "grad_norm": 0.6391796891465947, "learning_rate": 3.692249455557103e-05, "loss": 0.1619, "loss_nan_ranks": 0, "loss_rank_avg": 0.025837548077106476, "step": 175, "valid_targets_mean": 3984.0, "valid_targets_min": 252 }, { "epoch": 1.8947368421052633, "grad_norm": 0.5174143688291576, "learning_rate": 3.6636685379653875e-05, "loss": 0.1677, "loss_nan_ranks": 0, "loss_rank_avg": 0.023429114371538162, "step": 180, "valid_targets_mean": 763.5, "valid_targets_min": 520 }, { "epoch": 1.9473684210526314, "grad_norm": 0.8576835506620597, "learning_rate": 3.633939786020884e-05, "loss": 0.1675, "loss_nan_ranks": 0, "loss_rank_avg": 0.03479863703250885, "step": 185, "valid_targets_mean": 1539.5, "valid_targets_min": 452 }, { "epoch": 2.0, "grad_norm": 0.7568710245152749, "learning_rate": 3.603083710830205e-05, "loss": 0.1779, "loss_nan_ranks": 0, "loss_rank_avg": 0.06252141296863556, "step": 190, "valid_targets_mean": 2105.8, "valid_targets_min": 477 }, { "epoch": 2.0526315789473686, "grad_norm": 0.6843664313982926, "learning_rate": 3.5711216012873114e-05, "loss": 0.1443, "loss_nan_ranks": 0, "loss_rank_avg": 0.05131068080663681, "step": 195, "valid_targets_mean": 3221.2, "valid_targets_min": 766 }, { "epoch": 2.1052631578947367, "grad_norm": 0.6549037149587634, "learning_rate": 3.538075509385427e-05, "loss": 0.1406, "loss_nan_ranks": 0, "loss_rank_avg": 0.035776007920503616, "step": 200, "valid_targets_mean": 2162.2, "valid_targets_min": 564 }, { "epoch": 2.1578947368421053, "grad_norm": 0.6798810404078721, "learning_rate": 3.503968235002437e-05, "loss": 0.136, "loss_nan_ranks": 0, "loss_rank_avg": 0.01746884360909462, "step": 205, "valid_targets_mean": 1817.8, "valid_targets_min": 482 }, { "epoch": 2.2105263157894735, "grad_norm": 0.7040977825128967, "learning_rate": 3.468823310170309e-05, "loss": 0.1594, "loss_nan_ranks": 0, "loss_rank_avg": 0.03740096092224121, "step": 210, "valid_targets_mean": 1947.5, "valid_targets_min": 567 }, { "epoch": 2.263157894736842, "grad_norm": 0.7823204339158937, "learning_rate": 3.4326649828393565e-05, "loss": 0.1399, "loss_nan_ranks": 0, "loss_rank_avg": 0.033209312707185745, "step": 215, "valid_targets_mean": 1384.5, "valid_targets_min": 591 }, { "epoch": 2.3157894736842106, "grad_norm": 0.7781824943502823, "learning_rate": 3.395518200148571e-05, "loss": 0.1494, "loss_nan_ranks": 0, "loss_rank_avg": 0.0761878490447998, "step": 220, "valid_targets_mean": 1668.5, "valid_targets_min": 677 }, { "epoch": 2.3684210526315788, "grad_norm": 0.6417846187297783, "learning_rate": 3.357408591213544e-05, "loss": 0.149, "loss_nan_ranks": 0, "loss_rank_avg": 0.02233138121664524, "step": 225, "valid_targets_mean": 869.8, "valid_targets_min": 452 }, { "epoch": 2.4210526315789473, "grad_norm": 1.2326477223555594, "learning_rate": 3.318362449443876e-05, "loss": 0.1446, "loss_nan_ranks": 0, "loss_rank_avg": 0.058247171342372894, "step": 230, "valid_targets_mean": 1183.0, "valid_targets_min": 406 }, { "epoch": 2.473684210526316, "grad_norm": 0.8902988410602314, "learning_rate": 3.278406714402253e-05, "loss": 0.1496, "loss_nan_ranks": 0, "loss_rank_avg": 0.019801612943410873, "step": 235, "valid_targets_mean": 810.2, "valid_targets_min": 436 }, { "epoch": 2.526315789473684, "grad_norm": 0.7722716265711358, "learning_rate": 3.237568953217717e-05, "loss": 0.1357, "loss_nan_ranks": 0, "loss_rank_avg": 0.023706065490841866, "step": 240, "valid_targets_mean": 1874.5, "valid_targets_min": 487 }, { "epoch": 2.5789473684210527, "grad_norm": 0.7505590432900965, "learning_rate": 3.195877341565958e-05, "loss": 0.1605, "loss_nan_ranks": 0, "loss_rank_avg": 0.04506827890872955, "step": 245, "valid_targets_mean": 2340.8, "valid_targets_min": 1596 }, { "epoch": 2.6315789473684212, "grad_norm": 1.078393610170121, "learning_rate": 3.153360644229735e-05, "loss": 0.1365, "loss_nan_ranks": 0, "loss_rank_avg": 0.03983701393008232, "step": 250, "valid_targets_mean": 1252.5, "valid_targets_min": 418 }, { "epoch": 2.6842105263157894, "grad_norm": 1.0240506694449787, "learning_rate": 3.110048195252851e-05, "loss": 0.1763, "loss_nan_ranks": 0, "loss_rank_avg": 0.042883411049842834, "step": 255, "valid_targets_mean": 972.8, "valid_targets_min": 416 }, { "epoch": 2.736842105263158, "grad_norm": 0.88391634666274, "learning_rate": 3.065969877701378e-05, "loss": 0.1496, "loss_nan_ranks": 0, "loss_rank_avg": 0.03341054543852806, "step": 260, "valid_targets_mean": 1375.5, "valid_targets_min": 468 }, { "epoch": 2.7894736842105265, "grad_norm": 0.9935905952495295, "learning_rate": 3.0211561030460755e-05, "loss": 0.1611, "loss_nan_ranks": 0, "loss_rank_avg": 0.04604099690914154, "step": 265, "valid_targets_mean": 1171.0, "valid_targets_min": 572 }, { "epoch": 2.8421052631578947, "grad_norm": 0.7349039809089655, "learning_rate": 2.975637790180255e-05, "loss": 0.1212, "loss_nan_ranks": 0, "loss_rank_avg": 0.01938222162425518, "step": 270, "valid_targets_mean": 813.5, "valid_targets_min": 523 }, { "epoch": 2.8947368421052633, "grad_norm": 0.6446524265967656, "learning_rate": 2.9294463440875375e-05, "loss": 0.1461, "loss_nan_ranks": 0, "loss_rank_avg": 0.032505787909030914, "step": 275, "valid_targets_mean": 2418.0, "valid_targets_min": 791 }, { "epoch": 2.9473684210526314, "grad_norm": 0.8168387870454594, "learning_rate": 2.8826136341742504e-05, "loss": 0.1415, "loss_nan_ranks": 0, "loss_rank_avg": 0.041891537606716156, "step": 280, "valid_targets_mean": 1029.5, "valid_targets_min": 487 }, { "epoch": 3.0, "grad_norm": 0.6795412956632005, "learning_rate": 2.8351719722813933e-05, "loss": 0.1587, "loss_nan_ranks": 0, "loss_rank_avg": 0.03285399451851845, "step": 285, "valid_targets_mean": 1979.0, "valid_targets_min": 917 }, { "epoch": 3.0526315789473686, "grad_norm": 0.8429893002470394, "learning_rate": 2.7871540903913465e-05, "loss": 0.1186, "loss_nan_ranks": 0, "loss_rank_avg": 0.051500104367733, "step": 290, "valid_targets_mean": 1433.0, "valid_targets_min": 407 }, { "epoch": 3.1052631578947367, "grad_norm": 0.7288845482984847, "learning_rate": 2.7385931180447145e-05, "loss": 0.1252, "loss_nan_ranks": 0, "loss_rank_avg": 0.031095707789063454, "step": 295, "valid_targets_mean": 2574.0, "valid_targets_min": 597 }, { "epoch": 3.1578947368421053, "grad_norm": 0.8785679932821717, "learning_rate": 2.6895225594828743e-05, "loss": 0.1249, "loss_nan_ranks": 0, "loss_rank_avg": 0.020273303613066673, "step": 300, "valid_targets_mean": 1149.5, "valid_targets_min": 600 }, { "epoch": 3.2105263157894735, "grad_norm": 0.5964839392422349, "learning_rate": 2.639976270531996e-05, "loss": 0.1096, "loss_nan_ranks": 0, "loss_rank_avg": 0.02001447230577469, "step": 305, "valid_targets_mean": 1876.8, "valid_targets_min": 463 }, { "epoch": 3.263157894736842, "grad_norm": 1.0058384602350647, "learning_rate": 2.5899884352444994e-05, "loss": 0.1292, "loss_nan_ranks": 0, "loss_rank_avg": 0.03967369720339775, "step": 310, "valid_targets_mean": 1636.5, "valid_targets_min": 1320 }, { "epoch": 3.3157894736842106, "grad_norm": 0.7379249353528501, "learning_rate": 2.5395935423140487e-05, "loss": 0.1148, "loss_nan_ranks": 0, "loss_rank_avg": 0.023941613733768463, "step": 315, "valid_targets_mean": 1889.2, "valid_targets_min": 456 }, { "epoch": 3.3684210526315788, "grad_norm": 0.6320511316986958, "learning_rate": 2.4888263612803637e-05, "loss": 0.111, "loss_nan_ranks": 0, "loss_rank_avg": 0.015035606920719147, "step": 320, "valid_targets_mean": 2843.8, "valid_targets_min": 579 }, { "epoch": 3.4210526315789473, "grad_norm": 0.7888755202797919, "learning_rate": 2.4377219185402613e-05, "loss": 0.1091, "loss_nan_ranks": 0, "loss_rank_avg": 0.028542907908558846, "step": 325, "valid_targets_mean": 1003.2, "valid_targets_min": 499 }, { "epoch": 3.473684210526316, "grad_norm": 1.129509556464978, "learning_rate": 2.3863154731814867e-05, "loss": 0.1241, "loss_nan_ranks": 0, "loss_rank_avg": 0.05130193009972572, "step": 330, "valid_targets_mean": 1287.8, "valid_targets_min": 560 }, { "epoch": 3.526315789473684, "grad_norm": 0.6395207589130021, "learning_rate": 2.3346424926559935e-05, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.019851651042699814, "step": 335, "valid_targets_mean": 2567.2, "valid_targets_min": 454 }, { "epoch": 3.5789473684210527, "grad_norm": 0.5599961293630993, "learning_rate": 2.2827386283094707e-05, "loss": 0.1229, "loss_nan_ranks": 0, "loss_rank_avg": 0.015991318970918655, "step": 340, "valid_targets_mean": 1478.5, "valid_targets_min": 457 }, { "epoch": 3.6315789473684212, "grad_norm": 0.8348704783982805, "learning_rate": 2.2306396907839883e-05, "loss": 0.121, "loss_nan_ranks": 0, "loss_rank_avg": 0.014956638216972351, "step": 345, "valid_targets_mean": 1313.2, "valid_targets_min": 489 }, { "epoch": 3.6842105263157894, "grad_norm": 0.8193826114929279, "learning_rate": 2.178381625310748e-05, "loss": 0.1168, "loss_nan_ranks": 0, "loss_rank_avg": 0.016708550974726677, "step": 350, "valid_targets_mean": 1944.0, "valid_targets_min": 549 }, { "epoch": 3.736842105263158, "grad_norm": 0.9430505757719317, "learning_rate": 2.1260004869099583e-05, "loss": 0.1054, "loss_nan_ranks": 0, "loss_rank_avg": 0.018935151398181915, "step": 355, "valid_targets_mean": 575.2, "valid_targets_min": 360 }, { "epoch": 3.7894736842105265, "grad_norm": 0.6631035172044142, "learning_rate": 2.0735324155149795e-05, "loss": 0.135, "loss_nan_ranks": 0, "loss_rank_avg": 0.014331744983792305, "step": 360, "valid_targets_mean": 934.5, "valid_targets_min": 524 }, { "epoch": 3.8421052631578947, "grad_norm": 0.8657436402850743, "learning_rate": 2.021013611037873e-05, "loss": 0.1331, "loss_nan_ranks": 0, "loss_rank_avg": 0.03150756284594536, "step": 365, "valid_targets_mean": 1456.0, "valid_targets_min": 468 }, { "epoch": 3.8947368421052633, "grad_norm": 0.7075879023467191, "learning_rate": 1.9684803083935676e-05, "loss": 0.1389, "loss_nan_ranks": 0, "loss_rank_avg": 0.02251843735575676, "step": 370, "valid_targets_mean": 915.5, "valid_targets_min": 433 }, { "epoch": 3.9473684210526314, "grad_norm": 0.5419112299991141, "learning_rate": 1.915968752499886e-05, "loss": 0.115, "loss_nan_ranks": 0, "loss_rank_avg": 0.015261461958289146, "step": 375, "valid_targets_mean": 1871.0, "valid_targets_min": 446 }, { "epoch": 4.0, "grad_norm": 0.7993441162916212, "learning_rate": 1.8635151732706586e-05, "loss": 0.1336, "loss_nan_ranks": 0, "loss_rank_avg": 0.022348973900079727, "step": 380, "valid_targets_mean": 848.2, "valid_targets_min": 518 }, { "epoch": 4.052631578947368, "grad_norm": 0.6631125816107435, "learning_rate": 1.8111557606191946e-05, "loss": 0.1049, "loss_nan_ranks": 0, "loss_rank_avg": 0.02011416107416153, "step": 385, "valid_targets_mean": 2040.8, "valid_targets_min": 575 }, { "epoch": 4.105263157894737, "grad_norm": 1.040065523528812, "learning_rate": 1.758926639489354e-05, "loss": 0.1107, "loss_nan_ranks": 0, "loss_rank_avg": 0.02567300572991371, "step": 390, "valid_targets_mean": 1236.0, "valid_targets_min": 281 }, { "epoch": 4.157894736842105, "grad_norm": 0.8157880749784518, "learning_rate": 1.7068638449314365e-05, "loss": 0.1135, "loss_nan_ranks": 0, "loss_rank_avg": 0.02425241470336914, "step": 395, "valid_targets_mean": 1700.0, "valid_targets_min": 436 }, { "epoch": 4.2105263157894735, "grad_norm": 0.6407113553475756, "learning_rate": 1.6550032972400996e-05, "loss": 0.1007, "loss_nan_ranks": 0, "loss_rank_avg": 0.015315159223973751, "step": 400, "valid_targets_mean": 2811.0, "valid_targets_min": 655 }, { "epoch": 4.2631578947368425, "grad_norm": 0.792271856227501, "learning_rate": 1.6033807771714464e-05, "loss": 0.0856, "loss_nan_ranks": 0, "loss_rank_avg": 0.0246428269892931, "step": 405, "valid_targets_mean": 1821.0, "valid_targets_min": 453 }, { "epoch": 4.315789473684211, "grad_norm": 0.7488624643430496, "learning_rate": 1.552031901256391e-05, "loss": 0.1061, "loss_nan_ranks": 0, "loss_rank_avg": 0.012473690323531628, "step": 410, "valid_targets_mean": 744.2, "valid_targets_min": 501 }, { "epoch": 4.368421052631579, "grad_norm": 0.7373911618925023, "learning_rate": 1.5009920972273255e-05, "loss": 0.1026, "loss_nan_ranks": 0, "loss_rank_avg": 0.024960853159427643, "step": 415, "valid_targets_mean": 1795.8, "valid_targets_min": 537 }, { "epoch": 4.421052631578947, "grad_norm": 0.8368699116001358, "learning_rate": 1.4502965795750487e-05, "loss": 0.1084, "loss_nan_ranks": 0, "loss_rank_avg": 0.024426810443401337, "step": 420, "valid_targets_mean": 1879.0, "valid_targets_min": 477 }, { "epoch": 4.473684210526316, "grad_norm": 1.035900030611362, "learning_rate": 1.399980325252823e-05, "loss": 0.1041, "loss_nan_ranks": 0, "loss_rank_avg": 0.02010546252131462, "step": 425, "valid_targets_mean": 870.0, "valid_targets_min": 588 }, { "epoch": 4.526315789473684, "grad_norm": 0.8454908753679591, "learning_rate": 1.3500780495443098e-05, "loss": 0.0967, "loss_nan_ranks": 0, "loss_rank_avg": 0.0443265363574028, "step": 430, "valid_targets_mean": 2228.5, "valid_targets_min": 382 }, { "epoch": 4.578947368421053, "grad_norm": 0.9079677228427804, "learning_rate": 1.3006241821120483e-05, "loss": 0.0967, "loss_nan_ranks": 0, "loss_rank_avg": 0.020769363269209862, "step": 435, "valid_targets_mean": 1294.0, "valid_targets_min": 469 }, { "epoch": 4.631578947368421, "grad_norm": 0.697471073230868, "learning_rate": 1.2516528432429955e-05, "loss": 0.0912, "loss_nan_ranks": 0, "loss_rank_avg": 0.017268797382712364, "step": 440, "valid_targets_mean": 1621.0, "valid_targets_min": 440 }, { "epoch": 4.684210526315789, "grad_norm": 0.6926037557955551, "learning_rate": 1.2031978203075172e-05, "loss": 0.0969, "loss_nan_ranks": 0, "loss_rank_avg": 0.041803259402513504, "step": 445, "valid_targets_mean": 1963.0, "valid_targets_min": 524 }, { "epoch": 4.7368421052631575, "grad_norm": 0.7472368581347761, "learning_rate": 1.1552925444480674e-05, "loss": 0.0975, "loss_nan_ranks": 0, "loss_rank_avg": 0.01823119819164276, "step": 450, "valid_targets_mean": 1856.8, "valid_targets_min": 461 }, { "epoch": 4.7894736842105265, "grad_norm": 0.6588601167548072, "learning_rate": 1.1079700675136506e-05, "loss": 0.096, "loss_nan_ranks": 0, "loss_rank_avg": 0.024438511580228806, "step": 455, "valid_targets_mean": 3060.0, "valid_targets_min": 605 }, { "epoch": 4.842105263157895, "grad_norm": 1.0306521791067318, "learning_rate": 1.0612630392559728e-05, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.08015666902065277, "step": 460, "valid_targets_mean": 2960.8, "valid_targets_min": 509 }, { "epoch": 4.894736842105263, "grad_norm": 0.5351687335944878, "learning_rate": 1.015203684803013e-05, "loss": 0.0822, "loss_nan_ranks": 0, "loss_rank_avg": 0.015991540625691414, "step": 465, "valid_targets_mean": 4341.2, "valid_targets_min": 1093 }, { "epoch": 4.947368421052632, "grad_norm": 0.7874517868903579, "learning_rate": 9.698237824255634e-06, "loss": 0.1013, "loss_nan_ranks": 0, "loss_rank_avg": 0.02613271400332451, "step": 470, "valid_targets_mean": 1760.5, "valid_targets_min": 415 }, { "epoch": 5.0, "grad_norm": 0.7529240572345405, "learning_rate": 9.251546416120756e-06, "loss": 0.0948, "loss_nan_ranks": 0, "loss_rank_avg": 0.038777366280555725, "step": 475, "valid_targets_mean": 2599.2, "valid_targets_min": 360 }, { "epoch": 5.052631578947368, "grad_norm": 0.8391592840372387, "learning_rate": 8.812270814669338e-06, "loss": 0.0991, "loss_nan_ranks": 0, "loss_rank_avg": 0.012826068326830864, "step": 480, "valid_targets_mean": 827.0, "valid_targets_min": 549 }, { "epoch": 5.105263157894737, "grad_norm": 0.898064625511165, "learning_rate": 8.38071409447074e-06, "loss": 0.0798, "loss_nan_ranks": 0, "loss_rank_avg": 0.013072891160845757, "step": 485, "valid_targets_mean": 2060.5, "valid_targets_min": 526 }, { "epoch": 5.157894736842105, "grad_norm": 0.7414990110957742, "learning_rate": 7.957174004516015e-06, "loss": 0.0807, "loss_nan_ranks": 0, "loss_rank_avg": 0.017101336270570755, "step": 490, "valid_targets_mean": 2430.8, "valid_targets_min": 418 }, { "epoch": 5.2105263157894735, "grad_norm": 0.8541483835659059, "learning_rate": 7.5419427627884586e-06, "loss": 0.0903, "loss_nan_ranks": 0, "loss_rank_avg": 0.016206396743655205, "step": 495, "valid_targets_mean": 2121.8, "valid_targets_min": 451 }, { "epoch": 5.2631578947368425, "grad_norm": 0.9520445173297055, "learning_rate": 7.1353068546502144e-06, "loss": 0.0876, "loss_nan_ranks": 0, "loss_rank_avg": 0.0290323905646801, "step": 500, "valid_targets_mean": 2227.2, "valid_targets_min": 520 }, { "epoch": 5.315789473684211, "grad_norm": 0.49442257527886013, "learning_rate": 6.737546835184101e-06, "loss": 0.0681, "loss_nan_ranks": 0, "loss_rank_avg": 0.005898091942071915, "step": 505, "valid_targets_mean": 3989.0, "valid_targets_min": 441 }, { "epoch": 5.368421052631579, "grad_norm": 0.6172028111736194, "learning_rate": 6.348937135626922e-06, "loss": 0.0795, "loss_nan_ranks": 0, "loss_rank_avg": 0.02028818055987358, "step": 510, "valid_targets_mean": 2356.5, "valid_targets_min": 515 }, { "epoch": 5.421052631578947, "grad_norm": 1.0863613555570175, "learning_rate": 5.9697458740279165e-06, "loss": 0.095, "loss_nan_ranks": 0, "loss_rank_avg": 0.02723700925707817, "step": 515, "valid_targets_mean": 1796.5, "valid_targets_min": 542 }, { "epoch": 5.473684210526316, "grad_norm": 0.9127541216790402, "learning_rate": 5.600234670262925e-06, "loss": 0.0657, "loss_nan_ranks": 0, "loss_rank_avg": 0.02121194452047348, "step": 520, "valid_targets_mean": 1008.2, "valid_targets_min": 458 }, { "epoch": 5.526315789473684, "grad_norm": 0.7939548918919143, "learning_rate": 5.240658465531914e-06, "loss": 0.0907, "loss_nan_ranks": 0, "loss_rank_avg": 0.020264407619833946, "step": 525, "valid_targets_mean": 2318.5, "valid_targets_min": 1029 }, { "epoch": 5.578947368421053, "grad_norm": 0.7958309073175084, "learning_rate": 4.891265346464416e-06, "loss": 0.0811, "loss_nan_ranks": 0, "loss_rank_avg": 0.016854919493198395, "step": 530, "valid_targets_mean": 1867.5, "valid_targets_min": 505 }, { "epoch": 5.631578947368421, "grad_norm": 0.964741333422222, "learning_rate": 4.552296373954194e-06, "loss": 0.0916, "loss_nan_ranks": 0, "loss_rank_avg": 0.02305922657251358, "step": 535, "valid_targets_mean": 1494.0, "valid_targets_min": 501 }, { "epoch": 5.684210526315789, "grad_norm": 0.6953130743902943, "learning_rate": 4.223985416841292e-06, "loss": 0.0858, "loss_nan_ranks": 0, "loss_rank_avg": 0.006064994726330042, "step": 540, "valid_targets_mean": 2224.8, "valid_targets_min": 436 }, { "epoch": 5.7368421052631575, "grad_norm": 0.8426129883534313, "learning_rate": 3.906558990556126e-06, "loss": 0.0867, "loss_nan_ranks": 0, "loss_rank_avg": 0.015257779508829117, "step": 545, "valid_targets_mean": 1543.0, "valid_targets_min": 398 }, { "epoch": 5.7894736842105265, "grad_norm": 0.8466455484780093, "learning_rate": 3.6002361008370802e-06, "loss": 0.0782, "loss_nan_ranks": 0, "loss_rank_avg": 0.017657935619354248, "step": 550, "valid_targets_mean": 1370.5, "valid_targets_min": 796 }, { "epoch": 5.842105263157895, "grad_norm": 0.7810621379380235, "learning_rate": 3.3052280926292802e-06, "loss": 0.0857, "loss_nan_ranks": 0, "loss_rank_avg": 0.017286978662014008, "step": 555, "valid_targets_mean": 1197.5, "valid_targets_min": 482 }, { "epoch": 5.894736842105263, "grad_norm": 0.9429542983678016, "learning_rate": 3.021738504268905e-06, "loss": 0.0803, "loss_nan_ranks": 0, "loss_rank_avg": 0.019310947507619858, "step": 560, "valid_targets_mean": 740.2, "valid_targets_min": 405 }, { "epoch": 5.947368421052632, "grad_norm": 0.848754138173841, "learning_rate": 2.7499629270535954e-06, "loss": 0.0795, "loss_nan_ranks": 0, "loss_rank_avg": 0.036632854491472244, "step": 565, "valid_targets_mean": 2569.2, "valid_targets_min": 1078 }, { "epoch": 6.0, "grad_norm": 0.8474574025573199, "learning_rate": 2.490088870295839e-06, "loss": 0.0748, "loss_nan_ranks": 0, "loss_rank_avg": 0.013190139085054398, "step": 570, "valid_targets_mean": 1706.0, "valid_targets_min": 539 }, { "epoch": 6.052631578947368, "grad_norm": 1.277717890049751, "learning_rate": 2.242295631952496e-06, "loss": 0.0854, "loss_nan_ranks": 0, "loss_rank_avg": 0.02643488347530365, "step": 575, "valid_targets_mean": 1145.2, "valid_targets_min": 561 }, { "epoch": 6.105263157894737, "grad_norm": 1.002374231512521, "learning_rate": 2.0067541749196453e-06, "loss": 0.0711, "loss_nan_ranks": 0, "loss_rank_avg": 0.021946445107460022, "step": 580, "valid_targets_mean": 784.0, "valid_targets_min": 421 }, { "epoch": 6.157894736842105, "grad_norm": 0.5901745775554788, "learning_rate": 1.783627009078137e-06, "loss": 0.0829, "loss_nan_ranks": 0, "loss_rank_avg": 0.014920946210622787, "step": 585, "valid_targets_mean": 2335.2, "valid_targets_min": 485 }, { "epoch": 6.2105263157894735, "grad_norm": 0.8805052231905595, "learning_rate": 1.573068079171265e-06, "loss": 0.0813, "loss_nan_ranks": 0, "loss_rank_avg": 0.03274039924144745, "step": 590, "valid_targets_mean": 2157.8, "valid_targets_min": 529 }, { "epoch": 6.2631578947368425, "grad_norm": 0.932027933585667, "learning_rate": 1.3752226585918416e-06, "loss": 0.0719, "loss_nan_ranks": 0, "loss_rank_avg": 0.014052268117666245, "step": 595, "valid_targets_mean": 994.8, "valid_targets_min": 513 }, { "epoch": 6.315789473684211, "grad_norm": 0.8541046370873381, "learning_rate": 1.1902272491520362e-06, "loss": 0.0653, "loss_nan_ranks": 0, "loss_rank_avg": 0.00850268267095089, "step": 600, "valid_targets_mean": 1679.0, "valid_targets_min": 525 }, { "epoch": 6.368421052631579, "grad_norm": 0.9133472628588594, "learning_rate": 1.0182094869050796e-06, "loss": 0.0788, "loss_nan_ranks": 0, "loss_rank_avg": 0.02191070280969143, "step": 605, "valid_targets_mean": 1648.2, "valid_targets_min": 415 }, { "epoch": 6.421052631578947, "grad_norm": 0.8210144567874685, "learning_rate": 8.592880540838111e-07, "loss": 0.066, "loss_nan_ranks": 0, "loss_rank_avg": 0.014700721949338913, "step": 610, "valid_targets_mean": 868.8, "valid_targets_min": 471 }, { "epoch": 6.473684210526316, "grad_norm": 0.6702334373473492, "learning_rate": 7.135725972168694e-07, "loss": 0.085, "loss_nan_ranks": 0, "loss_rank_avg": 0.014351149089634418, "step": 615, "valid_targets_mean": 2272.8, "valid_targets_min": 546 }, { "epoch": 6.526315789473684, "grad_norm": 0.6787505389804506, "learning_rate": 5.811636514789598e-07, "loss": 0.0628, "loss_nan_ranks": 0, "loss_rank_avg": 0.011107168160378933, "step": 620, "valid_targets_mean": 1480.2, "valid_targets_min": 415 }, { "epoch": 6.578947368421053, "grad_norm": 0.7692352383890824, "learning_rate": 4.621525713274588e-07, "loss": 0.0716, "loss_nan_ranks": 0, "loss_rank_avg": 0.017746414989233017, "step": 625, "valid_targets_mean": 2110.8, "valid_targets_min": 504 }, { "epoch": 6.631578947368421, "grad_norm": 0.7906146884753448, "learning_rate": 3.5662146747315054e-07, "loss": 0.0882, "loss_nan_ranks": 0, "loss_rank_avg": 0.022792555391788483, "step": 630, "valid_targets_mean": 2510.0, "valid_targets_min": 433 }, { "epoch": 6.684210526315789, "grad_norm": 1.1576408782365855, "learning_rate": 2.6464315022861844e-07, "loss": 0.0855, "loss_nan_ranks": 0, "loss_rank_avg": 0.024924524128437042, "step": 635, "valid_targets_mean": 1319.0, "valid_targets_min": 413 }, { "epoch": 6.7368421052631575, "grad_norm": 0.7614682584594898, "learning_rate": 1.862810792733849e-07, "loss": 0.0731, "loss_nan_ranks": 0, "loss_rank_avg": 0.025046605616807938, "step": 640, "valid_targets_mean": 1614.5, "valid_targets_min": 344 }, { "epoch": 6.7894736842105265, "grad_norm": 0.8926016666095682, "learning_rate": 1.2158931987041877e-07, "loss": 0.0746, "loss_nan_ranks": 0, "loss_rank_avg": 0.01438556332141161, "step": 645, "valid_targets_mean": 1287.2, "valid_targets_min": 524 }, { "epoch": 6.842105263157895, "grad_norm": 0.906919830054555, "learning_rate": 7.06125055642537e-08, "loss": 0.0677, "loss_nan_ranks": 0, "loss_rank_avg": 0.008928455412387848, "step": 650, "valid_targets_mean": 464.8, "valid_targets_min": 411 }, { "epoch": 6.894736842105263, "grad_norm": 0.7154592892609424, "learning_rate": 3.3385807386456804e-08, "loss": 0.0664, "loss_nan_ranks": 0, "loss_rank_avg": 0.03817162662744522, "step": 655, "valid_targets_mean": 2167.2, "valid_targets_min": 407 }, { "epoch": 6.947368421052632, "grad_norm": 0.9354297577789874, "learning_rate": 9.934909589646157e-09, "loss": 0.0767, "loss_nan_ranks": 0, "loss_rank_avg": 0.01679036021232605, "step": 660, "valid_targets_mean": 1597.2, "valid_targets_min": 505 }, { "epoch": 7.0, "grad_norm": 0.9200047562223783, "learning_rate": 2.759919268702227e-10, "loss": 0.0788, "loss_nan_ranks": 0, "loss_rank_avg": 0.028918465599417686, "step": 665, "valid_targets_mean": 1713.0, "valid_targets_min": 479 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.028918465599417686, "step": 665, "total_flos": 1.8900557354971955e+17, "train_loss": 0.14472091395155828, "train_runtime": 29358.9617, "train_samples_per_second": 0.362, "train_steps_per_second": 0.023, "valid_targets_mean": 1713.0, "valid_targets_min": 479 } ], "logging_steps": 5, "max_steps": 665, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8900557354971955e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }