{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 4375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008, "grad_norm": 18.4569905168907, "learning_rate": 3.6529680365296803e-07, "loss": 0.7031, "loss_nan_ranks": 0, "loss_rank_avg": 0.6569720506668091, "step": 5, "valid_targets_mean": 1009.4, "valid_targets_min": 490 }, { "epoch": 0.016, "grad_norm": 18.37728431878093, "learning_rate": 8.219178082191781e-07, "loss": 0.6996, "loss_nan_ranks": 0, "loss_rank_avg": 0.6515499353408813, "step": 10, "valid_targets_mean": 991.0, "valid_targets_min": 471 }, { "epoch": 0.024, "grad_norm": 16.431678071793698, "learning_rate": 1.278538812785388e-06, "loss": 0.6774, "loss_nan_ranks": 0, "loss_rank_avg": 0.6113685965538025, "step": 15, "valid_targets_mean": 998.6, "valid_targets_min": 562 }, { "epoch": 0.032, "grad_norm": 15.86978299079565, "learning_rate": 1.7351598173515982e-06, "loss": 0.6387, "loss_nan_ranks": 0, "loss_rank_avg": 0.609423816204071, "step": 20, "valid_targets_mean": 895.1, "valid_targets_min": 542 }, { "epoch": 0.04, "grad_norm": 9.041629780491458, "learning_rate": 2.191780821917808e-06, "loss": 0.5928, "loss_nan_ranks": 0, "loss_rank_avg": 0.5422854423522949, "step": 25, "valid_targets_mean": 998.1, "valid_targets_min": 459 }, { "epoch": 0.048, "grad_norm": 5.754764448188752, "learning_rate": 2.6484018264840183e-06, "loss": 0.523, "loss_nan_ranks": 0, "loss_rank_avg": 0.4981729984283447, "step": 30, "valid_targets_mean": 965.2, "valid_targets_min": 518 }, { "epoch": 0.056, "grad_norm": 3.3907319203399213, "learning_rate": 3.1050228310502285e-06, "loss": 0.496, "loss_nan_ranks": 0, "loss_rank_avg": 0.6502364873886108, "step": 35, "valid_targets_mean": 1243.8, "valid_targets_min": 515 }, { "epoch": 0.064, "grad_norm": 3.282314265195484, "learning_rate": 3.5616438356164386e-06, "loss": 0.4792, "loss_nan_ranks": 0, "loss_rank_avg": 0.4102541208267212, "step": 40, "valid_targets_mean": 827.4, "valid_targets_min": 584 }, { "epoch": 0.072, "grad_norm": 2.2441663968283954, "learning_rate": 4.018264840182649e-06, "loss": 0.4362, "loss_nan_ranks": 0, "loss_rank_avg": 0.4924893379211426, "step": 45, "valid_targets_mean": 1066.7, "valid_targets_min": 563 }, { "epoch": 0.08, "grad_norm": 2.2276595339792884, "learning_rate": 4.4748858447488585e-06, "loss": 0.4237, "loss_nan_ranks": 0, "loss_rank_avg": 0.33928295969963074, "step": 50, "valid_targets_mean": 683.0, "valid_targets_min": 472 }, { "epoch": 0.088, "grad_norm": 1.5634112203072432, "learning_rate": 4.931506849315069e-06, "loss": 0.3485, "loss_nan_ranks": 0, "loss_rank_avg": 0.3636552691459656, "step": 55, "valid_targets_mean": 839.6, "valid_targets_min": 550 }, { "epoch": 0.096, "grad_norm": 1.4191405974580216, "learning_rate": 5.388127853881279e-06, "loss": 0.327, "loss_nan_ranks": 0, "loss_rank_avg": 0.2820422649383545, "step": 60, "valid_targets_mean": 844.1, "valid_targets_min": 456 }, { "epoch": 0.104, "grad_norm": 1.23315150232887, "learning_rate": 5.8447488584474885e-06, "loss": 0.3257, "loss_nan_ranks": 0, "loss_rank_avg": 0.2875695824623108, "step": 65, "valid_targets_mean": 892.1, "valid_targets_min": 479 }, { "epoch": 0.112, "grad_norm": 1.0958845246431541, "learning_rate": 6.301369863013699e-06, "loss": 0.2519, "loss_nan_ranks": 0, "loss_rank_avg": 0.3263506293296814, "step": 70, "valid_targets_mean": 1024.4, "valid_targets_min": 478 }, { "epoch": 0.12, "grad_norm": 1.4675627271539373, "learning_rate": 6.757990867579909e-06, "loss": 0.293, "loss_nan_ranks": 0, "loss_rank_avg": 0.28500908613204956, "step": 75, "valid_targets_mean": 928.7, "valid_targets_min": 495 }, { "epoch": 0.128, "grad_norm": 0.9999302683084342, "learning_rate": 7.214611872146119e-06, "loss": 0.3002, "loss_nan_ranks": 0, "loss_rank_avg": 0.28047606348991394, "step": 80, "valid_targets_mean": 969.8, "valid_targets_min": 520 }, { "epoch": 0.136, "grad_norm": 1.2285241043015935, "learning_rate": 7.671232876712329e-06, "loss": 0.2784, "loss_nan_ranks": 0, "loss_rank_avg": 0.26190608739852905, "step": 85, "valid_targets_mean": 707.2, "valid_targets_min": 474 }, { "epoch": 0.144, "grad_norm": 1.1912365240631553, "learning_rate": 8.127853881278539e-06, "loss": 0.3047, "loss_nan_ranks": 0, "loss_rank_avg": 0.38986343145370483, "step": 90, "valid_targets_mean": 1021.1, "valid_targets_min": 445 }, { "epoch": 0.152, "grad_norm": 0.9470584016240954, "learning_rate": 8.584474885844748e-06, "loss": 0.2826, "loss_nan_ranks": 0, "loss_rank_avg": 0.20783933997154236, "step": 95, "valid_targets_mean": 854.7, "valid_targets_min": 489 }, { "epoch": 0.16, "grad_norm": 1.0266730827392707, "learning_rate": 9.04109589041096e-06, "loss": 0.2661, "loss_nan_ranks": 0, "loss_rank_avg": 0.257321834564209, "step": 100, "valid_targets_mean": 810.8, "valid_targets_min": 523 }, { "epoch": 0.168, "grad_norm": 0.9796631800596888, "learning_rate": 9.49771689497717e-06, "loss": 0.2297, "loss_nan_ranks": 0, "loss_rank_avg": 0.2218109667301178, "step": 105, "valid_targets_mean": 875.9, "valid_targets_min": 504 }, { "epoch": 0.176, "grad_norm": 0.9414981219144624, "learning_rate": 9.95433789954338e-06, "loss": 0.2492, "loss_nan_ranks": 0, "loss_rank_avg": 0.2698684632778168, "step": 110, "valid_targets_mean": 966.4, "valid_targets_min": 443 }, { "epoch": 0.184, "grad_norm": 0.9364497568626377, "learning_rate": 1.0410958904109589e-05, "loss": 0.2368, "loss_nan_ranks": 0, "loss_rank_avg": 0.2731756269931793, "step": 115, "valid_targets_mean": 974.8, "valid_targets_min": 444 }, { "epoch": 0.192, "grad_norm": 1.0965744906125983, "learning_rate": 1.08675799086758e-05, "loss": 0.2206, "loss_nan_ranks": 0, "loss_rank_avg": 0.26202571392059326, "step": 120, "valid_targets_mean": 929.9, "valid_targets_min": 518 }, { "epoch": 0.2, "grad_norm": 1.0127475793360836, "learning_rate": 1.132420091324201e-05, "loss": 0.2676, "loss_nan_ranks": 0, "loss_rank_avg": 0.3210471272468567, "step": 125, "valid_targets_mean": 904.8, "valid_targets_min": 478 }, { "epoch": 0.208, "grad_norm": 0.8732913297809469, "learning_rate": 1.178082191780822e-05, "loss": 0.2073, "loss_nan_ranks": 0, "loss_rank_avg": 0.15821754932403564, "step": 130, "valid_targets_mean": 789.3, "valid_targets_min": 506 }, { "epoch": 0.216, "grad_norm": 1.1510599786111335, "learning_rate": 1.223744292237443e-05, "loss": 0.2252, "loss_nan_ranks": 0, "loss_rank_avg": 0.24508722126483917, "step": 135, "valid_targets_mean": 804.2, "valid_targets_min": 477 }, { "epoch": 0.224, "grad_norm": 1.996701902148517, "learning_rate": 1.2694063926940641e-05, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.2965832054615021, "step": 140, "valid_targets_mean": 902.7, "valid_targets_min": 476 }, { "epoch": 0.232, "grad_norm": 1.0173663666021984, "learning_rate": 1.3150684931506849e-05, "loss": 0.2633, "loss_nan_ranks": 0, "loss_rank_avg": 0.35601651668548584, "step": 145, "valid_targets_mean": 1106.1, "valid_targets_min": 486 }, { "epoch": 0.24, "grad_norm": 0.9949771832156836, "learning_rate": 1.360730593607306e-05, "loss": 0.2359, "loss_nan_ranks": 0, "loss_rank_avg": 0.2783992290496826, "step": 150, "valid_targets_mean": 911.1, "valid_targets_min": 439 }, { "epoch": 0.248, "grad_norm": 0.9181491942298239, "learning_rate": 1.406392694063927e-05, "loss": 0.2558, "loss_nan_ranks": 0, "loss_rank_avg": 0.16628001630306244, "step": 155, "valid_targets_mean": 814.1, "valid_targets_min": 503 }, { "epoch": 0.256, "grad_norm": 0.974921377150203, "learning_rate": 1.4520547945205482e-05, "loss": 0.2168, "loss_nan_ranks": 0, "loss_rank_avg": 0.2327520251274109, "step": 160, "valid_targets_mean": 975.6, "valid_targets_min": 539 }, { "epoch": 0.264, "grad_norm": 0.8330947154576838, "learning_rate": 1.497716894977169e-05, "loss": 0.2905, "loss_nan_ranks": 0, "loss_rank_avg": 0.3683410882949829, "step": 165, "valid_targets_mean": 1346.4, "valid_targets_min": 524 }, { "epoch": 0.272, "grad_norm": 1.071730464304723, "learning_rate": 1.54337899543379e-05, "loss": 0.2268, "loss_nan_ranks": 0, "loss_rank_avg": 0.32522037625312805, "step": 170, "valid_targets_mean": 993.7, "valid_targets_min": 547 }, { "epoch": 0.28, "grad_norm": 1.0956676683570343, "learning_rate": 1.589041095890411e-05, "loss": 0.229, "loss_nan_ranks": 0, "loss_rank_avg": 0.19170644879341125, "step": 175, "valid_targets_mean": 701.1, "valid_targets_min": 474 }, { "epoch": 0.288, "grad_norm": 0.8852839703904614, "learning_rate": 1.634703196347032e-05, "loss": 0.2205, "loss_nan_ranks": 0, "loss_rank_avg": 0.1616896688938141, "step": 180, "valid_targets_mean": 836.2, "valid_targets_min": 520 }, { "epoch": 0.296, "grad_norm": 0.9638350793584443, "learning_rate": 1.680365296803653e-05, "loss": 0.1893, "loss_nan_ranks": 0, "loss_rank_avg": 0.18771511316299438, "step": 185, "valid_targets_mean": 769.9, "valid_targets_min": 424 }, { "epoch": 0.304, "grad_norm": 1.063824391310194, "learning_rate": 1.726027397260274e-05, "loss": 0.2607, "loss_nan_ranks": 0, "loss_rank_avg": 0.22938010096549988, "step": 190, "valid_targets_mean": 948.7, "valid_targets_min": 501 }, { "epoch": 0.312, "grad_norm": 0.9153693008635697, "learning_rate": 1.771689497716895e-05, "loss": 0.2549, "loss_nan_ranks": 0, "loss_rank_avg": 0.30292022228240967, "step": 195, "valid_targets_mean": 1185.6, "valid_targets_min": 527 }, { "epoch": 0.32, "grad_norm": 1.2046470357044166, "learning_rate": 1.8173515981735163e-05, "loss": 0.1737, "loss_nan_ranks": 0, "loss_rank_avg": 0.1771753877401352, "step": 200, "valid_targets_mean": 823.1, "valid_targets_min": 406 }, { "epoch": 0.328, "grad_norm": 0.9946763380079855, "learning_rate": 1.863013698630137e-05, "loss": 0.2218, "loss_nan_ranks": 0, "loss_rank_avg": 0.21841683983802795, "step": 205, "valid_targets_mean": 911.6, "valid_targets_min": 451 }, { "epoch": 0.336, "grad_norm": 0.9553594171492042, "learning_rate": 1.9086757990867582e-05, "loss": 0.2307, "loss_nan_ranks": 0, "loss_rank_avg": 0.17415133118629456, "step": 210, "valid_targets_mean": 757.9, "valid_targets_min": 455 }, { "epoch": 0.344, "grad_norm": 1.0370039920938834, "learning_rate": 1.954337899543379e-05, "loss": 0.2457, "loss_nan_ranks": 0, "loss_rank_avg": 0.17233221232891083, "step": 215, "valid_targets_mean": 731.9, "valid_targets_min": 439 }, { "epoch": 0.352, "grad_norm": 1.0056651249289874, "learning_rate": 2e-05, "loss": 0.216, "loss_nan_ranks": 0, "loss_rank_avg": 0.23098325729370117, "step": 220, "valid_targets_mean": 868.7, "valid_targets_min": 527 }, { "epoch": 0.36, "grad_norm": 0.8927780696688123, "learning_rate": 2.045662100456621e-05, "loss": 0.2311, "loss_nan_ranks": 0, "loss_rank_avg": 0.20995758473873138, "step": 225, "valid_targets_mean": 893.7, "valid_targets_min": 434 }, { "epoch": 0.368, "grad_norm": 0.9329266167157003, "learning_rate": 2.0913242009132424e-05, "loss": 0.1906, "loss_nan_ranks": 0, "loss_rank_avg": 0.24100714921951294, "step": 230, "valid_targets_mean": 965.1, "valid_targets_min": 530 }, { "epoch": 0.376, "grad_norm": 0.9597984786382444, "learning_rate": 2.1369863013698632e-05, "loss": 0.2111, "loss_nan_ranks": 0, "loss_rank_avg": 0.18597939610481262, "step": 235, "valid_targets_mean": 772.9, "valid_targets_min": 446 }, { "epoch": 0.384, "grad_norm": 0.9528637578725856, "learning_rate": 2.182648401826484e-05, "loss": 0.2189, "loss_nan_ranks": 0, "loss_rank_avg": 0.16806253790855408, "step": 240, "valid_targets_mean": 764.3, "valid_targets_min": 520 }, { "epoch": 0.392, "grad_norm": 1.134206080982455, "learning_rate": 2.2283105022831052e-05, "loss": 0.25, "loss_nan_ranks": 0, "loss_rank_avg": 0.18358182907104492, "step": 245, "valid_targets_mean": 800.2, "valid_targets_min": 515 }, { "epoch": 0.4, "grad_norm": 0.8772583543749906, "learning_rate": 2.2739726027397263e-05, "loss": 0.1896, "loss_nan_ranks": 0, "loss_rank_avg": 0.1544710397720337, "step": 250, "valid_targets_mean": 854.1, "valid_targets_min": 558 }, { "epoch": 0.408, "grad_norm": 1.1118411973916886, "learning_rate": 2.3196347031963475e-05, "loss": 0.1909, "loss_nan_ranks": 0, "loss_rank_avg": 0.17419582605361938, "step": 255, "valid_targets_mean": 780.8, "valid_targets_min": 501 }, { "epoch": 0.416, "grad_norm": 0.9732009947953545, "learning_rate": 2.3652968036529683e-05, "loss": 0.2025, "loss_nan_ranks": 0, "loss_rank_avg": 0.18934506177902222, "step": 260, "valid_targets_mean": 709.7, "valid_targets_min": 466 }, { "epoch": 0.424, "grad_norm": 0.7748866871282665, "learning_rate": 2.410958904109589e-05, "loss": 0.2181, "loss_nan_ranks": 0, "loss_rank_avg": 0.18687203526496887, "step": 265, "valid_targets_mean": 1078.9, "valid_targets_min": 517 }, { "epoch": 0.432, "grad_norm": 0.9657748196334329, "learning_rate": 2.4566210045662106e-05, "loss": 0.221, "loss_nan_ranks": 0, "loss_rank_avg": 0.15517109632492065, "step": 270, "valid_targets_mean": 790.0, "valid_targets_min": 481 }, { "epoch": 0.44, "grad_norm": 0.8792635823194007, "learning_rate": 2.5022831050228314e-05, "loss": 0.2554, "loss_nan_ranks": 0, "loss_rank_avg": 0.3027264475822449, "step": 275, "valid_targets_mean": 1207.8, "valid_targets_min": 536 }, { "epoch": 0.448, "grad_norm": 0.9037898067976381, "learning_rate": 2.547945205479452e-05, "loss": 0.2211, "loss_nan_ranks": 0, "loss_rank_avg": 0.2351336032152176, "step": 280, "valid_targets_mean": 988.6, "valid_targets_min": 443 }, { "epoch": 0.456, "grad_norm": 0.8560337432929743, "learning_rate": 2.593607305936073e-05, "loss": 0.1959, "loss_nan_ranks": 0, "loss_rank_avg": 0.18937864899635315, "step": 285, "valid_targets_mean": 1041.7, "valid_targets_min": 494 }, { "epoch": 0.464, "grad_norm": 0.8737234325775536, "learning_rate": 2.6392694063926944e-05, "loss": 0.1977, "loss_nan_ranks": 0, "loss_rank_avg": 0.18464210629463196, "step": 290, "valid_targets_mean": 934.6, "valid_targets_min": 532 }, { "epoch": 0.472, "grad_norm": 0.905138785163492, "learning_rate": 2.6849315068493153e-05, "loss": 0.2144, "loss_nan_ranks": 0, "loss_rank_avg": 0.2070557177066803, "step": 295, "valid_targets_mean": 959.8, "valid_targets_min": 451 }, { "epoch": 0.48, "grad_norm": 0.9681305277619024, "learning_rate": 2.7305936073059364e-05, "loss": 0.2019, "loss_nan_ranks": 0, "loss_rank_avg": 0.1667182743549347, "step": 300, "valid_targets_mean": 756.1, "valid_targets_min": 447 }, { "epoch": 0.488, "grad_norm": 0.7668950133552566, "learning_rate": 2.7762557077625572e-05, "loss": 0.2044, "loss_nan_ranks": 0, "loss_rank_avg": 0.16860172152519226, "step": 305, "valid_targets_mean": 972.9, "valid_targets_min": 499 }, { "epoch": 0.496, "grad_norm": 0.923077750951701, "learning_rate": 2.8219178082191783e-05, "loss": 0.2591, "loss_nan_ranks": 0, "loss_rank_avg": 0.2699459195137024, "step": 310, "valid_targets_mean": 1024.6, "valid_targets_min": 432 }, { "epoch": 0.504, "grad_norm": 0.8907714935859932, "learning_rate": 2.8675799086757995e-05, "loss": 0.2433, "loss_nan_ranks": 0, "loss_rank_avg": 0.26105940341949463, "step": 315, "valid_targets_mean": 979.0, "valid_targets_min": 525 }, { "epoch": 0.512, "grad_norm": 1.0158109379952605, "learning_rate": 2.9132420091324203e-05, "loss": 0.2125, "loss_nan_ranks": 0, "loss_rank_avg": 0.17170169949531555, "step": 320, "valid_targets_mean": 846.8, "valid_targets_min": 418 }, { "epoch": 0.52, "grad_norm": 0.8773836099643207, "learning_rate": 2.958904109589041e-05, "loss": 0.1986, "loss_nan_ranks": 0, "loss_rank_avg": 0.21758343279361725, "step": 325, "valid_targets_mean": 1011.7, "valid_targets_min": 497 }, { "epoch": 0.528, "grad_norm": 0.8877924719800544, "learning_rate": 3.0045662100456626e-05, "loss": 0.1905, "loss_nan_ranks": 0, "loss_rank_avg": 0.20519107580184937, "step": 330, "valid_targets_mean": 780.4, "valid_targets_min": 458 }, { "epoch": 0.536, "grad_norm": 0.8401077516113821, "learning_rate": 3.0502283105022834e-05, "loss": 0.2333, "loss_nan_ranks": 0, "loss_rank_avg": 0.16374966502189636, "step": 335, "valid_targets_mean": 917.1, "valid_targets_min": 521 }, { "epoch": 0.544, "grad_norm": 0.9699448470038969, "learning_rate": 3.0958904109589045e-05, "loss": 0.2358, "loss_nan_ranks": 0, "loss_rank_avg": 0.2687930762767792, "step": 340, "valid_targets_mean": 944.8, "valid_targets_min": 549 }, { "epoch": 0.552, "grad_norm": 1.0452101409572365, "learning_rate": 3.141552511415525e-05, "loss": 0.2214, "loss_nan_ranks": 0, "loss_rank_avg": 0.23149526119232178, "step": 345, "valid_targets_mean": 867.1, "valid_targets_min": 524 }, { "epoch": 0.56, "grad_norm": 1.1104839128029447, "learning_rate": 3.187214611872147e-05, "loss": 0.2009, "loss_nan_ranks": 0, "loss_rank_avg": 0.16360512375831604, "step": 350, "valid_targets_mean": 791.6, "valid_targets_min": 477 }, { "epoch": 0.568, "grad_norm": 0.9640804406336958, "learning_rate": 3.2328767123287676e-05, "loss": 0.2185, "loss_nan_ranks": 0, "loss_rank_avg": 0.23180308938026428, "step": 355, "valid_targets_mean": 931.9, "valid_targets_min": 527 }, { "epoch": 0.576, "grad_norm": 0.951770582924293, "learning_rate": 3.2785388127853884e-05, "loss": 0.2156, "loss_nan_ranks": 0, "loss_rank_avg": 0.23412451148033142, "step": 360, "valid_targets_mean": 845.6, "valid_targets_min": 480 }, { "epoch": 0.584, "grad_norm": 0.8066551360509446, "learning_rate": 3.324200913242009e-05, "loss": 0.2161, "loss_nan_ranks": 0, "loss_rank_avg": 0.16541028022766113, "step": 365, "valid_targets_mean": 892.4, "valid_targets_min": 450 }, { "epoch": 0.592, "grad_norm": 0.8950754907204298, "learning_rate": 3.369863013698631e-05, "loss": 0.2108, "loss_nan_ranks": 0, "loss_rank_avg": 0.2536185681819916, "step": 370, "valid_targets_mean": 959.8, "valid_targets_min": 476 }, { "epoch": 0.6, "grad_norm": 0.8557198986382014, "learning_rate": 3.4155251141552515e-05, "loss": 0.1744, "loss_nan_ranks": 0, "loss_rank_avg": 0.14356130361557007, "step": 375, "valid_targets_mean": 779.0, "valid_targets_min": 553 }, { "epoch": 0.608, "grad_norm": 0.80119065982916, "learning_rate": 3.461187214611872e-05, "loss": 0.2034, "loss_nan_ranks": 0, "loss_rank_avg": 0.273703932762146, "step": 380, "valid_targets_mean": 1140.9, "valid_targets_min": 454 }, { "epoch": 0.616, "grad_norm": 0.8725319150358779, "learning_rate": 3.506849315068493e-05, "loss": 0.2366, "loss_nan_ranks": 0, "loss_rank_avg": 0.18104848265647888, "step": 385, "valid_targets_mean": 840.5, "valid_targets_min": 467 }, { "epoch": 0.624, "grad_norm": 0.8081587096181386, "learning_rate": 3.5525114155251146e-05, "loss": 0.2282, "loss_nan_ranks": 0, "loss_rank_avg": 0.16609951853752136, "step": 390, "valid_targets_mean": 862.6, "valid_targets_min": 506 }, { "epoch": 0.632, "grad_norm": 0.8859866609772926, "learning_rate": 3.5981735159817354e-05, "loss": 0.178, "loss_nan_ranks": 0, "loss_rank_avg": 0.17506490647792816, "step": 395, "valid_targets_mean": 830.5, "valid_targets_min": 521 }, { "epoch": 0.64, "grad_norm": 0.839676869764214, "learning_rate": 3.643835616438356e-05, "loss": 0.218, "loss_nan_ranks": 0, "loss_rank_avg": 0.14028877019882202, "step": 400, "valid_targets_mean": 722.8, "valid_targets_min": 484 }, { "epoch": 0.648, "grad_norm": 0.9365083173383925, "learning_rate": 3.689497716894977e-05, "loss": 0.1808, "loss_nan_ranks": 0, "loss_rank_avg": 0.17637063562870026, "step": 405, "valid_targets_mean": 786.9, "valid_targets_min": 499 }, { "epoch": 0.656, "grad_norm": 0.8793841415231017, "learning_rate": 3.7351598173515985e-05, "loss": 0.2499, "loss_nan_ranks": 0, "loss_rank_avg": 0.18461337685585022, "step": 410, "valid_targets_mean": 849.4, "valid_targets_min": 513 }, { "epoch": 0.664, "grad_norm": 0.8290466124079698, "learning_rate": 3.780821917808219e-05, "loss": 0.1796, "loss_nan_ranks": 0, "loss_rank_avg": 0.1359868049621582, "step": 415, "valid_targets_mean": 780.6, "valid_targets_min": 472 }, { "epoch": 0.672, "grad_norm": 0.8870519393979674, "learning_rate": 3.82648401826484e-05, "loss": 0.2658, "loss_nan_ranks": 0, "loss_rank_avg": 0.45135176181793213, "step": 420, "valid_targets_mean": 1707.6, "valid_targets_min": 475 }, { "epoch": 0.68, "grad_norm": 0.789933869448683, "learning_rate": 3.8721461187214615e-05, "loss": 0.1907, "loss_nan_ranks": 0, "loss_rank_avg": 0.15668487548828125, "step": 425, "valid_targets_mean": 866.2, "valid_targets_min": 445 }, { "epoch": 0.688, "grad_norm": 0.7871956491176277, "learning_rate": 3.9178082191780823e-05, "loss": 0.2072, "loss_nan_ranks": 0, "loss_rank_avg": 0.20149065554141998, "step": 430, "valid_targets_mean": 987.6, "valid_targets_min": 502 }, { "epoch": 0.696, "grad_norm": 0.6989689379036534, "learning_rate": 3.963470319634704e-05, "loss": 0.2233, "loss_nan_ranks": 0, "loss_rank_avg": 0.1556272655725479, "step": 435, "valid_targets_mean": 929.2, "valid_targets_min": 504 }, { "epoch": 0.704, "grad_norm": 0.8014076411649944, "learning_rate": 3.99999936325009e-05, "loss": 0.2199, "loss_nan_ranks": 0, "loss_rank_avg": 0.21174079179763794, "step": 440, "valid_targets_mean": 881.0, "valid_targets_min": 490 }, { "epoch": 0.712, "grad_norm": 0.8201622776606732, "learning_rate": 3.9999770770457856e-05, "loss": 0.2333, "loss_nan_ranks": 0, "loss_rank_avg": 0.22932517528533936, "step": 445, "valid_targets_mean": 1038.4, "valid_targets_min": 481 }, { "epoch": 0.72, "grad_norm": 0.8487813327477935, "learning_rate": 3.9999229537513936e-05, "loss": 0.1749, "loss_nan_ranks": 0, "loss_rank_avg": 0.15758301317691803, "step": 450, "valid_targets_mean": 783.1, "valid_targets_min": 505 }, { "epoch": 0.728, "grad_norm": 0.8952764089193642, "learning_rate": 3.999836994228487e-05, "loss": 0.1984, "loss_nan_ranks": 0, "loss_rank_avg": 0.26837146282196045, "step": 455, "valid_targets_mean": 901.9, "valid_targets_min": 479 }, { "epoch": 0.736, "grad_norm": 0.8779288729886306, "learning_rate": 3.999719199845432e-05, "loss": 0.1818, "loss_nan_ranks": 0, "loss_rank_avg": 0.17460289597511292, "step": 460, "valid_targets_mean": 866.8, "valid_targets_min": 494 }, { "epoch": 0.744, "grad_norm": 0.7572742289270304, "learning_rate": 3.999569572477366e-05, "loss": 0.1905, "loss_nan_ranks": 0, "loss_rank_avg": 0.14431582391262054, "step": 465, "valid_targets_mean": 799.2, "valid_targets_min": 453 }, { "epoch": 0.752, "grad_norm": 0.8242200271581344, "learning_rate": 3.999388114506166e-05, "loss": 0.2043, "loss_nan_ranks": 0, "loss_rank_avg": 0.14205431938171387, "step": 470, "valid_targets_mean": 804.6, "valid_targets_min": 398 }, { "epoch": 0.76, "grad_norm": 0.790064793882924, "learning_rate": 3.999174828820413e-05, "loss": 0.2032, "loss_nan_ranks": 0, "loss_rank_avg": 0.25519803166389465, "step": 475, "valid_targets_mean": 1264.1, "valid_targets_min": 586 }, { "epoch": 0.768, "grad_norm": 0.8293696975789525, "learning_rate": 3.998929718815341e-05, "loss": 0.2135, "loss_nan_ranks": 0, "loss_rank_avg": 0.2700884938240051, "step": 480, "valid_targets_mean": 1072.2, "valid_targets_min": 467 }, { "epoch": 0.776, "grad_norm": 0.7187946308165557, "learning_rate": 3.998652788392792e-05, "loss": 0.1796, "loss_nan_ranks": 0, "loss_rank_avg": 0.16771897673606873, "step": 485, "valid_targets_mean": 765.6, "valid_targets_min": 523 }, { "epoch": 0.784, "grad_norm": 1.2061081366763156, "learning_rate": 3.9983440419611445e-05, "loss": 0.1842, "loss_nan_ranks": 0, "loss_rank_avg": 0.203563392162323, "step": 490, "valid_targets_mean": 874.2, "valid_targets_min": 490 }, { "epoch": 0.792, "grad_norm": 0.8127919624289628, "learning_rate": 3.9980034844352494e-05, "loss": 0.1818, "loss_nan_ranks": 0, "loss_rank_avg": 0.22209608554840088, "step": 495, "valid_targets_mean": 968.9, "valid_targets_min": 493 }, { "epoch": 0.8, "grad_norm": 0.7264376380855042, "learning_rate": 3.9976311212363495e-05, "loss": 0.2122, "loss_nan_ranks": 0, "loss_rank_avg": 0.19169259071350098, "step": 500, "valid_targets_mean": 931.7, "valid_targets_min": 528 }, { "epoch": 0.808, "grad_norm": 0.7044398835028907, "learning_rate": 3.997226958291992e-05, "loss": 0.162, "loss_nan_ranks": 0, "loss_rank_avg": 0.19893905520439148, "step": 505, "valid_targets_mean": 1076.0, "valid_targets_min": 521 }, { "epoch": 0.816, "grad_norm": 0.9410261251551351, "learning_rate": 3.996791002035937e-05, "loss": 0.2024, "loss_nan_ranks": 0, "loss_rank_avg": 0.21313059329986572, "step": 510, "valid_targets_mean": 912.6, "valid_targets_min": 524 }, { "epoch": 0.824, "grad_norm": 0.8084364192923831, "learning_rate": 3.996323259408055e-05, "loss": 0.1981, "loss_nan_ranks": 0, "loss_rank_avg": 0.17545118927955627, "step": 515, "valid_targets_mean": 806.0, "valid_targets_min": 455 }, { "epoch": 0.832, "grad_norm": 0.7177048085138111, "learning_rate": 3.995823737854211e-05, "loss": 0.1707, "loss_nan_ranks": 0, "loss_rank_avg": 0.17910686135292053, "step": 520, "valid_targets_mean": 897.8, "valid_targets_min": 481 }, { "epoch": 0.84, "grad_norm": 0.7475898854985661, "learning_rate": 3.9952924453261534e-05, "loss": 0.209, "loss_nan_ranks": 0, "loss_rank_avg": 0.17634199559688568, "step": 525, "valid_targets_mean": 926.7, "valid_targets_min": 521 }, { "epoch": 0.848, "grad_norm": 0.7546628398117033, "learning_rate": 3.994729390281384e-05, "loss": 0.2101, "loss_nan_ranks": 0, "loss_rank_avg": 0.13323059678077698, "step": 530, "valid_targets_mean": 780.4, "valid_targets_min": 434 }, { "epoch": 0.856, "grad_norm": 0.8075157474709176, "learning_rate": 3.994134581683021e-05, "loss": 0.2108, "loss_nan_ranks": 0, "loss_rank_avg": 0.36277538537979126, "step": 535, "valid_targets_mean": 1380.0, "valid_targets_min": 501 }, { "epoch": 0.864, "grad_norm": 0.7426841895384861, "learning_rate": 3.9935080289996626e-05, "loss": 0.166, "loss_nan_ranks": 0, "loss_rank_avg": 0.1710471212863922, "step": 540, "valid_targets_mean": 933.2, "valid_targets_min": 515 }, { "epoch": 0.872, "grad_norm": 0.7543785500593825, "learning_rate": 3.992849742205228e-05, "loss": 0.1949, "loss_nan_ranks": 0, "loss_rank_avg": 0.14548061788082123, "step": 545, "valid_targets_mean": 746.6, "valid_targets_min": 396 }, { "epoch": 0.88, "grad_norm": 0.733905548864994, "learning_rate": 3.9921597317788065e-05, "loss": 0.2205, "loss_nan_ranks": 0, "loss_rank_avg": 0.15254877507686615, "step": 550, "valid_targets_mean": 755.8, "valid_targets_min": 485 }, { "epoch": 0.888, "grad_norm": 0.5814823219075821, "learning_rate": 3.991438008704486e-05, "loss": 0.1882, "loss_nan_ranks": 0, "loss_rank_avg": 0.2061724066734314, "step": 555, "valid_targets_mean": 1354.9, "valid_targets_min": 524 }, { "epoch": 0.896, "grad_norm": 0.8896697436915326, "learning_rate": 3.990684584471179e-05, "loss": 0.2223, "loss_nan_ranks": 0, "loss_rank_avg": 0.28819096088409424, "step": 560, "valid_targets_mean": 1018.8, "valid_targets_min": 572 }, { "epoch": 0.904, "grad_norm": 0.7027146233089345, "learning_rate": 3.989899471072441e-05, "loss": 0.2032, "loss_nan_ranks": 0, "loss_rank_avg": 0.22908379137516022, "step": 565, "valid_targets_mean": 1084.9, "valid_targets_min": 513 }, { "epoch": 0.912, "grad_norm": 0.7297407172134633, "learning_rate": 3.9890826810062784e-05, "loss": 0.2577, "loss_nan_ranks": 0, "loss_rank_avg": 0.162847101688385, "step": 570, "valid_targets_mean": 836.3, "valid_targets_min": 449 }, { "epoch": 0.92, "grad_norm": 0.7246071620305752, "learning_rate": 3.988234227274949e-05, "loss": 0.1578, "loss_nan_ranks": 0, "loss_rank_avg": 0.17553573846817017, "step": 575, "valid_targets_mean": 854.9, "valid_targets_min": 495 }, { "epoch": 0.928, "grad_norm": 0.7329086051551755, "learning_rate": 3.987354123384757e-05, "loss": 0.1694, "loss_nan_ranks": 0, "loss_rank_avg": 0.17792247235774994, "step": 580, "valid_targets_mean": 827.2, "valid_targets_min": 549 }, { "epoch": 0.936, "grad_norm": 0.7630014106827517, "learning_rate": 3.9864423833458364e-05, "loss": 0.218, "loss_nan_ranks": 0, "loss_rank_avg": 0.18731489777565002, "step": 585, "valid_targets_mean": 843.2, "valid_targets_min": 496 }, { "epoch": 0.944, "grad_norm": 0.7339005237798694, "learning_rate": 3.9854990216719285e-05, "loss": 0.1951, "loss_nan_ranks": 0, "loss_rank_avg": 0.19711682200431824, "step": 590, "valid_targets_mean": 838.9, "valid_targets_min": 511 }, { "epoch": 0.952, "grad_norm": 0.7090853251170041, "learning_rate": 3.98452405338015e-05, "loss": 0.1735, "loss_nan_ranks": 0, "loss_rank_avg": 0.18280717730522156, "step": 595, "valid_targets_mean": 835.9, "valid_targets_min": 492 }, { "epoch": 0.96, "grad_norm": 0.6730331550707466, "learning_rate": 3.983517493990756e-05, "loss": 0.2228, "loss_nan_ranks": 0, "loss_rank_avg": 0.16648909449577332, "step": 600, "valid_targets_mean": 987.8, "valid_targets_min": 456 }, { "epoch": 0.968, "grad_norm": 0.6671466020202659, "learning_rate": 3.982479359526892e-05, "loss": 0.2181, "loss_nan_ranks": 0, "loss_rank_avg": 0.12919144332408905, "step": 605, "valid_targets_mean": 851.2, "valid_targets_min": 453 }, { "epoch": 0.976, "grad_norm": 0.7468131098588212, "learning_rate": 3.981409666514336e-05, "loss": 0.2178, "loss_nan_ranks": 0, "loss_rank_avg": 0.24386389553546906, "step": 610, "valid_targets_mean": 1015.8, "valid_targets_min": 466 }, { "epoch": 0.984, "grad_norm": 0.7165572848472425, "learning_rate": 3.98030843198124e-05, "loss": 0.16, "loss_nan_ranks": 0, "loss_rank_avg": 0.17434345185756683, "step": 615, "valid_targets_mean": 874.8, "valid_targets_min": 520 }, { "epoch": 0.992, "grad_norm": 0.7155565677019673, "learning_rate": 3.979175673457858e-05, "loss": 0.1677, "loss_nan_ranks": 0, "loss_rank_avg": 0.16234704852104187, "step": 620, "valid_targets_mean": 787.7, "valid_targets_min": 513 }, { "epoch": 1.0, "grad_norm": 0.7163938049205832, "learning_rate": 3.9780114089762616e-05, "loss": 0.1584, "loss_nan_ranks": 0, "loss_rank_avg": 0.156229630112648, "step": 625, "valid_targets_mean": 925.1, "valid_targets_min": 476 }, { "epoch": 1.008, "grad_norm": 0.7163952448739522, "learning_rate": 3.976815657070062e-05, "loss": 0.2132, "loss_nan_ranks": 0, "loss_rank_avg": 0.13931968808174133, "step": 630, "valid_targets_mean": 825.9, "valid_targets_min": 467 }, { "epoch": 1.016, "grad_norm": 0.8471080789416268, "learning_rate": 3.975588436774107e-05, "loss": 0.179, "loss_nan_ranks": 0, "loss_rank_avg": 0.14833077788352966, "step": 635, "valid_targets_mean": 767.4, "valid_targets_min": 443 }, { "epoch": 1.024, "grad_norm": 0.8554813868151919, "learning_rate": 3.9743297676241826e-05, "loss": 0.188, "loss_nan_ranks": 0, "loss_rank_avg": 0.21545778214931488, "step": 640, "valid_targets_mean": 992.5, "valid_targets_min": 451 }, { "epoch": 1.032, "grad_norm": 0.8308020439403382, "learning_rate": 3.9730396696566994e-05, "loss": 0.1479, "loss_nan_ranks": 0, "loss_rank_avg": 0.1487816572189331, "step": 645, "valid_targets_mean": 791.4, "valid_targets_min": 538 }, { "epoch": 1.04, "grad_norm": 0.7933045581970818, "learning_rate": 3.971718163408375e-05, "loss": 0.1591, "loss_nan_ranks": 0, "loss_rank_avg": 0.16302749514579773, "step": 650, "valid_targets_mean": 819.8, "valid_targets_min": 502 }, { "epoch": 1.048, "grad_norm": 0.7252431785653002, "learning_rate": 3.9703652699159093e-05, "loss": 0.2051, "loss_nan_ranks": 0, "loss_rank_avg": 0.13497385382652283, "step": 655, "valid_targets_mean": 881.0, "valid_targets_min": 480 }, { "epoch": 1.056, "grad_norm": 0.7663840270716306, "learning_rate": 3.9689810107156425e-05, "loss": 0.1767, "loss_nan_ranks": 0, "loss_rank_avg": 0.27258244156837463, "step": 660, "valid_targets_mean": 1186.0, "valid_targets_min": 520 }, { "epoch": 1.064, "grad_norm": 0.7650802553886993, "learning_rate": 3.967565407843222e-05, "loss": 0.2198, "loss_nan_ranks": 0, "loss_rank_avg": 0.1957072615623474, "step": 665, "valid_targets_mean": 877.8, "valid_targets_min": 481 }, { "epoch": 1.072, "grad_norm": 0.7555776836490888, "learning_rate": 3.966118483833242e-05, "loss": 0.1928, "loss_nan_ranks": 0, "loss_rank_avg": 0.1382647156715393, "step": 670, "valid_targets_mean": 735.2, "valid_targets_min": 491 }, { "epoch": 1.08, "grad_norm": 0.7202017893256426, "learning_rate": 3.964640261718893e-05, "loss": 0.16, "loss_nan_ranks": 0, "loss_rank_avg": 0.13862335681915283, "step": 675, "valid_targets_mean": 868.3, "valid_targets_min": 469 }, { "epoch": 1.088, "grad_norm": 0.6865668130814834, "learning_rate": 3.963130765031589e-05, "loss": 0.2171, "loss_nan_ranks": 0, "loss_rank_avg": 0.1527758687734604, "step": 680, "valid_targets_mean": 822.8, "valid_targets_min": 479 }, { "epoch": 1.096, "grad_norm": 0.7466684411513757, "learning_rate": 3.961590017800598e-05, "loss": 0.2372, "loss_nan_ranks": 0, "loss_rank_avg": 0.13745608925819397, "step": 685, "valid_targets_mean": 807.3, "valid_targets_min": 498 }, { "epoch": 1.104, "grad_norm": 0.730266233475475, "learning_rate": 3.960018044552653e-05, "loss": 0.1958, "loss_nan_ranks": 0, "loss_rank_avg": 0.15390340983867645, "step": 690, "valid_targets_mean": 899.9, "valid_targets_min": 536 }, { "epoch": 1.112, "grad_norm": 0.7389053816779148, "learning_rate": 3.9584148703115704e-05, "loss": 0.1544, "loss_nan_ranks": 0, "loss_rank_avg": 0.14118877053260803, "step": 695, "valid_targets_mean": 842.4, "valid_targets_min": 441 }, { "epoch": 1.12, "grad_norm": 0.7579424093822917, "learning_rate": 3.956780520597842e-05, "loss": 0.1707, "loss_nan_ranks": 0, "loss_rank_avg": 0.16270402073860168, "step": 700, "valid_targets_mean": 919.3, "valid_targets_min": 434 }, { "epoch": 1.1280000000000001, "grad_norm": 0.7565287647746204, "learning_rate": 3.955115021428236e-05, "loss": 0.1604, "loss_nan_ranks": 0, "loss_rank_avg": 0.18007123470306396, "step": 705, "valid_targets_mean": 873.1, "valid_targets_min": 458 }, { "epoch": 1.1360000000000001, "grad_norm": 0.7677756015390184, "learning_rate": 3.95341839931538e-05, "loss": 0.1956, "loss_nan_ranks": 0, "loss_rank_avg": 0.24875490367412567, "step": 710, "valid_targets_mean": 1183.9, "valid_targets_min": 468 }, { "epoch": 1.144, "grad_norm": 0.7396285700137841, "learning_rate": 3.95169068126734e-05, "loss": 0.1873, "loss_nan_ranks": 0, "loss_rank_avg": 0.19427287578582764, "step": 715, "valid_targets_mean": 889.6, "valid_targets_min": 490 }, { "epoch": 1.152, "grad_norm": 0.7465845123157784, "learning_rate": 3.949931894787187e-05, "loss": 0.2645, "loss_nan_ranks": 0, "loss_rank_avg": 0.18916454911231995, "step": 720, "valid_targets_mean": 892.0, "valid_targets_min": 499 }, { "epoch": 1.16, "grad_norm": 0.8513718868816662, "learning_rate": 3.948142067872565e-05, "loss": 0.1771, "loss_nan_ranks": 0, "loss_rank_avg": 0.18964636325836182, "step": 725, "valid_targets_mean": 874.4, "valid_targets_min": 424 }, { "epoch": 1.168, "grad_norm": 0.8462870831677214, "learning_rate": 3.946321229015241e-05, "loss": 0.1604, "loss_nan_ranks": 0, "loss_rank_avg": 0.2392217218875885, "step": 730, "valid_targets_mean": 990.5, "valid_targets_min": 451 }, { "epoch": 1.176, "grad_norm": 0.7205212704585172, "learning_rate": 3.944469407200652e-05, "loss": 0.1474, "loss_nan_ranks": 0, "loss_rank_avg": 0.1376579999923706, "step": 735, "valid_targets_mean": 763.5, "valid_targets_min": 498 }, { "epoch": 1.184, "grad_norm": 0.9037787051653652, "learning_rate": 3.942586631907444e-05, "loss": 0.1952, "loss_nan_ranks": 0, "loss_rank_avg": 0.2241126149892807, "step": 740, "valid_targets_mean": 978.2, "valid_targets_min": 469 }, { "epoch": 1.192, "grad_norm": 0.7386674551344024, "learning_rate": 3.9406729331070054e-05, "loss": 0.1863, "loss_nan_ranks": 0, "loss_rank_avg": 0.19754649698734283, "step": 745, "valid_targets_mean": 854.7, "valid_targets_min": 457 }, { "epoch": 1.2, "grad_norm": 0.7729001315896438, "learning_rate": 3.938728341262985e-05, "loss": 0.1973, "loss_nan_ranks": 0, "loss_rank_avg": 0.2784135341644287, "step": 750, "valid_targets_mean": 1191.6, "valid_targets_min": 458 }, { "epoch": 1.208, "grad_norm": 0.7838951426901545, "learning_rate": 3.936752887330812e-05, "loss": 0.1662, "loss_nan_ranks": 0, "loss_rank_avg": 0.21011705696582794, "step": 755, "valid_targets_mean": 876.6, "valid_targets_min": 520 }, { "epoch": 1.216, "grad_norm": 0.6669012023304911, "learning_rate": 3.9347466027571975e-05, "loss": 0.1624, "loss_nan_ranks": 0, "loss_rank_avg": 0.1563045084476471, "step": 760, "valid_targets_mean": 1017.4, "valid_targets_min": 615 }, { "epoch": 1.224, "grad_norm": 0.6694182335646301, "learning_rate": 3.932709519479639e-05, "loss": 0.153, "loss_nan_ranks": 0, "loss_rank_avg": 0.160549595952034, "step": 765, "valid_targets_mean": 932.6, "valid_targets_min": 479 }, { "epoch": 1.232, "grad_norm": 0.7915346441577366, "learning_rate": 3.930641669925911e-05, "loss": 0.1804, "loss_nan_ranks": 0, "loss_rank_avg": 0.18584375083446503, "step": 770, "valid_targets_mean": 1022.6, "valid_targets_min": 481 }, { "epoch": 1.24, "grad_norm": 0.7002389484036419, "learning_rate": 3.928543087013546e-05, "loss": 0.1638, "loss_nan_ranks": 0, "loss_rank_avg": 0.15864118933677673, "step": 775, "valid_targets_mean": 941.6, "valid_targets_min": 503 }, { "epoch": 1.248, "grad_norm": 0.8419494208862676, "learning_rate": 3.926413804149315e-05, "loss": 0.17, "loss_nan_ranks": 0, "loss_rank_avg": 0.14137229323387146, "step": 780, "valid_targets_mean": 681.3, "valid_targets_min": 509 }, { "epoch": 1.256, "grad_norm": 0.8364682773807548, "learning_rate": 3.9242538552286894e-05, "loss": 0.1559, "loss_nan_ranks": 0, "loss_rank_avg": 0.17337577044963837, "step": 785, "valid_targets_mean": 968.3, "valid_targets_min": 465 }, { "epoch": 1.264, "grad_norm": 0.6961189538122279, "learning_rate": 3.9220632746353096e-05, "loss": 0.1708, "loss_nan_ranks": 0, "loss_rank_avg": 0.13661912083625793, "step": 790, "valid_targets_mean": 863.1, "valid_targets_min": 498 }, { "epoch": 1.272, "grad_norm": 0.715740620782587, "learning_rate": 3.91984209724043e-05, "loss": 0.1912, "loss_nan_ranks": 0, "loss_rank_avg": 0.14010174572467804, "step": 795, "valid_targets_mean": 782.7, "valid_targets_min": 474 }, { "epoch": 1.28, "grad_norm": 0.7346689256256704, "learning_rate": 3.917590358402369e-05, "loss": 0.195, "loss_nan_ranks": 0, "loss_rank_avg": 0.28644707798957825, "step": 800, "valid_targets_mean": 1305.2, "valid_targets_min": 515 }, { "epoch": 1.288, "grad_norm": 0.7385141102122526, "learning_rate": 3.915308093965943e-05, "loss": 0.1888, "loss_nan_ranks": 0, "loss_rank_avg": 0.14231915771961212, "step": 805, "valid_targets_mean": 743.3, "valid_targets_min": 465 }, { "epoch": 1.296, "grad_norm": 0.7629780064003469, "learning_rate": 3.9129953402618976e-05, "loss": 0.1511, "loss_nan_ranks": 0, "loss_rank_avg": 0.18741099536418915, "step": 810, "valid_targets_mean": 986.6, "valid_targets_min": 456 }, { "epoch": 1.304, "grad_norm": 0.7269492317726601, "learning_rate": 3.91065213410633e-05, "loss": 0.1635, "loss_nan_ranks": 0, "loss_rank_avg": 0.25146985054016113, "step": 815, "valid_targets_mean": 1202.8, "valid_targets_min": 560 }, { "epoch": 1.312, "grad_norm": 0.7343886255914776, "learning_rate": 3.908278512800098e-05, "loss": 0.2026, "loss_nan_ranks": 0, "loss_rank_avg": 0.14359501004219055, "step": 820, "valid_targets_mean": 782.1, "valid_targets_min": 466 }, { "epoch": 1.32, "grad_norm": 0.6799273008618566, "learning_rate": 3.905874514128235e-05, "loss": 0.1859, "loss_nan_ranks": 0, "loss_rank_avg": 0.2700965702533722, "step": 825, "valid_targets_mean": 1318.8, "valid_targets_min": 566 }, { "epoch": 1.328, "grad_norm": 0.6929688539379448, "learning_rate": 3.903440176359338e-05, "loss": 0.2076, "loss_nan_ranks": 0, "loss_rank_avg": 0.1730683296918869, "step": 830, "valid_targets_mean": 943.1, "valid_targets_min": 512 }, { "epoch": 1.336, "grad_norm": 0.7245659858484952, "learning_rate": 3.90097553824497e-05, "loss": 0.1678, "loss_nan_ranks": 0, "loss_rank_avg": 0.17537246644496918, "step": 835, "valid_targets_mean": 894.2, "valid_targets_min": 478 }, { "epoch": 1.3439999999999999, "grad_norm": 0.7823444209829755, "learning_rate": 3.8984806390190304e-05, "loss": 0.1484, "loss_nan_ranks": 0, "loss_rank_avg": 0.12793993949890137, "step": 840, "valid_targets_mean": 716.6, "valid_targets_min": 469 }, { "epoch": 1.3519999999999999, "grad_norm": 0.7109404586284385, "learning_rate": 3.895955518397141e-05, "loss": 0.1502, "loss_nan_ranks": 0, "loss_rank_avg": 0.15655386447906494, "step": 845, "valid_targets_mean": 845.7, "valid_targets_min": 407 }, { "epoch": 1.3599999999999999, "grad_norm": 0.6871711965474857, "learning_rate": 3.893400216576011e-05, "loss": 0.2019, "loss_nan_ranks": 0, "loss_rank_avg": 0.15780571103096008, "step": 850, "valid_targets_mean": 861.4, "valid_targets_min": 497 }, { "epoch": 1.3679999999999999, "grad_norm": 0.5783865771624349, "learning_rate": 3.89081477423279e-05, "loss": 0.2152, "loss_nan_ranks": 0, "loss_rank_avg": 0.1397193968296051, "step": 855, "valid_targets_mean": 965.9, "valid_targets_min": 458 }, { "epoch": 1.376, "grad_norm": 0.7111643170486737, "learning_rate": 3.888199232524434e-05, "loss": 0.1536, "loss_nan_ranks": 0, "loss_rank_avg": 0.1263478398323059, "step": 860, "valid_targets_mean": 860.1, "valid_targets_min": 556 }, { "epoch": 1.384, "grad_norm": 0.6880233126552526, "learning_rate": 3.8855536330870354e-05, "loss": 0.1449, "loss_nan_ranks": 0, "loss_rank_avg": 0.12957754731178284, "step": 865, "valid_targets_mean": 769.5, "valid_targets_min": 508 }, { "epoch": 1.392, "grad_norm": 0.7196773389821709, "learning_rate": 3.882878018035173e-05, "loss": 0.1792, "loss_nan_ranks": 0, "loss_rank_avg": 0.23339521884918213, "step": 870, "valid_targets_mean": 1114.1, "valid_targets_min": 505 }, { "epoch": 1.4, "grad_norm": 0.5175206065306407, "learning_rate": 3.880172429961232e-05, "loss": 0.1784, "loss_nan_ranks": 0, "loss_rank_avg": 0.19581802189350128, "step": 875, "valid_targets_mean": 1740.6, "valid_targets_min": 567 }, { "epoch": 1.408, "grad_norm": 0.7027122734134265, "learning_rate": 3.877436911934733e-05, "loss": 0.1567, "loss_nan_ranks": 0, "loss_rank_avg": 0.21533304452896118, "step": 880, "valid_targets_mean": 1119.6, "valid_targets_min": 451 }, { "epoch": 1.416, "grad_norm": 0.6879782852818223, "learning_rate": 3.874671507501641e-05, "loss": 0.1779, "loss_nan_ranks": 0, "loss_rank_avg": 0.2163260579109192, "step": 885, "valid_targets_mean": 985.2, "valid_targets_min": 562 }, { "epoch": 1.424, "grad_norm": 0.6932470125732457, "learning_rate": 3.871876260683677e-05, "loss": 0.1687, "loss_nan_ranks": 0, "loss_rank_avg": 0.1334119737148285, "step": 890, "valid_targets_mean": 764.4, "valid_targets_min": 448 }, { "epoch": 1.432, "grad_norm": 0.6148520291402507, "learning_rate": 3.869051215977612e-05, "loss": 0.1503, "loss_nan_ranks": 0, "loss_rank_avg": 0.16043812036514282, "step": 895, "valid_targets_mean": 1053.5, "valid_targets_min": 450 }, { "epoch": 1.44, "grad_norm": 0.5982261878444286, "learning_rate": 3.8661964183545634e-05, "loss": 0.16, "loss_nan_ranks": 0, "loss_rank_avg": 0.15015794336795807, "step": 900, "valid_targets_mean": 862.0, "valid_targets_min": 495 }, { "epoch": 1.448, "grad_norm": 0.713865426155975, "learning_rate": 3.863311913259276e-05, "loss": 0.1671, "loss_nan_ranks": 0, "loss_rank_avg": 0.16693344712257385, "step": 905, "valid_targets_mean": 970.3, "valid_targets_min": 527 }, { "epoch": 1.456, "grad_norm": 0.7241685968365615, "learning_rate": 3.860397746609402e-05, "loss": 0.1539, "loss_nan_ranks": 0, "loss_rank_avg": 0.13500142097473145, "step": 910, "valid_targets_mean": 716.9, "valid_targets_min": 455 }, { "epoch": 1.464, "grad_norm": 0.7054645907135928, "learning_rate": 3.857453964794764e-05, "loss": 0.1913, "loss_nan_ranks": 0, "loss_rank_avg": 0.14917078614234924, "step": 915, "valid_targets_mean": 762.1, "valid_targets_min": 453 }, { "epoch": 1.472, "grad_norm": 0.667178052044947, "learning_rate": 3.854480614676624e-05, "loss": 0.1551, "loss_nan_ranks": 0, "loss_rank_avg": 0.12918347120285034, "step": 920, "valid_targets_mean": 716.3, "valid_targets_min": 478 }, { "epoch": 1.48, "grad_norm": 0.6908927945761476, "learning_rate": 3.851477743586932e-05, "loss": 0.1873, "loss_nan_ranks": 0, "loss_rank_avg": 0.22470583021640778, "step": 925, "valid_targets_mean": 1062.8, "valid_targets_min": 487 }, { "epoch": 1.488, "grad_norm": 0.778072768317643, "learning_rate": 3.8484453993275746e-05, "loss": 0.1841, "loss_nan_ranks": 0, "loss_rank_avg": 0.19133061170578003, "step": 930, "valid_targets_mean": 816.6, "valid_targets_min": 453 }, { "epoch": 1.496, "grad_norm": 0.6708195314357452, "learning_rate": 3.8453836301696134e-05, "loss": 0.1677, "loss_nan_ranks": 0, "loss_rank_avg": 0.14336860179901123, "step": 935, "valid_targets_mean": 908.8, "valid_targets_min": 524 }, { "epoch": 1.504, "grad_norm": 0.6726051866296197, "learning_rate": 3.842292484852518e-05, "loss": 0.1715, "loss_nan_ranks": 0, "loss_rank_avg": 0.2406589686870575, "step": 940, "valid_targets_mean": 1354.1, "valid_targets_min": 539 }, { "epoch": 1.512, "grad_norm": 0.8150253436875796, "learning_rate": 3.8391720125833875e-05, "loss": 0.2038, "loss_nan_ranks": 0, "loss_rank_avg": 0.2769927978515625, "step": 945, "valid_targets_mean": 1018.1, "valid_targets_min": 446 }, { "epoch": 1.52, "grad_norm": 0.6891761844067167, "learning_rate": 3.83602226303617e-05, "loss": 0.1629, "loss_nan_ranks": 0, "loss_rank_avg": 0.15847039222717285, "step": 950, "valid_targets_mean": 838.3, "valid_targets_min": 466 }, { "epoch": 1.528, "grad_norm": 0.6608491869189769, "learning_rate": 3.83284328635087e-05, "loss": 0.1488, "loss_nan_ranks": 0, "loss_rank_avg": 0.15309180319309235, "step": 955, "valid_targets_mean": 971.8, "valid_targets_min": 436 }, { "epoch": 1.536, "grad_norm": 0.6554841706714233, "learning_rate": 3.829635133132751e-05, "loss": 0.1507, "loss_nan_ranks": 0, "loss_rank_avg": 0.17172548174858093, "step": 960, "valid_targets_mean": 1017.0, "valid_targets_min": 523 }, { "epoch": 1.544, "grad_norm": 0.6466612083633514, "learning_rate": 3.8263978544515304e-05, "loss": 0.2526, "loss_nan_ranks": 0, "loss_rank_avg": 0.16498011350631714, "step": 965, "valid_targets_mean": 910.4, "valid_targets_min": 566 }, { "epoch": 1.552, "grad_norm": 0.631142435859698, "learning_rate": 3.823131501840565e-05, "loss": 0.1688, "loss_nan_ranks": 0, "loss_rank_avg": 0.10579565167427063, "step": 970, "valid_targets_mean": 761.9, "valid_targets_min": 510 }, { "epoch": 1.56, "grad_norm": 0.6819743081635528, "learning_rate": 3.819836127296032e-05, "loss": 0.1907, "loss_nan_ranks": 0, "loss_rank_avg": 0.12957951426506042, "step": 975, "valid_targets_mean": 872.1, "valid_targets_min": 572 }, { "epoch": 1.568, "grad_norm": 0.674719954457241, "learning_rate": 3.8165117832761016e-05, "loss": 0.179, "loss_nan_ranks": 0, "loss_rank_avg": 0.2050054520368576, "step": 980, "valid_targets_mean": 971.7, "valid_targets_min": 514 }, { "epoch": 1.576, "grad_norm": 0.9896649808276848, "learning_rate": 3.813158522700098e-05, "loss": 0.1757, "loss_nan_ranks": 0, "loss_rank_avg": 0.18996283411979675, "step": 985, "valid_targets_mean": 987.2, "valid_targets_min": 486 }, { "epoch": 1.584, "grad_norm": 0.661148717635557, "learning_rate": 3.809776398947665e-05, "loss": 0.1917, "loss_nan_ranks": 0, "loss_rank_avg": 0.11855830997228622, "step": 990, "valid_targets_mean": 741.8, "valid_targets_min": 527 }, { "epoch": 1.592, "grad_norm": 0.7254948219099878, "learning_rate": 3.806365465857908e-05, "loss": 0.1584, "loss_nan_ranks": 0, "loss_rank_avg": 0.16585005819797516, "step": 995, "valid_targets_mean": 789.2, "valid_targets_min": 495 }, { "epoch": 1.6, "grad_norm": 0.7277818146668404, "learning_rate": 3.802925777728541e-05, "loss": 0.1855, "loss_nan_ranks": 0, "loss_rank_avg": 0.2311524599790573, "step": 1000, "valid_targets_mean": 1143.7, "valid_targets_min": 531 }, { "epoch": 1.608, "grad_norm": 0.7923440526648224, "learning_rate": 3.799457389315023e-05, "loss": 0.1982, "loss_nan_ranks": 0, "loss_rank_avg": 0.33600425720214844, "step": 1005, "valid_targets_mean": 1205.4, "valid_targets_min": 537 }, { "epoch": 1.616, "grad_norm": 0.657240434703054, "learning_rate": 3.795960355829683e-05, "loss": 0.1901, "loss_nan_ranks": 0, "loss_rank_avg": 0.15497495234012604, "step": 1010, "valid_targets_mean": 873.0, "valid_targets_min": 429 }, { "epoch": 1.624, "grad_norm": 0.7919659404982559, "learning_rate": 3.7924347329408444e-05, "loss": 0.1779, "loss_nan_ranks": 0, "loss_rank_avg": 0.13609406352043152, "step": 1015, "valid_targets_mean": 794.4, "valid_targets_min": 442 }, { "epoch": 1.6320000000000001, "grad_norm": 0.7020518413812868, "learning_rate": 3.788880576771937e-05, "loss": 0.1663, "loss_nan_ranks": 0, "loss_rank_avg": 0.23722708225250244, "step": 1020, "valid_targets_mean": 1074.6, "valid_targets_min": 502 }, { "epoch": 1.6400000000000001, "grad_norm": 0.7170819930313959, "learning_rate": 3.785297943900605e-05, "loss": 0.1814, "loss_nan_ranks": 0, "loss_rank_avg": 0.20049814879894257, "step": 1025, "valid_targets_mean": 979.7, "valid_targets_min": 555 }, { "epoch": 1.6480000000000001, "grad_norm": 0.5529666573219981, "learning_rate": 3.7816868913578044e-05, "loss": 0.1388, "loss_nan_ranks": 0, "loss_rank_avg": 0.12410497665405273, "step": 1030, "valid_targets_mean": 946.4, "valid_targets_min": 525 }, { "epoch": 1.6560000000000001, "grad_norm": 0.7104488195204124, "learning_rate": 3.778047476626897e-05, "loss": 0.1799, "loss_nan_ranks": 0, "loss_rank_avg": 0.13769274950027466, "step": 1035, "valid_targets_mean": 825.1, "valid_targets_min": 509 }, { "epoch": 1.6640000000000001, "grad_norm": 0.7224254406062387, "learning_rate": 3.7743797576427335e-05, "loss": 0.1809, "loss_nan_ranks": 0, "loss_rank_avg": 0.1998235285282135, "step": 1040, "valid_targets_mean": 992.2, "valid_targets_min": 541 }, { "epoch": 1.6720000000000002, "grad_norm": 0.6372839611313066, "learning_rate": 3.770683792790733e-05, "loss": 0.1496, "loss_nan_ranks": 0, "loss_rank_avg": 0.1310766339302063, "step": 1045, "valid_targets_mean": 862.4, "valid_targets_min": 463 }, { "epoch": 1.6800000000000002, "grad_norm": 0.7426837827375632, "learning_rate": 3.766959640905954e-05, "loss": 0.1647, "loss_nan_ranks": 0, "loss_rank_avg": 0.13092140853405, "step": 1050, "valid_targets_mean": 804.3, "valid_targets_min": 441 }, { "epoch": 1.688, "grad_norm": 0.700407597454358, "learning_rate": 3.763207361272153e-05, "loss": 0.1827, "loss_nan_ranks": 0, "loss_rank_avg": 0.16554605960845947, "step": 1055, "valid_targets_mean": 919.0, "valid_targets_min": 604 }, { "epoch": 1.696, "grad_norm": 0.701310339235854, "learning_rate": 3.759427013620849e-05, "loss": 0.1768, "loss_nan_ranks": 0, "loss_rank_avg": 0.14189761877059937, "step": 1060, "valid_targets_mean": 728.5, "valid_targets_min": 455 }, { "epoch": 1.704, "grad_norm": 0.7706015818198613, "learning_rate": 3.755618658130366e-05, "loss": 0.1961, "loss_nan_ranks": 0, "loss_rank_avg": 0.2144859880208969, "step": 1065, "valid_targets_mean": 979.9, "valid_targets_min": 541 }, { "epoch": 1.712, "grad_norm": 0.7389928113492108, "learning_rate": 3.751782355424877e-05, "loss": 0.1719, "loss_nan_ranks": 0, "loss_rank_avg": 0.1575872302055359, "step": 1070, "valid_targets_mean": 858.1, "valid_targets_min": 540 }, { "epoch": 1.72, "grad_norm": 0.7535959567378652, "learning_rate": 3.7479181665734395e-05, "loss": 0.1748, "loss_nan_ranks": 0, "loss_rank_avg": 0.2564152777194977, "step": 1075, "valid_targets_mean": 1000.1, "valid_targets_min": 495 }, { "epoch": 1.728, "grad_norm": 0.6333921431272092, "learning_rate": 3.7440261530890213e-05, "loss": 0.1629, "loss_nan_ranks": 0, "loss_rank_avg": 0.12830719351768494, "step": 1080, "valid_targets_mean": 871.8, "valid_targets_min": 438 }, { "epoch": 1.736, "grad_norm": 0.9380322383200947, "learning_rate": 3.740106376927527e-05, "loss": 0.2729, "loss_nan_ranks": 0, "loss_rank_avg": 0.37827736139297485, "step": 1085, "valid_targets_mean": 1459.2, "valid_targets_min": 650 }, { "epoch": 1.744, "grad_norm": 0.7438467929313883, "learning_rate": 3.7361589004868035e-05, "loss": 0.1951, "loss_nan_ranks": 0, "loss_rank_avg": 0.26724135875701904, "step": 1090, "valid_targets_mean": 1121.4, "valid_targets_min": 536 }, { "epoch": 1.752, "grad_norm": 0.6105440147560874, "learning_rate": 3.7321837866056535e-05, "loss": 0.1603, "loss_nan_ranks": 0, "loss_rank_avg": 0.14079777896404266, "step": 1095, "valid_targets_mean": 904.3, "valid_targets_min": 515 }, { "epoch": 1.76, "grad_norm": 0.5593597328714256, "learning_rate": 3.728181098562831e-05, "loss": 0.1658, "loss_nan_ranks": 0, "loss_rank_avg": 0.1442725956439972, "step": 1100, "valid_targets_mean": 900.8, "valid_targets_min": 566 }, { "epoch": 1.768, "grad_norm": 0.64740837468299, "learning_rate": 3.7241509000760355e-05, "loss": 0.1876, "loss_nan_ranks": 0, "loss_rank_avg": 0.14557820558547974, "step": 1105, "valid_targets_mean": 851.6, "valid_targets_min": 497 }, { "epoch": 1.776, "grad_norm": 0.6640831943887205, "learning_rate": 3.720093255300899e-05, "loss": 0.1871, "loss_nan_ranks": 0, "loss_rank_avg": 0.1812448352575302, "step": 1110, "valid_targets_mean": 1060.0, "valid_targets_min": 523 }, { "epoch": 1.784, "grad_norm": 0.5691045293886683, "learning_rate": 3.7160082288299645e-05, "loss": 0.1827, "loss_nan_ranks": 0, "loss_rank_avg": 0.13262201845645905, "step": 1115, "valid_targets_mean": 823.4, "valid_targets_min": 446 }, { "epoch": 1.792, "grad_norm": 0.6836751894395403, "learning_rate": 3.7118958856916534e-05, "loss": 0.1915, "loss_nan_ranks": 0, "loss_rank_avg": 0.1724395751953125, "step": 1120, "valid_targets_mean": 890.5, "valid_targets_min": 522 }, { "epoch": 1.8, "grad_norm": 0.7712927602843659, "learning_rate": 3.707756291349237e-05, "loss": 0.1824, "loss_nan_ranks": 0, "loss_rank_avg": 0.2417829930782318, "step": 1125, "valid_targets_mean": 1004.9, "valid_targets_min": 502 }, { "epoch": 1.808, "grad_norm": 0.6092761609855996, "learning_rate": 3.703589511699787e-05, "loss": 0.1424, "loss_nan_ranks": 0, "loss_rank_avg": 0.11884502321481705, "step": 1130, "valid_targets_mean": 842.1, "valid_targets_min": 630 }, { "epoch": 1.8159999999999998, "grad_norm": 0.6943283284639649, "learning_rate": 3.6993956130731355e-05, "loss": 0.1677, "loss_nan_ranks": 0, "loss_rank_avg": 0.1568162441253662, "step": 1135, "valid_targets_mean": 893.6, "valid_targets_min": 510 }, { "epoch": 1.8239999999999998, "grad_norm": 0.5735978116252018, "learning_rate": 3.6951746622308106e-05, "loss": 0.2083, "loss_nan_ranks": 0, "loss_rank_avg": 0.23380722105503082, "step": 1140, "valid_targets_mean": 1723.0, "valid_targets_min": 467 }, { "epoch": 1.8319999999999999, "grad_norm": 0.7198338854179046, "learning_rate": 3.69092672636498e-05, "loss": 0.1459, "loss_nan_ranks": 0, "loss_rank_avg": 0.19796811044216156, "step": 1145, "valid_targets_mean": 849.2, "valid_targets_min": 497 }, { "epoch": 1.8399999999999999, "grad_norm": 0.7408908195363425, "learning_rate": 3.686651873097375e-05, "loss": 0.1735, "loss_nan_ranks": 0, "loss_rank_avg": 0.19666478037834167, "step": 1150, "valid_targets_mean": 850.2, "valid_targets_min": 424 }, { "epoch": 1.8479999999999999, "grad_norm": 0.6762739154058124, "learning_rate": 3.682350170478223e-05, "loss": 0.1381, "loss_nan_ranks": 0, "loss_rank_avg": 0.15073895454406738, "step": 1155, "valid_targets_mean": 817.7, "valid_targets_min": 522 }, { "epoch": 1.8559999999999999, "grad_norm": 0.6793788519811231, "learning_rate": 3.678021686985153e-05, "loss": 0.1448, "loss_nan_ranks": 0, "loss_rank_avg": 0.16136187314987183, "step": 1160, "valid_targets_mean": 838.0, "valid_targets_min": 535 }, { "epoch": 1.8639999999999999, "grad_norm": 0.6810417595416728, "learning_rate": 3.6736664915221144e-05, "loss": 0.1848, "loss_nan_ranks": 0, "loss_rank_avg": 0.18239787220954895, "step": 1165, "valid_targets_mean": 936.2, "valid_targets_min": 496 }, { "epoch": 1.8719999999999999, "grad_norm": 0.6866199170569222, "learning_rate": 3.669284653418278e-05, "loss": 0.1585, "loss_nan_ranks": 0, "loss_rank_avg": 0.18783831596374512, "step": 1170, "valid_targets_mean": 939.8, "valid_targets_min": 405 }, { "epoch": 1.88, "grad_norm": 0.6419374253103914, "learning_rate": 3.6648762424269306e-05, "loss": 0.2158, "loss_nan_ranks": 0, "loss_rank_avg": 0.20050100982189178, "step": 1175, "valid_targets_mean": 994.1, "valid_targets_min": 455 }, { "epoch": 1.888, "grad_norm": 0.6102929503441015, "learning_rate": 3.660441328724365e-05, "loss": 0.1407, "loss_nan_ranks": 0, "loss_rank_avg": 0.1384953111410141, "step": 1180, "valid_targets_mean": 914.1, "valid_targets_min": 494 }, { "epoch": 1.896, "grad_norm": 0.9113657887405613, "learning_rate": 3.655979982908764e-05, "loss": 0.1608, "loss_nan_ranks": 0, "loss_rank_avg": 0.19064557552337646, "step": 1185, "valid_targets_mean": 976.2, "valid_targets_min": 507 }, { "epoch": 1.904, "grad_norm": 0.6209355511318521, "learning_rate": 3.6514922759990756e-05, "loss": 0.1455, "loss_nan_ranks": 0, "loss_rank_avg": 0.12727031111717224, "step": 1190, "valid_targets_mean": 743.3, "valid_targets_min": 462 }, { "epoch": 1.912, "grad_norm": 0.7706200982444233, "learning_rate": 3.646978279433883e-05, "loss": 0.221, "loss_nan_ranks": 0, "loss_rank_avg": 0.18883904814720154, "step": 1195, "valid_targets_mean": 913.3, "valid_targets_min": 461 }, { "epoch": 1.92, "grad_norm": 0.6451225901538964, "learning_rate": 3.6424380650702685e-05, "loss": 0.1689, "loss_nan_ranks": 0, "loss_rank_avg": 0.12537574768066406, "step": 1200, "valid_targets_mean": 807.4, "valid_targets_min": 501 }, { "epoch": 1.928, "grad_norm": 0.7540037123180529, "learning_rate": 3.637871705182667e-05, "loss": 0.1824, "loss_nan_ranks": 0, "loss_rank_avg": 0.19827613234519958, "step": 1205, "valid_targets_mean": 1028.9, "valid_targets_min": 522 }, { "epoch": 1.936, "grad_norm": 0.6307659588381288, "learning_rate": 3.633279272461717e-05, "loss": 0.159, "loss_nan_ranks": 0, "loss_rank_avg": 0.14449161291122437, "step": 1210, "valid_targets_mean": 832.3, "valid_targets_min": 439 }, { "epoch": 1.944, "grad_norm": 0.5870419604308534, "learning_rate": 3.628660840013102e-05, "loss": 0.148, "loss_nan_ranks": 0, "loss_rank_avg": 0.12478692829608917, "step": 1215, "valid_targets_mean": 798.2, "valid_targets_min": 513 }, { "epoch": 1.952, "grad_norm": 0.682646779731463, "learning_rate": 3.624016481356392e-05, "loss": 0.2556, "loss_nan_ranks": 0, "loss_rank_avg": 0.2125765085220337, "step": 1220, "valid_targets_mean": 1070.2, "valid_targets_min": 621 }, { "epoch": 1.96, "grad_norm": 0.6057329961642336, "learning_rate": 3.619346270423866e-05, "loss": 0.1598, "loss_nan_ranks": 0, "loss_rank_avg": 0.13245423138141632, "step": 1225, "valid_targets_mean": 852.5, "valid_targets_min": 440 }, { "epoch": 1.968, "grad_norm": 0.6869070863200438, "learning_rate": 3.6146502815593384e-05, "loss": 0.1348, "loss_nan_ranks": 0, "loss_rank_avg": 0.15041041374206543, "step": 1230, "valid_targets_mean": 780.8, "valid_targets_min": 470 }, { "epoch": 1.976, "grad_norm": 0.6438842436430051, "learning_rate": 3.609928589516977e-05, "loss": 0.1615, "loss_nan_ranks": 0, "loss_rank_avg": 0.17005938291549683, "step": 1235, "valid_targets_mean": 904.0, "valid_targets_min": 488 }, { "epoch": 1.984, "grad_norm": 0.8188644878480772, "learning_rate": 3.6051812694601114e-05, "loss": 0.1386, "loss_nan_ranks": 0, "loss_rank_avg": 0.11581555008888245, "step": 1240, "valid_targets_mean": 744.3, "valid_targets_min": 503 }, { "epoch": 1.992, "grad_norm": 0.6677337021986096, "learning_rate": 3.6004083969600346e-05, "loss": 0.2029, "loss_nan_ranks": 0, "loss_rank_avg": 0.14588750898838043, "step": 1245, "valid_targets_mean": 892.2, "valid_targets_min": 607 }, { "epoch": 2.0, "grad_norm": 0.675639545335633, "learning_rate": 3.595610047994804e-05, "loss": 0.1651, "loss_nan_ranks": 0, "loss_rank_avg": 0.14435610175132751, "step": 1250, "valid_targets_mean": 788.2, "valid_targets_min": 517 }, { "epoch": 2.008, "grad_norm": 0.6671909434793415, "learning_rate": 3.5907862989480285e-05, "loss": 0.1488, "loss_nan_ranks": 0, "loss_rank_avg": 0.13127434253692627, "step": 1255, "valid_targets_mean": 808.4, "valid_targets_min": 537 }, { "epoch": 2.016, "grad_norm": 0.6695140708226522, "learning_rate": 3.585937226607656e-05, "loss": 0.1381, "loss_nan_ranks": 0, "loss_rank_avg": 0.1698671281337738, "step": 1260, "valid_targets_mean": 1084.6, "valid_targets_min": 611 }, { "epoch": 2.024, "grad_norm": 0.812996750262902, "learning_rate": 3.5810629081647476e-05, "loss": 0.1701, "loss_nan_ranks": 0, "loss_rank_avg": 0.20692437887191772, "step": 1265, "valid_targets_mean": 963.8, "valid_targets_min": 430 }, { "epoch": 2.032, "grad_norm": 0.6142695293984198, "learning_rate": 3.576163421212249e-05, "loss": 0.1688, "loss_nan_ranks": 0, "loss_rank_avg": 0.109217070043087, "step": 1270, "valid_targets_mean": 769.1, "valid_targets_min": 464 }, { "epoch": 2.04, "grad_norm": 0.704398806152235, "learning_rate": 3.5712388437437576e-05, "loss": 0.1204, "loss_nan_ranks": 0, "loss_rank_avg": 0.12265762686729431, "step": 1275, "valid_targets_mean": 816.1, "valid_targets_min": 478 }, { "epoch": 2.048, "grad_norm": 0.777803434971521, "learning_rate": 3.566289254152283e-05, "loss": 0.1641, "loss_nan_ranks": 0, "loss_rank_avg": 0.15761694312095642, "step": 1280, "valid_targets_mean": 829.4, "valid_targets_min": 527 }, { "epoch": 2.056, "grad_norm": 0.6217244928443194, "learning_rate": 3.56131473122899e-05, "loss": 0.1208, "loss_nan_ranks": 0, "loss_rank_avg": 0.11285565793514252, "step": 1285, "valid_targets_mean": 900.2, "valid_targets_min": 462 }, { "epoch": 2.064, "grad_norm": 0.6970991090986864, "learning_rate": 3.556315354161955e-05, "loss": 0.146, "loss_nan_ranks": 0, "loss_rank_avg": 0.1326259821653366, "step": 1290, "valid_targets_mean": 820.5, "valid_targets_min": 535 }, { "epoch": 2.072, "grad_norm": 0.6227996898139345, "learning_rate": 3.551291202534899e-05, "loss": 0.1331, "loss_nan_ranks": 0, "loss_rank_avg": 0.12502652406692505, "step": 1295, "valid_targets_mean": 941.1, "valid_targets_min": 424 }, { "epoch": 2.08, "grad_norm": 0.6163809129412077, "learning_rate": 3.546242356325922e-05, "loss": 0.1601, "loss_nan_ranks": 0, "loss_rank_avg": 0.11981011182069778, "step": 1300, "valid_targets_mean": 909.6, "valid_targets_min": 480 }, { "epoch": 2.088, "grad_norm": 0.7139872878534804, "learning_rate": 3.5411688959062323e-05, "loss": 0.1211, "loss_nan_ranks": 0, "loss_rank_avg": 0.13094274699687958, "step": 1305, "valid_targets_mean": 752.4, "valid_targets_min": 458 }, { "epoch": 2.096, "grad_norm": 0.701035601155758, "learning_rate": 3.5360709020388625e-05, "loss": 0.1514, "loss_nan_ranks": 0, "loss_rank_avg": 0.12007572501897812, "step": 1310, "valid_targets_mean": 811.1, "valid_targets_min": 458 }, { "epoch": 2.104, "grad_norm": 0.6739881647901436, "learning_rate": 3.530948455877388e-05, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.12018238753080368, "step": 1315, "valid_targets_mean": 749.4, "valid_targets_min": 501 }, { "epoch": 2.112, "grad_norm": 0.6968055383560433, "learning_rate": 3.525801638964634e-05, "loss": 0.1377, "loss_nan_ranks": 0, "loss_rank_avg": 0.1314522922039032, "step": 1320, "valid_targets_mean": 842.8, "valid_targets_min": 509 }, { "epoch": 2.12, "grad_norm": 0.6624716484945574, "learning_rate": 3.520630533231376e-05, "loss": 0.1439, "loss_nan_ranks": 0, "loss_rank_avg": 0.13978487253189087, "step": 1325, "valid_targets_mean": 907.9, "valid_targets_min": 527 }, { "epoch": 2.128, "grad_norm": 0.6585719327478531, "learning_rate": 3.5154352209950376e-05, "loss": 0.1217, "loss_nan_ranks": 0, "loss_rank_avg": 0.11899503320455551, "step": 1330, "valid_targets_mean": 748.1, "valid_targets_min": 419 }, { "epoch": 2.136, "grad_norm": 0.7568797476611201, "learning_rate": 3.510215784958376e-05, "loss": 0.1449, "loss_nan_ranks": 0, "loss_rank_avg": 0.1821085512638092, "step": 1335, "valid_targets_mean": 1131.6, "valid_targets_min": 629 }, { "epoch": 2.144, "grad_norm": 0.6989537757915661, "learning_rate": 3.5049723082081755e-05, "loss": 0.1648, "loss_nan_ranks": 0, "loss_rank_avg": 0.11788707226514816, "step": 1340, "valid_targets_mean": 832.4, "valid_targets_min": 544 }, { "epoch": 2.152, "grad_norm": 0.6341811561632761, "learning_rate": 3.49970487421391e-05, "loss": 0.1367, "loss_nan_ranks": 0, "loss_rank_avg": 0.19004957377910614, "step": 1345, "valid_targets_mean": 1656.5, "valid_targets_min": 468 }, { "epoch": 2.16, "grad_norm": 0.6634817410417032, "learning_rate": 3.494413566826427e-05, "loss": 0.1594, "loss_nan_ranks": 0, "loss_rank_avg": 0.14211371541023254, "step": 1350, "valid_targets_mean": 1018.9, "valid_targets_min": 612 }, { "epoch": 2.168, "grad_norm": 0.6959937182985245, "learning_rate": 3.489098470276608e-05, "loss": 0.1342, "loss_nan_ranks": 0, "loss_rank_avg": 0.12617823481559753, "step": 1355, "valid_targets_mean": 830.5, "valid_targets_min": 473 }, { "epoch": 2.176, "grad_norm": 0.6581171678360322, "learning_rate": 3.483759669174024e-05, "loss": 0.1462, "loss_nan_ranks": 0, "loss_rank_avg": 0.11195407807826996, "step": 1360, "valid_targets_mean": 854.4, "valid_targets_min": 524 }, { "epoch": 2.184, "grad_norm": 0.8403619558072984, "learning_rate": 3.478397248505598e-05, "loss": 0.1919, "loss_nan_ranks": 0, "loss_rank_avg": 0.2636897563934326, "step": 1365, "valid_targets_mean": 1145.6, "valid_targets_min": 560 }, { "epoch": 2.192, "grad_norm": 0.6544919553846851, "learning_rate": 3.473011293634241e-05, "loss": 0.1444, "loss_nan_ranks": 0, "loss_rank_avg": 0.16438347101211548, "step": 1370, "valid_targets_mean": 1205.9, "valid_targets_min": 476 }, { "epoch": 2.2, "grad_norm": 0.715642039819752, "learning_rate": 3.467601890297502e-05, "loss": 0.1883, "loss_nan_ranks": 0, "loss_rank_avg": 0.12462947517633438, "step": 1375, "valid_targets_mean": 835.9, "valid_targets_min": 521 }, { "epoch": 2.208, "grad_norm": 0.5887672811038482, "learning_rate": 3.4621691246061976e-05, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.09311959147453308, "step": 1380, "valid_targets_mean": 925.4, "valid_targets_min": 516 }, { "epoch": 2.216, "grad_norm": 0.722356368405542, "learning_rate": 3.456713083043046e-05, "loss": 0.1398, "loss_nan_ranks": 0, "loss_rank_avg": 0.12428110092878342, "step": 1385, "valid_targets_mean": 813.9, "valid_targets_min": 556 }, { "epoch": 2.224, "grad_norm": 0.6761216165323534, "learning_rate": 3.451233852461285e-05, "loss": 0.1336, "loss_nan_ranks": 0, "loss_rank_avg": 0.12943808734416962, "step": 1390, "valid_targets_mean": 883.8, "valid_targets_min": 465 }, { "epoch": 2.232, "grad_norm": 0.6108070076628657, "learning_rate": 3.4457315200832935e-05, "loss": 0.1182, "loss_nan_ranks": 0, "loss_rank_avg": 0.10476785898208618, "step": 1395, "valid_targets_mean": 756.9, "valid_targets_min": 478 }, { "epoch": 2.24, "grad_norm": 0.7036553476885127, "learning_rate": 3.440206173499201e-05, "loss": 0.1575, "loss_nan_ranks": 0, "loss_rank_avg": 0.15050138533115387, "step": 1400, "valid_targets_mean": 900.4, "valid_targets_min": 515 }, { "epoch": 2.248, "grad_norm": 0.7032647871620187, "learning_rate": 3.4346579006654945e-05, "loss": 0.1304, "loss_nan_ranks": 0, "loss_rank_avg": 0.1485375314950943, "step": 1405, "valid_targets_mean": 894.5, "valid_targets_min": 458 }, { "epoch": 2.2560000000000002, "grad_norm": 0.7955675404987136, "learning_rate": 3.4290867899036166e-05, "loss": 0.2338, "loss_nan_ranks": 0, "loss_rank_avg": 0.31520774960517883, "step": 1410, "valid_targets_mean": 1497.8, "valid_targets_min": 535 }, { "epoch": 2.2640000000000002, "grad_norm": 0.805423853291883, "learning_rate": 3.4234929298985614e-05, "loss": 0.1588, "loss_nan_ranks": 0, "loss_rank_avg": 0.2818862795829773, "step": 1415, "valid_targets_mean": 1244.6, "valid_targets_min": 466 }, { "epoch": 2.2720000000000002, "grad_norm": 0.7128978513934985, "learning_rate": 3.417876409697463e-05, "loss": 0.1383, "loss_nan_ranks": 0, "loss_rank_avg": 0.18899387121200562, "step": 1420, "valid_targets_mean": 1032.2, "valid_targets_min": 507 }, { "epoch": 2.2800000000000002, "grad_norm": 0.6379976195130551, "learning_rate": 3.412237318708175e-05, "loss": 0.1311, "loss_nan_ranks": 0, "loss_rank_avg": 0.11107778549194336, "step": 1425, "valid_targets_mean": 791.8, "valid_targets_min": 510 }, { "epoch": 2.288, "grad_norm": 0.6338543183979924, "learning_rate": 3.4065757466978504e-05, "loss": 0.1812, "loss_nan_ranks": 0, "loss_rank_avg": 0.1256306767463684, "step": 1430, "valid_targets_mean": 817.4, "valid_targets_min": 496 }, { "epoch": 2.296, "grad_norm": 0.702151402068189, "learning_rate": 3.400891783791511e-05, "loss": 0.146, "loss_nan_ranks": 0, "loss_rank_avg": 0.11320549249649048, "step": 1435, "valid_targets_mean": 712.9, "valid_targets_min": 435 }, { "epoch": 2.304, "grad_norm": 0.6217875950926312, "learning_rate": 3.395185520470614e-05, "loss": 0.1639, "loss_nan_ranks": 0, "loss_rank_avg": 0.11201746016740799, "step": 1440, "valid_targets_mean": 803.2, "valid_targets_min": 513 }, { "epoch": 2.312, "grad_norm": 0.706308661700129, "learning_rate": 3.38945704757161e-05, "loss": 0.1624, "loss_nan_ranks": 0, "loss_rank_avg": 0.13947395980358124, "step": 1445, "valid_targets_mean": 937.0, "valid_targets_min": 417 }, { "epoch": 2.32, "grad_norm": 0.6919298828515554, "learning_rate": 3.383706456284498e-05, "loss": 0.143, "loss_nan_ranks": 0, "loss_rank_avg": 0.1306343674659729, "step": 1450, "valid_targets_mean": 843.5, "valid_targets_min": 424 }, { "epoch": 2.328, "grad_norm": 0.6440894176240278, "learning_rate": 3.377933838151374e-05, "loss": 0.124, "loss_nan_ranks": 0, "loss_rank_avg": 0.11203815042972565, "step": 1455, "valid_targets_mean": 847.4, "valid_targets_min": 484 }, { "epoch": 2.336, "grad_norm": 0.6348699227329601, "learning_rate": 3.3721392850649714e-05, "loss": 0.1758, "loss_nan_ranks": 0, "loss_rank_avg": 0.1289135068655014, "step": 1460, "valid_targets_mean": 991.3, "valid_targets_min": 430 }, { "epoch": 2.344, "grad_norm": 0.7253148487683199, "learning_rate": 3.3663228892672034e-05, "loss": 0.1489, "loss_nan_ranks": 0, "loss_rank_avg": 0.12253601104021072, "step": 1465, "valid_targets_mean": 866.7, "valid_targets_min": 505 }, { "epoch": 2.352, "grad_norm": 0.6968645665141834, "learning_rate": 3.36048474334769e-05, "loss": 0.1543, "loss_nan_ranks": 0, "loss_rank_avg": 0.1570267379283905, "step": 1470, "valid_targets_mean": 999.6, "valid_targets_min": 523 }, { "epoch": 2.36, "grad_norm": 0.7237159999675643, "learning_rate": 3.3546249402422834e-05, "loss": 0.1562, "loss_nan_ranks": 0, "loss_rank_avg": 0.1736963391304016, "step": 1475, "valid_targets_mean": 937.2, "valid_targets_min": 531 }, { "epoch": 2.368, "grad_norm": 0.5043207572468628, "learning_rate": 3.3487435732315944e-05, "loss": 0.1653, "loss_nan_ranks": 0, "loss_rank_avg": 0.16285204887390137, "step": 1480, "valid_targets_mean": 2031.6, "valid_targets_min": 447 }, { "epoch": 2.376, "grad_norm": 0.7677480497490647, "learning_rate": 3.342840735939501e-05, "loss": 0.226, "loss_nan_ranks": 0, "loss_rank_avg": 0.306096613407135, "step": 1485, "valid_targets_mean": 1291.9, "valid_targets_min": 494 }, { "epoch": 2.384, "grad_norm": 0.6822127897762736, "learning_rate": 3.33691652233166e-05, "loss": 0.1422, "loss_nan_ranks": 0, "loss_rank_avg": 0.13742084801197052, "step": 1490, "valid_targets_mean": 876.8, "valid_targets_min": 499 }, { "epoch": 2.392, "grad_norm": 0.6428337691702128, "learning_rate": 3.330971026714016e-05, "loss": 0.146, "loss_nan_ranks": 0, "loss_rank_avg": 0.11387015879154205, "step": 1495, "valid_targets_mean": 922.8, "valid_targets_min": 434 }, { "epoch": 2.4, "grad_norm": 0.7714905951446593, "learning_rate": 3.325004343731292e-05, "loss": 0.1405, "loss_nan_ranks": 0, "loss_rank_avg": 0.15051744878292084, "step": 1500, "valid_targets_mean": 778.1, "valid_targets_min": 444 }, { "epoch": 2.408, "grad_norm": 0.6537243906589456, "learning_rate": 3.3190165683654885e-05, "loss": 0.1362, "loss_nan_ranks": 0, "loss_rank_avg": 0.12786319851875305, "step": 1505, "valid_targets_mean": 924.9, "valid_targets_min": 510 }, { "epoch": 2.416, "grad_norm": 0.7181334150669801, "learning_rate": 3.31300779593437e-05, "loss": 0.1587, "loss_nan_ranks": 0, "loss_rank_avg": 0.1835220456123352, "step": 1510, "valid_targets_mean": 1048.0, "valid_targets_min": 470 }, { "epoch": 2.424, "grad_norm": 0.7200024616684744, "learning_rate": 3.306978122089948e-05, "loss": 0.1273, "loss_nan_ranks": 0, "loss_rank_avg": 0.1197834461927414, "step": 1515, "valid_targets_mean": 701.9, "valid_targets_min": 500 }, { "epoch": 2.432, "grad_norm": 0.76193388728556, "learning_rate": 3.300927642816957e-05, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.1792105734348297, "step": 1520, "valid_targets_mean": 1012.2, "valid_targets_min": 455 }, { "epoch": 2.44, "grad_norm": 0.6802775591904766, "learning_rate": 3.294856454431328e-05, "loss": 0.1377, "loss_nan_ranks": 0, "loss_rank_avg": 0.17906546592712402, "step": 1525, "valid_targets_mean": 1050.2, "valid_targets_min": 477 }, { "epoch": 2.448, "grad_norm": 1.127644705809151, "learning_rate": 3.288764653578653e-05, "loss": 0.1336, "loss_nan_ranks": 0, "loss_rank_avg": 0.098813995718956, "step": 1530, "valid_targets_mean": 773.1, "valid_targets_min": 443 }, { "epoch": 2.456, "grad_norm": 0.6447834377487103, "learning_rate": 3.2826523372326516e-05, "loss": 0.1288, "loss_nan_ranks": 0, "loss_rank_avg": 0.1183435320854187, "step": 1535, "valid_targets_mean": 878.8, "valid_targets_min": 537 }, { "epoch": 2.464, "grad_norm": 0.7130623789996078, "learning_rate": 3.276519602693621e-05, "loss": 0.1456, "loss_nan_ranks": 0, "loss_rank_avg": 0.1491391956806183, "step": 1540, "valid_targets_mean": 800.8, "valid_targets_min": 547 }, { "epoch": 2.472, "grad_norm": 0.620842798883401, "learning_rate": 3.270366547586892e-05, "loss": 0.1398, "loss_nan_ranks": 0, "loss_rank_avg": 0.1251269280910492, "step": 1545, "valid_targets_mean": 798.4, "valid_targets_min": 445 }, { "epoch": 2.48, "grad_norm": 0.6742982523505523, "learning_rate": 3.2641932698612715e-05, "loss": 0.1554, "loss_nan_ranks": 0, "loss_rank_avg": 0.11616584658622742, "step": 1550, "valid_targets_mean": 786.2, "valid_targets_min": 520 }, { "epoch": 2.488, "grad_norm": 0.7167899223398502, "learning_rate": 3.2579998677874855e-05, "loss": 0.1798, "loss_nan_ranks": 0, "loss_rank_avg": 0.17860479652881622, "step": 1555, "valid_targets_mean": 1121.8, "valid_targets_min": 475 }, { "epoch": 2.496, "grad_norm": 0.7001792061426741, "learning_rate": 3.251786439956614e-05, "loss": 0.1352, "loss_nan_ranks": 0, "loss_rank_avg": 0.1385573148727417, "step": 1560, "valid_targets_mean": 875.4, "valid_targets_min": 499 }, { "epoch": 2.504, "grad_norm": 0.6570116776180441, "learning_rate": 3.2455530852785206e-05, "loss": 0.1457, "loss_nan_ranks": 0, "loss_rank_avg": 0.1328207403421402, "step": 1565, "valid_targets_mean": 897.6, "valid_targets_min": 428 }, { "epoch": 2.512, "grad_norm": 0.611927697245731, "learning_rate": 3.239299902980281e-05, "loss": 0.1409, "loss_nan_ranks": 0, "loss_rank_avg": 0.11006229370832443, "step": 1570, "valid_targets_mean": 858.8, "valid_targets_min": 594 }, { "epoch": 2.52, "grad_norm": 0.6484285085659105, "learning_rate": 3.2330269926046e-05, "loss": 0.1542, "loss_nan_ranks": 0, "loss_rank_avg": 0.19695384800434113, "step": 1575, "valid_targets_mean": 1437.2, "valid_targets_min": 495 }, { "epoch": 2.528, "grad_norm": 0.5964550781287618, "learning_rate": 3.2267344540082284e-05, "loss": 0.1296, "loss_nan_ranks": 0, "loss_rank_avg": 0.12830229103565216, "step": 1580, "valid_targets_mean": 1032.3, "valid_targets_min": 453 }, { "epoch": 2.536, "grad_norm": 0.6517306987311609, "learning_rate": 3.220422387360373e-05, "loss": 0.1364, "loss_nan_ranks": 0, "loss_rank_avg": 0.10732553899288177, "step": 1585, "valid_targets_mean": 722.9, "valid_targets_min": 456 }, { "epoch": 2.544, "grad_norm": 0.7032594046384518, "learning_rate": 3.2140908931411026e-05, "loss": 0.1505, "loss_nan_ranks": 0, "loss_rank_avg": 0.12496422231197357, "step": 1590, "valid_targets_mean": 762.9, "valid_targets_min": 459 }, { "epoch": 2.552, "grad_norm": 0.647513049077345, "learning_rate": 3.207740072139748e-05, "loss": 0.1864, "loss_nan_ranks": 0, "loss_rank_avg": 0.14673733711242676, "step": 1595, "valid_targets_mean": 907.1, "valid_targets_min": 555 }, { "epoch": 2.56, "grad_norm": 0.6265580167463557, "learning_rate": 3.2013700254532996e-05, "loss": 0.1396, "loss_nan_ranks": 0, "loss_rank_avg": 0.11060081422328949, "step": 1600, "valid_targets_mean": 896.2, "valid_targets_min": 477 }, { "epoch": 2.568, "grad_norm": 0.6798658233952004, "learning_rate": 3.194980854484794e-05, "loss": 0.1523, "loss_nan_ranks": 0, "loss_rank_avg": 0.15329161286354065, "step": 1605, "valid_targets_mean": 882.2, "valid_targets_min": 475 }, { "epoch": 2.576, "grad_norm": 0.6635462419744681, "learning_rate": 3.188572660941702e-05, "loss": 0.1335, "loss_nan_ranks": 0, "loss_rank_avg": 0.11939306557178497, "step": 1610, "valid_targets_mean": 808.9, "valid_targets_min": 522 }, { "epoch": 2.584, "grad_norm": 0.6617445706954177, "learning_rate": 3.182145546834311e-05, "loss": 0.1415, "loss_nan_ranks": 0, "loss_rank_avg": 0.12312141805887222, "step": 1615, "valid_targets_mean": 789.6, "valid_targets_min": 480 }, { "epoch": 2.592, "grad_norm": 0.7205574844014744, "learning_rate": 3.1756996144740994e-05, "loss": 0.1463, "loss_nan_ranks": 0, "loss_rank_avg": 0.22105172276496887, "step": 1620, "valid_targets_mean": 1439.3, "valid_targets_min": 490 }, { "epoch": 2.6, "grad_norm": 0.7133516783301649, "learning_rate": 3.1692349664721074e-05, "loss": 0.1518, "loss_nan_ranks": 0, "loss_rank_avg": 0.11445567011833191, "step": 1625, "valid_targets_mean": 795.9, "valid_targets_min": 416 }, { "epoch": 2.608, "grad_norm": 0.6228609586291742, "learning_rate": 3.1627517057373046e-05, "loss": 0.1301, "loss_nan_ranks": 0, "loss_rank_avg": 0.14198367297649384, "step": 1630, "valid_targets_mean": 1275.9, "valid_targets_min": 439 }, { "epoch": 2.616, "grad_norm": 0.7777392370038086, "learning_rate": 3.156249935474953e-05, "loss": 0.1471, "loss_nan_ranks": 0, "loss_rank_avg": 0.12163381278514862, "step": 1635, "valid_targets_mean": 905.9, "valid_targets_min": 623 }, { "epoch": 2.624, "grad_norm": 0.7122943324469065, "learning_rate": 3.1497297591849614e-05, "loss": 0.1338, "loss_nan_ranks": 0, "loss_rank_avg": 0.15949739515781403, "step": 1640, "valid_targets_mean": 939.0, "valid_targets_min": 517 }, { "epoch": 2.632, "grad_norm": 0.5904037247920326, "learning_rate": 3.143191280660238e-05, "loss": 0.1496, "loss_nan_ranks": 0, "loss_rank_avg": 0.18335631489753723, "step": 1645, "valid_targets_mean": 1674.0, "valid_targets_min": 517 }, { "epoch": 2.64, "grad_norm": 0.6644180963489857, "learning_rate": 3.1366346039850424e-05, "loss": 0.152, "loss_nan_ranks": 0, "loss_rank_avg": 0.11574030667543411, "step": 1650, "valid_targets_mean": 785.8, "valid_targets_min": 574 }, { "epoch": 2.648, "grad_norm": 0.7622292992036905, "learning_rate": 3.130059833533323e-05, "loss": 0.1454, "loss_nan_ranks": 0, "loss_rank_avg": 0.13806472718715668, "step": 1655, "valid_targets_mean": 757.1, "valid_targets_min": 447 }, { "epoch": 2.656, "grad_norm": 0.7666468087153077, "learning_rate": 3.123467073967059e-05, "loss": 0.1468, "loss_nan_ranks": 0, "loss_rank_avg": 0.24357299506664276, "step": 1660, "valid_targets_mean": 1023.1, "valid_targets_min": 501 }, { "epoch": 2.664, "grad_norm": 0.7620880026977072, "learning_rate": 3.116856430234594e-05, "loss": 0.1795, "loss_nan_ranks": 0, "loss_rank_avg": 0.20035260915756226, "step": 1665, "valid_targets_mean": 1098.9, "valid_targets_min": 462 }, { "epoch": 2.672, "grad_norm": 0.7693275033385583, "learning_rate": 3.110228007568963e-05, "loss": 0.159, "loss_nan_ranks": 0, "loss_rank_avg": 0.270406037569046, "step": 1670, "valid_targets_mean": 1265.4, "valid_targets_min": 490 }, { "epoch": 2.68, "grad_norm": 0.6810833402248885, "learning_rate": 3.103581911486221e-05, "loss": 0.1578, "loss_nan_ranks": 0, "loss_rank_avg": 0.14812511205673218, "step": 1675, "valid_targets_mean": 970.6, "valid_targets_min": 519 }, { "epoch": 2.6879999999999997, "grad_norm": 0.7047758447085022, "learning_rate": 3.0969182477837604e-05, "loss": 0.1252, "loss_nan_ranks": 0, "loss_rank_avg": 0.16393721103668213, "step": 1680, "valid_targets_mean": 882.7, "valid_targets_min": 451 }, { "epoch": 2.6959999999999997, "grad_norm": 0.6996051941190828, "learning_rate": 3.090237122538628e-05, "loss": 0.1189, "loss_nan_ranks": 0, "loss_rank_avg": 0.12491434812545776, "step": 1685, "valid_targets_mean": 850.1, "valid_targets_min": 559 }, { "epoch": 2.7039999999999997, "grad_norm": 0.825415715952181, "learning_rate": 3.0835386421058345e-05, "loss": 0.1543, "loss_nan_ranks": 0, "loss_rank_avg": 0.20368003845214844, "step": 1690, "valid_targets_mean": 1022.1, "valid_targets_min": 495 }, { "epoch": 2.7119999999999997, "grad_norm": 0.7550799135353304, "learning_rate": 3.0768229131166664e-05, "loss": 0.1732, "loss_nan_ranks": 0, "loss_rank_avg": 0.20968970656394958, "step": 1695, "valid_targets_mean": 1147.6, "valid_targets_min": 553 }, { "epoch": 2.7199999999999998, "grad_norm": 0.6583647661605726, "learning_rate": 3.070090042476983e-05, "loss": 0.1816, "loss_nan_ranks": 0, "loss_rank_avg": 0.1478102207183838, "step": 1700, "valid_targets_mean": 989.4, "valid_targets_min": 496 }, { "epoch": 2.7279999999999998, "grad_norm": 0.6240340960504206, "learning_rate": 3.063340137365517e-05, "loss": 0.1446, "loss_nan_ranks": 0, "loss_rank_avg": 0.1631350815296173, "step": 1705, "valid_targets_mean": 1248.9, "valid_targets_min": 641 }, { "epoch": 2.7359999999999998, "grad_norm": 0.6965617320231037, "learning_rate": 3.0565733052321674e-05, "loss": 0.1823, "loss_nan_ranks": 0, "loss_rank_avg": 0.16352877020835876, "step": 1710, "valid_targets_mean": 1014.0, "valid_targets_min": 552 }, { "epoch": 2.7439999999999998, "grad_norm": 0.6546126474286746, "learning_rate": 3.0497896537962924e-05, "loss": 0.1295, "loss_nan_ranks": 0, "loss_rank_avg": 0.10371782630681992, "step": 1715, "valid_targets_mean": 816.9, "valid_targets_min": 530 }, { "epoch": 2.752, "grad_norm": 0.6863948280952433, "learning_rate": 3.042989291044991e-05, "loss": 0.1424, "loss_nan_ranks": 0, "loss_rank_avg": 0.1213807612657547, "step": 1720, "valid_targets_mean": 907.7, "valid_targets_min": 536 }, { "epoch": 2.76, "grad_norm": 0.6196156050560603, "learning_rate": 3.036172325231383e-05, "loss": 0.1265, "loss_nan_ranks": 0, "loss_rank_avg": 0.10670541971921921, "step": 1725, "valid_targets_mean": 817.1, "valid_targets_min": 479 }, { "epoch": 2.768, "grad_norm": 0.6885198422874407, "learning_rate": 3.0293388648728908e-05, "loss": 0.1732, "loss_nan_ranks": 0, "loss_rank_avg": 0.14460545778274536, "step": 1730, "valid_targets_mean": 836.1, "valid_targets_min": 511 }, { "epoch": 2.776, "grad_norm": 0.6716979655013353, "learning_rate": 3.022489018749508e-05, "loss": 0.1438, "loss_nan_ranks": 0, "loss_rank_avg": 0.128073051571846, "step": 1735, "valid_targets_mean": 822.5, "valid_targets_min": 462 }, { "epoch": 2.784, "grad_norm": 0.5905720717331144, "learning_rate": 3.015622895902068e-05, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.10219383239746094, "step": 1740, "valid_targets_mean": 1018.3, "valid_targets_min": 528 }, { "epoch": 2.792, "grad_norm": 0.6538619633756149, "learning_rate": 3.008740605630508e-05, "loss": 0.1127, "loss_nan_ranks": 0, "loss_rank_avg": 0.11181123554706573, "step": 1745, "valid_targets_mean": 890.4, "valid_targets_min": 406 }, { "epoch": 2.8, "grad_norm": 0.5951374209934471, "learning_rate": 3.0018422574921337e-05, "loss": 0.136, "loss_nan_ranks": 0, "loss_rank_avg": 0.10837581753730774, "step": 1750, "valid_targets_mean": 899.2, "valid_targets_min": 498 }, { "epoch": 2.808, "grad_norm": 0.7052329434080948, "learning_rate": 2.9949279612998673e-05, "loss": 0.137, "loss_nan_ranks": 0, "loss_rank_avg": 0.13119608163833618, "step": 1755, "valid_targets_mean": 898.5, "valid_targets_min": 502 }, { "epoch": 2.816, "grad_norm": 0.8015749810161719, "learning_rate": 2.9879978271205064e-05, "loss": 0.158, "loss_nan_ranks": 0, "loss_rank_avg": 0.34896156191825867, "step": 1760, "valid_targets_mean": 1340.1, "valid_targets_min": 396 }, { "epoch": 2.824, "grad_norm": 0.6290954626710966, "learning_rate": 2.9810519652729692e-05, "loss": 0.1473, "loss_nan_ranks": 0, "loss_rank_avg": 0.14061371982097626, "step": 1765, "valid_targets_mean": 915.2, "valid_targets_min": 516 }, { "epoch": 2.832, "grad_norm": 0.7261508576120159, "learning_rate": 2.9740904863265378e-05, "loss": 0.1687, "loss_nan_ranks": 0, "loss_rank_avg": 0.17512479424476624, "step": 1770, "valid_targets_mean": 943.8, "valid_targets_min": 488 }, { "epoch": 2.84, "grad_norm": 0.8060494791243035, "learning_rate": 2.967113501099097e-05, "loss": 0.1405, "loss_nan_ranks": 0, "loss_rank_avg": 0.2883511781692505, "step": 1775, "valid_targets_mean": 1182.6, "valid_targets_min": 514 }, { "epoch": 2.848, "grad_norm": 0.6654915787262201, "learning_rate": 2.9601211206553745e-05, "loss": 0.1193, "loss_nan_ranks": 0, "loss_rank_avg": 0.1242620199918747, "step": 1780, "valid_targets_mean": 882.2, "valid_targets_min": 590 }, { "epoch": 2.856, "grad_norm": 0.7346953920740353, "learning_rate": 2.9531134563051686e-05, "loss": 0.1239, "loss_nan_ranks": 0, "loss_rank_avg": 0.11942558735609055, "step": 1785, "valid_targets_mean": 803.6, "valid_targets_min": 509 }, { "epoch": 2.864, "grad_norm": 0.65907236959137, "learning_rate": 2.946090619601579e-05, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.13639883697032928, "step": 1790, "valid_targets_mean": 874.3, "valid_targets_min": 517 }, { "epoch": 2.872, "grad_norm": 0.6397246748417049, "learning_rate": 2.9390527223392292e-05, "loss": 0.1411, "loss_nan_ranks": 0, "loss_rank_avg": 0.12321165949106216, "step": 1795, "valid_targets_mean": 806.2, "valid_targets_min": 480 }, { "epoch": 2.88, "grad_norm": 1.0747048389216176, "learning_rate": 2.931999876552488e-05, "loss": 0.1455, "loss_nan_ranks": 0, "loss_rank_avg": 0.13217294216156006, "step": 1800, "valid_targets_mean": 878.2, "valid_targets_min": 467 }, { "epoch": 2.888, "grad_norm": 0.6288006656918863, "learning_rate": 2.9249321945136854e-05, "loss": 0.1347, "loss_nan_ranks": 0, "loss_rank_avg": 0.11415164917707443, "step": 1805, "valid_targets_mean": 833.4, "valid_targets_min": 504 }, { "epoch": 2.896, "grad_norm": 0.6493401209466289, "learning_rate": 2.9178497887313257e-05, "loss": 0.1533, "loss_nan_ranks": 0, "loss_rank_avg": 0.1355169266462326, "step": 1810, "valid_targets_mean": 1076.9, "valid_targets_min": 549 }, { "epoch": 2.904, "grad_norm": 0.6826145785690596, "learning_rate": 2.9107527719482968e-05, "loss": 0.1278, "loss_nan_ranks": 0, "loss_rank_avg": 0.14696922898292542, "step": 1815, "valid_targets_mean": 1041.2, "valid_targets_min": 567 }, { "epoch": 2.912, "grad_norm": 0.6714528039918809, "learning_rate": 2.9036412571400747e-05, "loss": 0.1557, "loss_nan_ranks": 0, "loss_rank_avg": 0.12627488374710083, "step": 1820, "valid_targets_mean": 791.7, "valid_targets_min": 467 }, { "epoch": 2.92, "grad_norm": 0.669057548346749, "learning_rate": 2.8965153575129255e-05, "loss": 0.1216, "loss_nan_ranks": 0, "loss_rank_avg": 0.11150173842906952, "step": 1825, "valid_targets_mean": 713.1, "valid_targets_min": 428 }, { "epoch": 2.928, "grad_norm": 6.262537715901929, "learning_rate": 2.8893751865021044e-05, "loss": 0.1561, "loss_nan_ranks": 0, "loss_rank_avg": 0.1828661561012268, "step": 1830, "valid_targets_mean": 1074.8, "valid_targets_min": 438 }, { "epoch": 2.936, "grad_norm": 0.8329681709717811, "learning_rate": 2.8822208577700473e-05, "loss": 0.1798, "loss_nan_ranks": 0, "loss_rank_avg": 0.3160470426082611, "step": 1835, "valid_targets_mean": 1414.8, "valid_targets_min": 511 }, { "epoch": 2.944, "grad_norm": 0.7357974139181813, "learning_rate": 2.8750524852045642e-05, "loss": 0.1211, "loss_nan_ranks": 0, "loss_rank_avg": 0.1282620131969452, "step": 1840, "valid_targets_mean": 820.2, "valid_targets_min": 448 }, { "epoch": 2.952, "grad_norm": 0.6775803708281112, "learning_rate": 2.867870182917024e-05, "loss": 0.1477, "loss_nan_ranks": 0, "loss_rank_avg": 0.1339489221572876, "step": 1845, "valid_targets_mean": 790.2, "valid_targets_min": 519 }, { "epoch": 2.96, "grad_norm": 0.6597044199311293, "learning_rate": 2.8606740652405394e-05, "loss": 0.1651, "loss_nan_ranks": 0, "loss_rank_avg": 0.15459227561950684, "step": 1850, "valid_targets_mean": 954.8, "valid_targets_min": 444 }, { "epoch": 2.968, "grad_norm": 0.5869493252220873, "learning_rate": 2.853464246728147e-05, "loss": 0.2023, "loss_nan_ranks": 0, "loss_rank_avg": 0.2387658804655075, "step": 1855, "valid_targets_mean": 1852.9, "valid_targets_min": 519 }, { "epoch": 2.976, "grad_norm": 0.7270659719435554, "learning_rate": 2.846240842150984e-05, "loss": 0.1283, "loss_nan_ranks": 0, "loss_rank_avg": 0.16146603226661682, "step": 1860, "valid_targets_mean": 959.2, "valid_targets_min": 480 }, { "epoch": 2.984, "grad_norm": 0.6700866512360112, "learning_rate": 2.839003966496458e-05, "loss": 0.1535, "loss_nan_ranks": 0, "loss_rank_avg": 0.13563427329063416, "step": 1865, "valid_targets_mean": 851.7, "valid_targets_min": 487 }, { "epoch": 2.992, "grad_norm": 0.5765343337231628, "learning_rate": 2.8317537349664215e-05, "loss": 0.1295, "loss_nan_ranks": 0, "loss_rank_avg": 0.10503876209259033, "step": 1870, "valid_targets_mean": 913.2, "valid_targets_min": 619 }, { "epoch": 3.0, "grad_norm": 0.6634220547526659, "learning_rate": 2.824490262975334e-05, "loss": 0.1364, "loss_nan_ranks": 0, "loss_rank_avg": 0.15313103795051575, "step": 1875, "valid_targets_mean": 1032.9, "valid_targets_min": 525 }, { "epoch": 3.008, "grad_norm": 0.6340931383149534, "learning_rate": 2.817213666148427e-05, "loss": 0.1325, "loss_nan_ranks": 0, "loss_rank_avg": 0.11905479431152344, "step": 1880, "valid_targets_mean": 989.5, "valid_targets_min": 543 }, { "epoch": 3.016, "grad_norm": 0.7397667131418036, "learning_rate": 2.809924060319862e-05, "loss": 0.1533, "loss_nan_ranks": 0, "loss_rank_avg": 0.09949196875095367, "step": 1885, "valid_targets_mean": 925.4, "valid_targets_min": 502 }, { "epoch": 3.024, "grad_norm": 0.7891254837207506, "learning_rate": 2.802621561530888e-05, "loss": 0.1245, "loss_nan_ranks": 0, "loss_rank_avg": 0.14507977664470673, "step": 1890, "valid_targets_mean": 951.9, "valid_targets_min": 455 }, { "epoch": 3.032, "grad_norm": 0.7167855246712508, "learning_rate": 2.7953062860279937e-05, "loss": 0.1173, "loss_nan_ranks": 0, "loss_rank_avg": 0.16226643323898315, "step": 1895, "valid_targets_mean": 1303.5, "valid_targets_min": 478 }, { "epoch": 3.04, "grad_norm": 0.8560753101693489, "learning_rate": 2.7879783502610557e-05, "loss": 0.1404, "loss_nan_ranks": 0, "loss_rank_avg": 0.1734582930803299, "step": 1900, "valid_targets_mean": 1111.7, "valid_targets_min": 479 }, { "epoch": 3.048, "grad_norm": 0.6097438352541962, "learning_rate": 2.7806378708814875e-05, "loss": 0.1176, "loss_nan_ranks": 0, "loss_rank_avg": 0.07898702472448349, "step": 1905, "valid_targets_mean": 814.9, "valid_targets_min": 517 }, { "epoch": 3.056, "grad_norm": 0.8334738917846108, "learning_rate": 2.773284964740379e-05, "loss": 0.1172, "loss_nan_ranks": 0, "loss_rank_avg": 0.1159522607922554, "step": 1910, "valid_targets_mean": 1074.0, "valid_targets_min": 552 }, { "epoch": 3.064, "grad_norm": 0.73709069608429, "learning_rate": 2.7659197488866403e-05, "loss": 0.1148, "loss_nan_ranks": 0, "loss_rank_avg": 0.09380116313695908, "step": 1915, "valid_targets_mean": 831.4, "valid_targets_min": 594 }, { "epoch": 3.072, "grad_norm": 0.7306469497568479, "learning_rate": 2.7585423405651347e-05, "loss": 0.1478, "loss_nan_ranks": 0, "loss_rank_avg": 0.1965886950492859, "step": 1920, "valid_targets_mean": 1309.8, "valid_targets_min": 551 }, { "epoch": 3.08, "grad_norm": 0.6295094975865887, "learning_rate": 2.7511528572148153e-05, "loss": 0.1062, "loss_nan_ranks": 0, "loss_rank_avg": 0.08431434631347656, "step": 1925, "valid_targets_mean": 857.9, "valid_targets_min": 418 }, { "epoch": 3.088, "grad_norm": 0.6649810415419259, "learning_rate": 2.7437514164668536e-05, "loss": 0.1123, "loss_nan_ranks": 0, "loss_rank_avg": 0.08847647160291672, "step": 1930, "valid_targets_mean": 897.8, "valid_targets_min": 523 }, { "epoch": 3.096, "grad_norm": 0.8373666738680312, "learning_rate": 2.7363381361427692e-05, "loss": 0.122, "loss_nan_ranks": 0, "loss_rank_avg": 0.15449827909469604, "step": 1935, "valid_targets_mean": 964.7, "valid_targets_min": 523 }, { "epoch": 3.104, "grad_norm": 0.7253704386655504, "learning_rate": 2.72891313425255e-05, "loss": 0.1011, "loss_nan_ranks": 0, "loss_rank_avg": 0.09447682648897171, "step": 1940, "valid_targets_mean": 813.2, "valid_targets_min": 535 }, { "epoch": 3.112, "grad_norm": 0.7999319248633481, "learning_rate": 2.7214765289927777e-05, "loss": 0.1154, "loss_nan_ranks": 0, "loss_rank_avg": 0.12414933741092682, "step": 1945, "valid_targets_mean": 857.9, "valid_targets_min": 476 }, { "epoch": 3.12, "grad_norm": 0.6796572603097303, "learning_rate": 2.714028438744746e-05, "loss": 0.1068, "loss_nan_ranks": 0, "loss_rank_avg": 0.10158456861972809, "step": 1950, "valid_targets_mean": 1028.1, "valid_targets_min": 506 }, { "epoch": 3.128, "grad_norm": 0.7284877435667799, "learning_rate": 2.706568982072573e-05, "loss": 0.0905, "loss_nan_ranks": 0, "loss_rank_avg": 0.10231685638427734, "step": 1955, "valid_targets_mean": 862.3, "valid_targets_min": 510 }, { "epoch": 3.136, "grad_norm": 0.7786727324959434, "learning_rate": 2.6990982777213174e-05, "loss": 0.1375, "loss_nan_ranks": 0, "loss_rank_avg": 0.13553225994110107, "step": 1960, "valid_targets_mean": 991.1, "valid_targets_min": 487 }, { "epoch": 3.144, "grad_norm": 0.6242705242724281, "learning_rate": 2.691616444615085e-05, "loss": 0.106, "loss_nan_ranks": 0, "loss_rank_avg": 0.08428169786930084, "step": 1965, "valid_targets_mean": 833.7, "valid_targets_min": 501 }, { "epoch": 3.152, "grad_norm": 0.797000991363135, "learning_rate": 2.6841236018551402e-05, "loss": 0.1187, "loss_nan_ranks": 0, "loss_rank_avg": 0.11778293550014496, "step": 1970, "valid_targets_mean": 869.9, "valid_targets_min": 441 }, { "epoch": 3.16, "grad_norm": 0.7417976078950065, "learning_rate": 2.6766198687180028e-05, "loss": 0.1305, "loss_nan_ranks": 0, "loss_rank_avg": 0.10415495932102203, "step": 1975, "valid_targets_mean": 898.8, "valid_targets_min": 471 }, { "epoch": 3.168, "grad_norm": 0.8089478184209437, "learning_rate": 2.6691053646535564e-05, "loss": 0.1623, "loss_nan_ranks": 0, "loss_rank_avg": 0.11006303131580353, "step": 1980, "valid_targets_mean": 914.4, "valid_targets_min": 543 }, { "epoch": 3.176, "grad_norm": 0.7090781876298934, "learning_rate": 2.6615802092831446e-05, "loss": 0.1047, "loss_nan_ranks": 0, "loss_rank_avg": 0.12003593146800995, "step": 1985, "valid_targets_mean": 847.3, "valid_targets_min": 509 }, { "epoch": 3.184, "grad_norm": 0.8798620396082656, "learning_rate": 2.6540445223976637e-05, "loss": 0.0974, "loss_nan_ranks": 0, "loss_rank_avg": 0.12840867042541504, "step": 1990, "valid_targets_mean": 868.6, "valid_targets_min": 576 }, { "epoch": 3.192, "grad_norm": 0.7587296343097659, "learning_rate": 2.6464984239556602e-05, "loss": 0.1094, "loss_nan_ranks": 0, "loss_rank_avg": 0.08521630614995956, "step": 1995, "valid_targets_mean": 819.2, "valid_targets_min": 454 }, { "epoch": 3.2, "grad_norm": 0.7323881770409771, "learning_rate": 2.63894203408142e-05, "loss": 0.1192, "loss_nan_ranks": 0, "loss_rank_avg": 0.10320921987295151, "step": 2000, "valid_targets_mean": 838.9, "valid_targets_min": 484 }, { "epoch": 3.208, "grad_norm": 0.7810349295950669, "learning_rate": 2.6313754730630528e-05, "loss": 0.139, "loss_nan_ranks": 0, "loss_rank_avg": 0.12769360840320587, "step": 2005, "valid_targets_mean": 969.1, "valid_targets_min": 577 }, { "epoch": 3.216, "grad_norm": 0.6965562856943036, "learning_rate": 2.623798861350582e-05, "loss": 0.1298, "loss_nan_ranks": 0, "loss_rank_avg": 0.11534813791513443, "step": 2010, "valid_targets_mean": 912.4, "valid_targets_min": 529 }, { "epoch": 3.224, "grad_norm": 0.721081345451744, "learning_rate": 2.6162123195540247e-05, "loss": 0.1231, "loss_nan_ranks": 0, "loss_rank_avg": 0.09381479024887085, "step": 2015, "valid_targets_mean": 704.2, "valid_targets_min": 480 }, { "epoch": 3.232, "grad_norm": 0.8473383362522039, "learning_rate": 2.6086159684414726e-05, "loss": 0.1359, "loss_nan_ranks": 0, "loss_rank_avg": 0.2669636309146881, "step": 2020, "valid_targets_mean": 1619.2, "valid_targets_min": 479 }, { "epoch": 3.24, "grad_norm": 0.688237535569076, "learning_rate": 2.6010099289371694e-05, "loss": 0.1218, "loss_nan_ranks": 0, "loss_rank_avg": 0.11964704096317291, "step": 2025, "valid_targets_mean": 1055.5, "valid_targets_min": 540 }, { "epoch": 3.248, "grad_norm": 0.6929958820942029, "learning_rate": 2.5933943221195844e-05, "loss": 0.1295, "loss_nan_ranks": 0, "loss_rank_avg": 0.08900363743305206, "step": 2030, "valid_targets_mean": 793.6, "valid_targets_min": 396 }, { "epoch": 3.2560000000000002, "grad_norm": 0.7775109141995964, "learning_rate": 2.5857692692194884e-05, "loss": 0.1055, "loss_nan_ranks": 0, "loss_rank_avg": 0.10303100943565369, "step": 2035, "valid_targets_mean": 882.3, "valid_targets_min": 480 }, { "epoch": 3.2640000000000002, "grad_norm": 0.7091217128524674, "learning_rate": 2.5781348916180195e-05, "loss": 0.0998, "loss_nan_ranks": 0, "loss_rank_avg": 0.098441481590271, "step": 2040, "valid_targets_mean": 1006.2, "valid_targets_min": 590 }, { "epoch": 3.2720000000000002, "grad_norm": 0.8897445923890558, "learning_rate": 2.570491310844755e-05, "loss": 0.1389, "loss_nan_ranks": 0, "loss_rank_avg": 0.15889273583889008, "step": 2045, "valid_targets_mean": 1019.2, "valid_targets_min": 509 }, { "epoch": 3.2800000000000002, "grad_norm": 0.7107182186416007, "learning_rate": 2.562838648575774e-05, "loss": 0.118, "loss_nan_ranks": 0, "loss_rank_avg": 0.11557357013225555, "step": 2050, "valid_targets_mean": 1069.4, "valid_targets_min": 546 }, { "epoch": 3.288, "grad_norm": 0.7129277983851455, "learning_rate": 2.5551770266317224e-05, "loss": 0.1252, "loss_nan_ranks": 0, "loss_rank_avg": 0.10616722702980042, "step": 2055, "valid_targets_mean": 885.9, "valid_targets_min": 467 }, { "epoch": 3.296, "grad_norm": 0.677295678050319, "learning_rate": 2.5475065669758713e-05, "loss": 0.1164, "loss_nan_ranks": 0, "loss_rank_avg": 0.08337932825088501, "step": 2060, "valid_targets_mean": 820.1, "valid_targets_min": 400 }, { "epoch": 3.304, "grad_norm": 0.7824949930209248, "learning_rate": 2.5398273917121786e-05, "loss": 0.0995, "loss_nan_ranks": 0, "loss_rank_avg": 0.09587116539478302, "step": 2065, "valid_targets_mean": 799.9, "valid_targets_min": 494 }, { "epoch": 3.312, "grad_norm": 1.0827163695354118, "learning_rate": 2.532139623083342e-05, "loss": 0.1472, "loss_nan_ranks": 0, "loss_rank_avg": 0.23146377503871918, "step": 2070, "valid_targets_mean": 1284.2, "valid_targets_min": 529 }, { "epoch": 3.32, "grad_norm": 0.7876540426113231, "learning_rate": 2.5244433834688552e-05, "loss": 0.1476, "loss_nan_ranks": 0, "loss_rank_avg": 0.2188844531774521, "step": 2075, "valid_targets_mean": 1293.9, "valid_targets_min": 510 }, { "epoch": 3.328, "grad_norm": 0.8039211820025277, "learning_rate": 2.5167387953830602e-05, "loss": 0.1103, "loss_nan_ranks": 0, "loss_rank_avg": 0.1387537121772766, "step": 2080, "valid_targets_mean": 830.8, "valid_targets_min": 394 }, { "epoch": 3.336, "grad_norm": 0.8150464261801589, "learning_rate": 2.5090259814731946e-05, "loss": 0.1188, "loss_nan_ranks": 0, "loss_rank_avg": 0.08860965073108673, "step": 2085, "valid_targets_mean": 765.8, "valid_targets_min": 465 }, { "epoch": 3.344, "grad_norm": 0.7721377677499426, "learning_rate": 2.5013050645174414e-05, "loss": 0.108, "loss_nan_ranks": 0, "loss_rank_avg": 0.12573741376399994, "step": 2090, "valid_targets_mean": 910.7, "valid_targets_min": 562 }, { "epoch": 3.352, "grad_norm": 1.051076408856922, "learning_rate": 2.4935761674229735e-05, "loss": 0.136, "loss_nan_ranks": 0, "loss_rank_avg": 0.17311853170394897, "step": 2095, "valid_targets_mean": 1102.4, "valid_targets_min": 580 }, { "epoch": 3.36, "grad_norm": 0.9010163902542431, "learning_rate": 2.4858394132239982e-05, "loss": 0.1274, "loss_nan_ranks": 0, "loss_rank_avg": 0.15161563456058502, "step": 2100, "valid_targets_mean": 930.6, "valid_targets_min": 486 }, { "epoch": 3.368, "grad_norm": 0.6627204250441622, "learning_rate": 2.4780949250797964e-05, "loss": 0.1309, "loss_nan_ranks": 0, "loss_rank_avg": 0.08458087593317032, "step": 2105, "valid_targets_mean": 834.9, "valid_targets_min": 476 }, { "epoch": 3.376, "grad_norm": 0.83019951899081, "learning_rate": 2.4703428262727656e-05, "loss": 0.1335, "loss_nan_ranks": 0, "loss_rank_avg": 0.10285459458827972, "step": 2110, "valid_targets_mean": 800.4, "valid_targets_min": 549 }, { "epoch": 3.384, "grad_norm": 0.8268436558039793, "learning_rate": 2.4625832402064525e-05, "loss": 0.1005, "loss_nan_ranks": 0, "loss_rank_avg": 0.1025954931974411, "step": 2115, "valid_targets_mean": 846.8, "valid_targets_min": 539 }, { "epoch": 3.392, "grad_norm": 0.8038334993067265, "learning_rate": 2.454816290403595e-05, "loss": 0.1267, "loss_nan_ranks": 0, "loss_rank_avg": 0.11060905456542969, "step": 2120, "valid_targets_mean": 811.8, "valid_targets_min": 516 }, { "epoch": 3.4, "grad_norm": 0.6898907065545627, "learning_rate": 2.4470421005041492e-05, "loss": 0.1041, "loss_nan_ranks": 0, "loss_rank_avg": 0.1327819973230362, "step": 2125, "valid_targets_mean": 1047.2, "valid_targets_min": 522 }, { "epoch": 3.408, "grad_norm": 4.330284104552601, "learning_rate": 2.4392607942633263e-05, "loss": 0.1015, "loss_nan_ranks": 0, "loss_rank_avg": 0.11577026546001434, "step": 2130, "valid_targets_mean": 1057.9, "valid_targets_min": 574 }, { "epoch": 3.416, "grad_norm": 0.678009922465366, "learning_rate": 2.43147249554962e-05, "loss": 0.1616, "loss_nan_ranks": 0, "loss_rank_avg": 0.08846744149923325, "step": 2135, "valid_targets_mean": 794.7, "valid_targets_min": 512 }, { "epoch": 3.424, "grad_norm": 0.7518169977411372, "learning_rate": 2.423677328342835e-05, "loss": 0.1038, "loss_nan_ranks": 0, "loss_rank_avg": 0.09468118846416473, "step": 2140, "valid_targets_mean": 853.1, "valid_targets_min": 485 }, { "epoch": 3.432, "grad_norm": 0.7984169304764213, "learning_rate": 2.415875416732113e-05, "loss": 0.1174, "loss_nan_ranks": 0, "loss_rank_avg": 0.1375335156917572, "step": 2145, "valid_targets_mean": 894.8, "valid_targets_min": 535 }, { "epoch": 3.44, "grad_norm": 0.8317134998385687, "learning_rate": 2.4080668849139603e-05, "loss": 0.118, "loss_nan_ranks": 0, "loss_rank_avg": 0.11628472805023193, "step": 2150, "valid_targets_mean": 955.4, "valid_targets_min": 516 }, { "epoch": 3.448, "grad_norm": 0.8125522664200509, "learning_rate": 2.4002518571902665e-05, "loss": 0.1242, "loss_nan_ranks": 0, "loss_rank_avg": 0.1283692717552185, "step": 2155, "valid_targets_mean": 873.8, "valid_targets_min": 444 }, { "epoch": 3.456, "grad_norm": 0.9200367233165645, "learning_rate": 2.392430457966328e-05, "loss": 0.1109, "loss_nan_ranks": 0, "loss_rank_avg": 0.1673322319984436, "step": 2160, "valid_targets_mean": 972.5, "valid_targets_min": 453 }, { "epoch": 3.464, "grad_norm": 0.7357592949714992, "learning_rate": 2.3846028117488686e-05, "loss": 0.1079, "loss_nan_ranks": 0, "loss_rank_avg": 0.1122480109333992, "step": 2165, "valid_targets_mean": 1010.8, "valid_targets_min": 563 }, { "epoch": 3.472, "grad_norm": 0.720650181938497, "learning_rate": 2.3767690431440533e-05, "loss": 0.1212, "loss_nan_ranks": 0, "loss_rank_avg": 0.09994740039110184, "step": 2170, "valid_targets_mean": 817.9, "valid_targets_min": 427 }, { "epoch": 3.48, "grad_norm": 0.823217288866293, "learning_rate": 2.368929276855512e-05, "loss": 0.1106, "loss_nan_ranks": 0, "loss_rank_avg": 0.14170852303504944, "step": 2175, "valid_targets_mean": 876.6, "valid_targets_min": 544 }, { "epoch": 3.488, "grad_norm": 1.058760731084061, "learning_rate": 2.361083637682347e-05, "loss": 0.132, "loss_nan_ranks": 0, "loss_rank_avg": 0.10451669245958328, "step": 2180, "valid_targets_mean": 871.3, "valid_targets_min": 508 }, { "epoch": 3.496, "grad_norm": 0.7717649786690791, "learning_rate": 2.3532322505171502e-05, "loss": 0.1136, "loss_nan_ranks": 0, "loss_rank_avg": 0.11375638842582703, "step": 2185, "valid_targets_mean": 828.2, "valid_targets_min": 519 }, { "epoch": 3.504, "grad_norm": 0.7183572277157173, "learning_rate": 2.3453752403440147e-05, "loss": 0.139, "loss_nan_ranks": 0, "loss_rank_avg": 0.1301645189523697, "step": 2190, "valid_targets_mean": 972.3, "valid_targets_min": 424 }, { "epoch": 3.512, "grad_norm": 0.7728123534193143, "learning_rate": 2.337512732236545e-05, "loss": 0.1058, "loss_nan_ranks": 0, "loss_rank_avg": 0.10168056190013885, "step": 2195, "valid_targets_mean": 756.6, "valid_targets_min": 398 }, { "epoch": 3.52, "grad_norm": 1.4339255219311247, "learning_rate": 2.3296448513558628e-05, "loss": 0.1062, "loss_nan_ranks": 0, "loss_rank_avg": 0.09760090708732605, "step": 2200, "valid_targets_mean": 897.6, "valid_targets_min": 522 }, { "epoch": 3.528, "grad_norm": 0.6868233777905957, "learning_rate": 2.321771722948622e-05, "loss": 0.1484, "loss_nan_ranks": 0, "loss_rank_avg": 0.1449117660522461, "step": 2205, "valid_targets_mean": 1630.0, "valid_targets_min": 536 }, { "epoch": 3.536, "grad_norm": 0.6806906401608229, "learning_rate": 2.3138934723450074e-05, "loss": 0.1165, "loss_nan_ranks": 0, "loss_rank_avg": 0.08565478026866913, "step": 2210, "valid_targets_mean": 856.7, "valid_targets_min": 495 }, { "epoch": 3.544, "grad_norm": 0.7627319393583615, "learning_rate": 2.306010224956744e-05, "loss": 0.1205, "loss_nan_ranks": 0, "loss_rank_avg": 0.1330558806657791, "step": 2215, "valid_targets_mean": 999.3, "valid_targets_min": 535 }, { "epoch": 3.552, "grad_norm": 0.8247596543816615, "learning_rate": 2.2981221062750986e-05, "loss": 0.1604, "loss_nan_ranks": 0, "loss_rank_avg": 0.14138635993003845, "step": 2220, "valid_targets_mean": 964.9, "valid_targets_min": 532 }, { "epoch": 3.56, "grad_norm": 0.7095954426144264, "learning_rate": 2.290229241868882e-05, "loss": 0.1188, "loss_nan_ranks": 0, "loss_rank_avg": 0.10400180518627167, "step": 2225, "valid_targets_mean": 905.8, "valid_targets_min": 494 }, { "epoch": 3.568, "grad_norm": 0.7841252716092892, "learning_rate": 2.282331757382454e-05, "loss": 0.1637, "loss_nan_ranks": 0, "loss_rank_avg": 0.12375243008136749, "step": 2230, "valid_targets_mean": 1084.1, "valid_targets_min": 521 }, { "epoch": 3.576, "grad_norm": 0.6258147218913532, "learning_rate": 2.2744297785337155e-05, "loss": 0.1236, "loss_nan_ranks": 0, "loss_rank_avg": 0.08287594467401505, "step": 2235, "valid_targets_mean": 893.2, "valid_targets_min": 594 }, { "epoch": 3.584, "grad_norm": 0.7826941670558715, "learning_rate": 2.2665234311121155e-05, "loss": 0.1209, "loss_nan_ranks": 0, "loss_rank_avg": 0.11382591724395752, "step": 2240, "valid_targets_mean": 926.1, "valid_targets_min": 562 }, { "epoch": 3.592, "grad_norm": 0.7325008965964993, "learning_rate": 2.258612840976645e-05, "loss": 0.1066, "loss_nan_ranks": 0, "loss_rank_avg": 0.09644539654254913, "step": 2245, "valid_targets_mean": 766.8, "valid_targets_min": 380 }, { "epoch": 3.6, "grad_norm": 0.682578876278063, "learning_rate": 2.2506981340538315e-05, "loss": 0.1069, "loss_nan_ranks": 0, "loss_rank_avg": 0.09101128578186035, "step": 2250, "valid_targets_mean": 852.6, "valid_targets_min": 505 }, { "epoch": 3.608, "grad_norm": 0.5290399948663445, "learning_rate": 2.2427794363357384e-05, "loss": 0.1158, "loss_nan_ranks": 0, "loss_rank_avg": 0.11653144657611847, "step": 2255, "valid_targets_mean": 2054.8, "valid_targets_min": 523 }, { "epoch": 3.616, "grad_norm": 0.7175384669599434, "learning_rate": 2.2348568738779566e-05, "loss": 0.1301, "loss_nan_ranks": 0, "loss_rank_avg": 0.10867810249328613, "step": 2260, "valid_targets_mean": 813.8, "valid_targets_min": 530 }, { "epoch": 3.624, "grad_norm": 0.8469177584073655, "learning_rate": 2.2269305727975993e-05, "loss": 0.1317, "loss_nan_ranks": 0, "loss_rank_avg": 0.14089643955230713, "step": 2265, "valid_targets_mean": 977.0, "valid_targets_min": 517 }, { "epoch": 3.632, "grad_norm": 0.7775610010446571, "learning_rate": 2.2190006592712927e-05, "loss": 0.1193, "loss_nan_ranks": 0, "loss_rank_avg": 0.13051855564117432, "step": 2270, "valid_targets_mean": 1009.5, "valid_targets_min": 492 }, { "epoch": 3.64, "grad_norm": 0.7374918588799454, "learning_rate": 2.2110672595331698e-05, "loss": 0.1192, "loss_nan_ranks": 0, "loss_rank_avg": 0.09962435066699982, "step": 2275, "valid_targets_mean": 892.7, "valid_targets_min": 450 }, { "epoch": 3.648, "grad_norm": 0.7705278585713413, "learning_rate": 2.2031304998728587e-05, "loss": 0.1043, "loss_nan_ranks": 0, "loss_rank_avg": 0.11635805666446686, "step": 2280, "valid_targets_mean": 872.3, "valid_targets_min": 533 }, { "epoch": 3.656, "grad_norm": 0.7795818552117938, "learning_rate": 2.1951905066334737e-05, "loss": 0.103, "loss_nan_ranks": 0, "loss_rank_avg": 0.10012821108102798, "step": 2285, "valid_targets_mean": 758.8, "valid_targets_min": 481 }, { "epoch": 3.664, "grad_norm": 4.9483858153946025, "learning_rate": 2.1872474062096046e-05, "loss": 0.1076, "loss_nan_ranks": 0, "loss_rank_avg": 0.09752249717712402, "step": 2290, "valid_targets_mean": 850.1, "valid_targets_min": 502 }, { "epoch": 3.672, "grad_norm": 0.8700115929950681, "learning_rate": 2.179301325045301e-05, "loss": 0.1273, "loss_nan_ranks": 0, "loss_rank_avg": 0.19074232876300812, "step": 2295, "valid_targets_mean": 1016.4, "valid_targets_min": 487 }, { "epoch": 3.68, "grad_norm": 1.5334691662476867, "learning_rate": 2.1713523896320647e-05, "loss": 0.1042, "loss_nan_ranks": 0, "loss_rank_avg": 0.08753467351198196, "step": 2300, "valid_targets_mean": 775.1, "valid_targets_min": 530 }, { "epoch": 3.6879999999999997, "grad_norm": 0.7194518070589961, "learning_rate": 2.163400726506832e-05, "loss": 0.1049, "loss_nan_ranks": 0, "loss_rank_avg": 0.08830483257770538, "step": 2305, "valid_targets_mean": 729.5, "valid_targets_min": 443 }, { "epoch": 3.6959999999999997, "grad_norm": 0.8815939342282404, "learning_rate": 2.155446462249961e-05, "loss": 0.1391, "loss_nan_ranks": 0, "loss_rank_avg": 0.1877199411392212, "step": 2310, "valid_targets_mean": 1014.9, "valid_targets_min": 467 }, { "epoch": 3.7039999999999997, "grad_norm": 0.7630816705563278, "learning_rate": 2.147489723483217e-05, "loss": 0.1023, "loss_nan_ranks": 0, "loss_rank_avg": 0.10068291425704956, "step": 2315, "valid_targets_mean": 772.1, "valid_targets_min": 518 }, { "epoch": 3.7119999999999997, "grad_norm": 0.7382517531107952, "learning_rate": 2.139530636867757e-05, "loss": 0.1038, "loss_nan_ranks": 0, "loss_rank_avg": 0.097693532705307, "step": 2320, "valid_targets_mean": 845.9, "valid_targets_min": 515 }, { "epoch": 3.7199999999999998, "grad_norm": 0.6952681638720215, "learning_rate": 2.1315693291021114e-05, "loss": 0.1044, "loss_nan_ranks": 0, "loss_rank_avg": 0.09656163305044174, "step": 2325, "valid_targets_mean": 779.2, "valid_targets_min": 501 }, { "epoch": 3.7279999999999998, "grad_norm": 0.7011125947204983, "learning_rate": 2.1236059269201686e-05, "loss": 0.0912, "loss_nan_ranks": 0, "loss_rank_avg": 0.08665567636489868, "step": 2330, "valid_targets_mean": 903.8, "valid_targets_min": 541 }, { "epoch": 3.7359999999999998, "grad_norm": 0.7596491570380602, "learning_rate": 2.1156405570891584e-05, "loss": 0.106, "loss_nan_ranks": 0, "loss_rank_avg": 0.16104748845100403, "step": 2335, "valid_targets_mean": 1337.7, "valid_targets_min": 515 }, { "epoch": 3.7439999999999998, "grad_norm": 0.7837544123142385, "learning_rate": 2.1076733464076322e-05, "loss": 0.1224, "loss_nan_ranks": 0, "loss_rank_avg": 0.10330668836832047, "step": 2340, "valid_targets_mean": 799.4, "valid_targets_min": 571 }, { "epoch": 3.752, "grad_norm": 0.7653711981254095, "learning_rate": 2.0997044217034462e-05, "loss": 0.1066, "loss_nan_ranks": 0, "loss_rank_avg": 0.1075320616364479, "step": 2345, "valid_targets_mean": 756.2, "valid_targets_min": 473 }, { "epoch": 3.76, "grad_norm": 0.8264468923738882, "learning_rate": 2.0917339098317405e-05, "loss": 0.1316, "loss_nan_ranks": 0, "loss_rank_avg": 0.1973426342010498, "step": 2350, "valid_targets_mean": 1276.9, "valid_targets_min": 445 }, { "epoch": 3.768, "grad_norm": 0.7585885229626271, "learning_rate": 2.083761937672922e-05, "loss": 0.1064, "loss_nan_ranks": 0, "loss_rank_avg": 0.09834052622318268, "step": 2355, "valid_targets_mean": 793.8, "valid_targets_min": 424 }, { "epoch": 3.776, "grad_norm": 0.743265105231493, "learning_rate": 2.0757886321306433e-05, "loss": 0.1122, "loss_nan_ranks": 0, "loss_rank_avg": 0.10782724618911743, "step": 2360, "valid_targets_mean": 869.9, "valid_targets_min": 541 }, { "epoch": 3.784, "grad_norm": 0.8016117024296018, "learning_rate": 2.0678141201297827e-05, "loss": 0.11, "loss_nan_ranks": 0, "loss_rank_avg": 0.12636038661003113, "step": 2365, "valid_targets_mean": 1038.0, "valid_targets_min": 537 }, { "epoch": 3.792, "grad_norm": 0.7710823001060885, "learning_rate": 2.059838528614423e-05, "loss": 0.1223, "loss_nan_ranks": 0, "loss_rank_avg": 0.10119035840034485, "step": 2370, "valid_targets_mean": 893.8, "valid_targets_min": 504 }, { "epoch": 3.8, "grad_norm": 0.7460626197721942, "learning_rate": 2.0518619845458322e-05, "loss": 0.1604, "loss_nan_ranks": 0, "loss_rank_avg": 0.10346980392932892, "step": 2375, "valid_targets_mean": 807.0, "valid_targets_min": 515 }, { "epoch": 3.808, "grad_norm": 0.8123087104330138, "learning_rate": 2.0438846149004426e-05, "loss": 0.144, "loss_nan_ranks": 0, "loss_rank_avg": 0.1365559697151184, "step": 2380, "valid_targets_mean": 992.1, "valid_targets_min": 548 }, { "epoch": 3.816, "grad_norm": 0.7412259924373668, "learning_rate": 2.0359065466678268e-05, "loss": 0.0945, "loss_nan_ranks": 0, "loss_rank_avg": 0.09502339363098145, "step": 2385, "valid_targets_mean": 788.4, "valid_targets_min": 451 }, { "epoch": 3.824, "grad_norm": 0.73069039856101, "learning_rate": 2.0279279068486795e-05, "loss": 0.1077, "loss_nan_ranks": 0, "loss_rank_avg": 0.1051429808139801, "step": 2390, "valid_targets_mean": 861.6, "valid_targets_min": 557 }, { "epoch": 3.832, "grad_norm": 0.6985213047175839, "learning_rate": 2.019948822452794e-05, "loss": 0.1115, "loss_nan_ranks": 0, "loss_rank_avg": 0.09317454695701599, "step": 2395, "valid_targets_mean": 854.2, "valid_targets_min": 503 }, { "epoch": 3.84, "grad_norm": 0.6558227489154802, "learning_rate": 2.0119694204970393e-05, "loss": 0.1105, "loss_nan_ranks": 0, "loss_rank_avg": 0.12951532006263733, "step": 2400, "valid_targets_mean": 1170.6, "valid_targets_min": 537 }, { "epoch": 3.848, "grad_norm": 0.7244860226360714, "learning_rate": 2.0039898280033414e-05, "loss": 0.1045, "loss_nan_ranks": 0, "loss_rank_avg": 0.10344484448432922, "step": 2405, "valid_targets_mean": 888.8, "valid_targets_min": 518 }, { "epoch": 3.856, "grad_norm": 0.6853248190879865, "learning_rate": 1.9960101719966592e-05, "loss": 0.1229, "loss_nan_ranks": 0, "loss_rank_avg": 0.12488465756177902, "step": 2410, "valid_targets_mean": 1122.1, "valid_targets_min": 458 }, { "epoch": 3.864, "grad_norm": 0.6472647160190197, "learning_rate": 1.9880305795029617e-05, "loss": 0.0977, "loss_nan_ranks": 0, "loss_rank_avg": 0.10344689339399338, "step": 2415, "valid_targets_mean": 1005.9, "valid_targets_min": 645 }, { "epoch": 3.872, "grad_norm": 0.7842785828649766, "learning_rate": 1.980051177547207e-05, "loss": 0.1287, "loss_nan_ranks": 0, "loss_rank_avg": 0.10108557343482971, "step": 2420, "valid_targets_mean": 900.6, "valid_targets_min": 462 }, { "epoch": 3.88, "grad_norm": 0.8140599499363703, "learning_rate": 1.9720720931513212e-05, "loss": 0.1084, "loss_nan_ranks": 0, "loss_rank_avg": 0.12927132844924927, "step": 2425, "valid_targets_mean": 966.9, "valid_targets_min": 506 }, { "epoch": 3.888, "grad_norm": 0.7386140762653786, "learning_rate": 1.9640934533321735e-05, "loss": 0.1165, "loss_nan_ranks": 0, "loss_rank_avg": 0.11146294325590134, "step": 2430, "valid_targets_mean": 841.1, "valid_targets_min": 495 }, { "epoch": 3.896, "grad_norm": 0.9449195329909676, "learning_rate": 1.9561153850995577e-05, "loss": 0.1333, "loss_nan_ranks": 0, "loss_rank_avg": 0.1926388293504715, "step": 2435, "valid_targets_mean": 1120.4, "valid_targets_min": 457 }, { "epoch": 3.904, "grad_norm": 0.7623301589954754, "learning_rate": 1.948138015454168e-05, "loss": 0.1303, "loss_nan_ranks": 0, "loss_rank_avg": 0.1025683581829071, "step": 2440, "valid_targets_mean": 899.2, "valid_targets_min": 505 }, { "epoch": 3.912, "grad_norm": 0.8466799342788941, "learning_rate": 1.9401614713855775e-05, "loss": 0.114, "loss_nan_ranks": 0, "loss_rank_avg": 0.1362592577934265, "step": 2445, "valid_targets_mean": 1037.2, "valid_targets_min": 554 }, { "epoch": 3.92, "grad_norm": 1.5796336446869077, "learning_rate": 1.932185879870218e-05, "loss": 0.1257, "loss_nan_ranks": 0, "loss_rank_avg": 0.12985458970069885, "step": 2450, "valid_targets_mean": 1039.4, "valid_targets_min": 490 }, { "epoch": 3.928, "grad_norm": 0.829075498076157, "learning_rate": 1.924211367869357e-05, "loss": 0.1156, "loss_nan_ranks": 0, "loss_rank_avg": 0.11858369410037994, "step": 2455, "valid_targets_mean": 892.6, "valid_targets_min": 438 }, { "epoch": 3.936, "grad_norm": 1.0985300935297633, "learning_rate": 1.9162380623270783e-05, "loss": 0.1139, "loss_nan_ranks": 0, "loss_rank_avg": 0.12260019034147263, "step": 2460, "valid_targets_mean": 891.1, "valid_targets_min": 494 }, { "epoch": 3.944, "grad_norm": 0.8323078229026958, "learning_rate": 1.90826609016826e-05, "loss": 0.1101, "loss_nan_ranks": 0, "loss_rank_avg": 0.1549152135848999, "step": 2465, "valid_targets_mean": 966.0, "valid_targets_min": 492 }, { "epoch": 3.952, "grad_norm": 0.7646826806626748, "learning_rate": 1.9002955782965548e-05, "loss": 0.1011, "loss_nan_ranks": 0, "loss_rank_avg": 0.10078634321689606, "step": 2470, "valid_targets_mean": 918.1, "valid_targets_min": 563 }, { "epoch": 3.96, "grad_norm": 0.9739262577751554, "learning_rate": 1.8923266535923688e-05, "loss": 0.1162, "loss_nan_ranks": 0, "loss_rank_avg": 0.15166492760181427, "step": 2475, "valid_targets_mean": 941.0, "valid_targets_min": 401 }, { "epoch": 3.968, "grad_norm": 0.7603308818023086, "learning_rate": 1.8843594429108426e-05, "loss": 0.1379, "loss_nan_ranks": 0, "loss_rank_avg": 0.16866816580295563, "step": 2480, "valid_targets_mean": 1221.4, "valid_targets_min": 566 }, { "epoch": 3.976, "grad_norm": 0.7914420062717339, "learning_rate": 1.8763940730798324e-05, "loss": 0.1063, "loss_nan_ranks": 0, "loss_rank_avg": 0.1709909737110138, "step": 2485, "valid_targets_mean": 1715.1, "valid_targets_min": 479 }, { "epoch": 3.984, "grad_norm": 0.6954145735909032, "learning_rate": 1.8684306708978896e-05, "loss": 0.129, "loss_nan_ranks": 0, "loss_rank_avg": 0.0953080952167511, "step": 2490, "valid_targets_mean": 783.3, "valid_targets_min": 533 }, { "epoch": 3.992, "grad_norm": 0.796723667327091, "learning_rate": 1.8604693631322433e-05, "loss": 0.1194, "loss_nan_ranks": 0, "loss_rank_avg": 0.1432575285434723, "step": 2495, "valid_targets_mean": 931.0, "valid_targets_min": 499 }, { "epoch": 4.0, "grad_norm": 0.7452630637139206, "learning_rate": 1.852510276516783e-05, "loss": 0.107, "loss_nan_ranks": 0, "loss_rank_avg": 0.09386100620031357, "step": 2500, "valid_targets_mean": 808.5, "valid_targets_min": 525 }, { "epoch": 4.008, "grad_norm": 0.6039705377236406, "learning_rate": 1.8445535377500393e-05, "loss": 0.0829, "loss_nan_ranks": 0, "loss_rank_avg": 0.07107095420360565, "step": 2505, "valid_targets_mean": 872.4, "valid_targets_min": 557 }, { "epoch": 4.016, "grad_norm": 0.7217484500470416, "learning_rate": 1.8365992734931686e-05, "loss": 0.0826, "loss_nan_ranks": 0, "loss_rank_avg": 0.06790906190872192, "step": 2510, "valid_targets_mean": 872.8, "valid_targets_min": 593 }, { "epoch": 4.024, "grad_norm": 0.9824074436234963, "learning_rate": 1.8286476103679356e-05, "loss": 0.0941, "loss_nan_ranks": 0, "loss_rank_avg": 0.0929323211312294, "step": 2515, "valid_targets_mean": 864.1, "valid_targets_min": 487 }, { "epoch": 4.032, "grad_norm": 1.0904545035112219, "learning_rate": 1.8206986749546992e-05, "loss": 0.1004, "loss_nan_ranks": 0, "loss_rank_avg": 0.07187691330909729, "step": 2520, "valid_targets_mean": 899.8, "valid_targets_min": 523 }, { "epoch": 4.04, "grad_norm": 0.6390439813848777, "learning_rate": 1.8127525937903957e-05, "loss": 0.0945, "loss_nan_ranks": 0, "loss_rank_avg": 0.06667788326740265, "step": 2525, "valid_targets_mean": 1012.2, "valid_targets_min": 484 }, { "epoch": 4.048, "grad_norm": 0.8624151009076726, "learning_rate": 1.8048094933665262e-05, "loss": 0.0796, "loss_nan_ranks": 0, "loss_rank_avg": 0.07868341356515884, "step": 2530, "valid_targets_mean": 920.8, "valid_targets_min": 480 }, { "epoch": 4.056, "grad_norm": 0.9692779244154147, "learning_rate": 1.7968695001271416e-05, "loss": 0.0808, "loss_nan_ranks": 0, "loss_rank_avg": 0.06797877699136734, "step": 2535, "valid_targets_mean": 882.9, "valid_targets_min": 538 }, { "epoch": 4.064, "grad_norm": 0.7879509422386841, "learning_rate": 1.7889327404668316e-05, "loss": 0.0889, "loss_nan_ranks": 0, "loss_rank_avg": 0.06811286509037018, "step": 2540, "valid_targets_mean": 657.0, "valid_targets_min": 417 }, { "epoch": 4.072, "grad_norm": 0.8343540443495483, "learning_rate": 1.7809993407287083e-05, "loss": 0.1143, "loss_nan_ranks": 0, "loss_rank_avg": 0.0867060124874115, "step": 2545, "valid_targets_mean": 962.3, "valid_targets_min": 400 }, { "epoch": 4.08, "grad_norm": 0.6517667540067708, "learning_rate": 1.7730694272024018e-05, "loss": 0.0755, "loss_nan_ranks": 0, "loss_rank_avg": 0.07166572660207748, "step": 2550, "valid_targets_mean": 792.6, "valid_targets_min": 425 }, { "epoch": 4.088, "grad_norm": 0.9409298974066815, "learning_rate": 1.765143126122044e-05, "loss": 0.0808, "loss_nan_ranks": 0, "loss_rank_avg": 0.07075785845518112, "step": 2555, "valid_targets_mean": 701.7, "valid_targets_min": 492 }, { "epoch": 4.096, "grad_norm": 0.9303683290460381, "learning_rate": 1.7572205636642622e-05, "loss": 0.0924, "loss_nan_ranks": 0, "loss_rank_avg": 0.077401302754879, "step": 2560, "valid_targets_mean": 734.2, "valid_targets_min": 446 }, { "epoch": 4.104, "grad_norm": 0.8323596502058777, "learning_rate": 1.749301865946169e-05, "loss": 0.1043, "loss_nan_ranks": 0, "loss_rank_avg": 0.07804550230503082, "step": 2565, "valid_targets_mean": 860.5, "valid_targets_min": 542 }, { "epoch": 4.112, "grad_norm": 0.8559619720793094, "learning_rate": 1.7413871590233557e-05, "loss": 0.0925, "loss_nan_ranks": 0, "loss_rank_avg": 0.0837281197309494, "step": 2570, "valid_targets_mean": 873.4, "valid_targets_min": 520 }, { "epoch": 4.12, "grad_norm": 0.7121674580406296, "learning_rate": 1.7334765688878848e-05, "loss": 0.0933, "loss_nan_ranks": 0, "loss_rank_avg": 0.06879082322120667, "step": 2575, "valid_targets_mean": 798.9, "valid_targets_min": 548 }, { "epoch": 4.128, "grad_norm": 0.9971120495500967, "learning_rate": 1.7255702214662852e-05, "loss": 0.086, "loss_nan_ranks": 0, "loss_rank_avg": 0.09851626306772232, "step": 2580, "valid_targets_mean": 781.4, "valid_targets_min": 365 }, { "epoch": 4.136, "grad_norm": 0.9219539955019028, "learning_rate": 1.7176682426175468e-05, "loss": 0.1081, "loss_nan_ranks": 0, "loss_rank_avg": 0.0875345915555954, "step": 2585, "valid_targets_mean": 974.8, "valid_targets_min": 510 }, { "epoch": 4.144, "grad_norm": 1.2717092381532147, "learning_rate": 1.709770758131118e-05, "loss": 0.0926, "loss_nan_ranks": 0, "loss_rank_avg": 0.1317518949508667, "step": 2590, "valid_targets_mean": 1149.6, "valid_targets_min": 469 }, { "epoch": 4.152, "grad_norm": 0.8800123242601117, "learning_rate": 1.7018778937249017e-05, "loss": 0.0876, "loss_nan_ranks": 0, "loss_rank_avg": 0.10916811227798462, "step": 2595, "valid_targets_mean": 989.7, "valid_targets_min": 657 }, { "epoch": 4.16, "grad_norm": 0.9112036604014394, "learning_rate": 1.6939897750432562e-05, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.09577897936105728, "step": 2600, "valid_targets_mean": 955.5, "valid_targets_min": 566 }, { "epoch": 4.168, "grad_norm": 0.8387591446603542, "learning_rate": 1.6861065276549933e-05, "loss": 0.0936, "loss_nan_ranks": 0, "loss_rank_avg": 0.07986228913068771, "step": 2605, "valid_targets_mean": 803.5, "valid_targets_min": 542 }, { "epoch": 4.176, "grad_norm": 0.7204257329644403, "learning_rate": 1.6782282770513788e-05, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.07463609427213669, "step": 2610, "valid_targets_mean": 854.4, "valid_targets_min": 462 }, { "epoch": 4.184, "grad_norm": 0.8612927199672735, "learning_rate": 1.6703551486441382e-05, "loss": 0.1031, "loss_nan_ranks": 0, "loss_rank_avg": 0.11520431935787201, "step": 2615, "valid_targets_mean": 975.1, "valid_targets_min": 458 }, { "epoch": 4.192, "grad_norm": 0.8664320213490005, "learning_rate": 1.6624872677634565e-05, "loss": 0.0913, "loss_nan_ranks": 0, "loss_rank_avg": 0.08751243352890015, "step": 2620, "valid_targets_mean": 972.4, "valid_targets_min": 581 }, { "epoch": 4.2, "grad_norm": 0.9681275417079883, "learning_rate": 1.654624759655986e-05, "loss": 0.0984, "loss_nan_ranks": 0, "loss_rank_avg": 0.09813649952411652, "step": 2625, "valid_targets_mean": 886.8, "valid_targets_min": 522 }, { "epoch": 4.208, "grad_norm": 1.0407521644002597, "learning_rate": 1.64676774948285e-05, "loss": 0.0926, "loss_nan_ranks": 0, "loss_rank_avg": 0.09159094095230103, "step": 2630, "valid_targets_mean": 846.4, "valid_targets_min": 513 }, { "epoch": 4.216, "grad_norm": 0.7173554015192248, "learning_rate": 1.6389163623176536e-05, "loss": 0.1215, "loss_nan_ranks": 0, "loss_rank_avg": 0.0797024741768837, "step": 2635, "valid_targets_mean": 901.9, "valid_targets_min": 566 }, { "epoch": 4.224, "grad_norm": 0.8592769401558998, "learning_rate": 1.6310707231444884e-05, "loss": 0.085, "loss_nan_ranks": 0, "loss_rank_avg": 0.07951955497264862, "step": 2640, "valid_targets_mean": 827.6, "valid_targets_min": 549 }, { "epoch": 4.232, "grad_norm": 0.8306119399846523, "learning_rate": 1.623230956855947e-05, "loss": 0.0723, "loss_nan_ranks": 0, "loss_rank_avg": 0.09050008654594421, "step": 2645, "valid_targets_mean": 761.6, "valid_targets_min": 458 }, { "epoch": 4.24, "grad_norm": 0.7665148049485115, "learning_rate": 1.6153971882511324e-05, "loss": 0.079, "loss_nan_ranks": 0, "loss_rank_avg": 0.07178527861833572, "step": 2650, "valid_targets_mean": 760.4, "valid_targets_min": 448 }, { "epoch": 4.248, "grad_norm": 0.7692520199355617, "learning_rate": 1.6075695420336724e-05, "loss": 0.1043, "loss_nan_ranks": 0, "loss_rank_avg": 0.07028958946466446, "step": 2655, "valid_targets_mean": 810.2, "valid_targets_min": 554 }, { "epoch": 4.256, "grad_norm": 0.7316939881648934, "learning_rate": 1.5997481428097338e-05, "loss": 0.1044, "loss_nan_ranks": 0, "loss_rank_avg": 0.13221722841262817, "step": 2660, "valid_targets_mean": 1782.2, "valid_targets_min": 499 }, { "epoch": 4.264, "grad_norm": 0.8770025162268091, "learning_rate": 1.5919331150860396e-05, "loss": 0.1006, "loss_nan_ranks": 0, "loss_rank_avg": 0.1358993798494339, "step": 2665, "valid_targets_mean": 1218.9, "valid_targets_min": 529 }, { "epoch": 4.272, "grad_norm": 0.746194385306678, "learning_rate": 1.5841245832678873e-05, "loss": 0.0975, "loss_nan_ranks": 0, "loss_rank_avg": 0.080272376537323, "step": 2670, "valid_targets_mean": 788.8, "valid_targets_min": 535 }, { "epoch": 4.28, "grad_norm": 0.7789493922430735, "learning_rate": 1.576322671657166e-05, "loss": 0.0842, "loss_nan_ranks": 0, "loss_rank_avg": 0.07402622699737549, "step": 2675, "valid_targets_mean": 819.1, "valid_targets_min": 484 }, { "epoch": 4.288, "grad_norm": 0.766158481713601, "learning_rate": 1.5685275044503804e-05, "loss": 0.0954, "loss_nan_ranks": 0, "loss_rank_avg": 0.0761728435754776, "step": 2680, "valid_targets_mean": 1043.2, "valid_targets_min": 512 }, { "epoch": 4.296, "grad_norm": 0.9738329205245164, "learning_rate": 1.560739205736674e-05, "loss": 0.0907, "loss_nan_ranks": 0, "loss_rank_avg": 0.08560563623905182, "step": 2685, "valid_targets_mean": 881.9, "valid_targets_min": 507 }, { "epoch": 4.304, "grad_norm": 0.917401924751551, "learning_rate": 1.552957899495851e-05, "loss": 0.1094, "loss_nan_ranks": 0, "loss_rank_avg": 0.15787991881370544, "step": 2690, "valid_targets_mean": 976.4, "valid_targets_min": 487 }, { "epoch": 4.312, "grad_norm": 0.8916162194334754, "learning_rate": 1.5451837095964054e-05, "loss": 0.0902, "loss_nan_ranks": 0, "loss_rank_avg": 0.09921112656593323, "step": 2695, "valid_targets_mean": 1003.8, "valid_targets_min": 596 }, { "epoch": 4.32, "grad_norm": 0.8708766115420277, "learning_rate": 1.5374167597935478e-05, "loss": 0.1017, "loss_nan_ranks": 0, "loss_rank_avg": 0.08169998228549957, "step": 2700, "valid_targets_mean": 932.9, "valid_targets_min": 484 }, { "epoch": 4.328, "grad_norm": 0.776023741893538, "learning_rate": 1.5296571737272354e-05, "loss": 0.0734, "loss_nan_ranks": 0, "loss_rank_avg": 0.08336486667394638, "step": 2705, "valid_targets_mean": 815.6, "valid_targets_min": 445 }, { "epoch": 4.336, "grad_norm": 0.9542252355636845, "learning_rate": 1.5219050749202037e-05, "loss": 0.1095, "loss_nan_ranks": 0, "loss_rank_avg": 0.19591785967350006, "step": 2710, "valid_targets_mean": 1389.9, "valid_targets_min": 593 }, { "epoch": 4.344, "grad_norm": 0.8864125490442216, "learning_rate": 1.5141605867760021e-05, "loss": 0.0798, "loss_nan_ranks": 0, "loss_rank_avg": 0.08587196469306946, "step": 2715, "valid_targets_mean": 822.1, "valid_targets_min": 478 }, { "epoch": 4.352, "grad_norm": 0.7527185671619987, "learning_rate": 1.5064238325770267e-05, "loss": 0.0789, "loss_nan_ranks": 0, "loss_rank_avg": 0.07073096930980682, "step": 2720, "valid_targets_mean": 861.9, "valid_targets_min": 504 }, { "epoch": 4.36, "grad_norm": 0.7512586587741376, "learning_rate": 1.498694935482559e-05, "loss": 0.0902, "loss_nan_ranks": 0, "loss_rank_avg": 0.07099224627017975, "step": 2725, "valid_targets_mean": 812.6, "valid_targets_min": 440 }, { "epoch": 4.368, "grad_norm": 0.9405061164636517, "learning_rate": 1.4909740185268056e-05, "loss": 0.1134, "loss_nan_ranks": 0, "loss_rank_avg": 0.09878502786159515, "step": 2730, "valid_targets_mean": 839.1, "valid_targets_min": 540 }, { "epoch": 4.376, "grad_norm": 0.9052795228667068, "learning_rate": 1.4832612046169408e-05, "loss": 0.0889, "loss_nan_ranks": 0, "loss_rank_avg": 0.11052730679512024, "step": 2735, "valid_targets_mean": 847.9, "valid_targets_min": 443 }, { "epoch": 4.384, "grad_norm": 0.7849642630781102, "learning_rate": 1.4755566165311455e-05, "loss": 0.0998, "loss_nan_ranks": 0, "loss_rank_avg": 0.08098804950714111, "step": 2740, "valid_targets_mean": 779.3, "valid_targets_min": 467 }, { "epoch": 4.392, "grad_norm": 0.9370098909939654, "learning_rate": 1.4678603769166591e-05, "loss": 0.0965, "loss_nan_ranks": 0, "loss_rank_avg": 0.10258492827415466, "step": 2745, "valid_targets_mean": 838.2, "valid_targets_min": 468 }, { "epoch": 4.4, "grad_norm": 0.6916768442414201, "learning_rate": 1.4601726082878226e-05, "loss": 0.099, "loss_nan_ranks": 0, "loss_rank_avg": 0.062106356024742126, "step": 2750, "valid_targets_mean": 851.3, "valid_targets_min": 557 }, { "epoch": 4.408, "grad_norm": 0.6729245043658673, "learning_rate": 1.4524934330241292e-05, "loss": 0.0953, "loss_nan_ranks": 0, "loss_rank_avg": 0.06780697405338287, "step": 2755, "valid_targets_mean": 753.4, "valid_targets_min": 427 }, { "epoch": 4.416, "grad_norm": 1.0448003759897928, "learning_rate": 1.4448229733682784e-05, "loss": 0.0796, "loss_nan_ranks": 0, "loss_rank_avg": 0.07199327647686005, "step": 2760, "valid_targets_mean": 857.9, "valid_targets_min": 545 }, { "epoch": 4.424, "grad_norm": 0.7783769295559022, "learning_rate": 1.4371613514242264e-05, "loss": 0.1168, "loss_nan_ranks": 0, "loss_rank_avg": 0.1220938190817833, "step": 2765, "valid_targets_mean": 1227.6, "valid_targets_min": 476 }, { "epoch": 4.432, "grad_norm": 0.8959322564560872, "learning_rate": 1.4295086891552457e-05, "loss": 0.0813, "loss_nan_ranks": 0, "loss_rank_avg": 0.08298659324645996, "step": 2770, "valid_targets_mean": 764.9, "valid_targets_min": 523 }, { "epoch": 4.44, "grad_norm": 0.9298947907055948, "learning_rate": 1.4218651083819811e-05, "loss": 0.0957, "loss_nan_ranks": 0, "loss_rank_avg": 0.07381783425807953, "step": 2775, "valid_targets_mean": 780.7, "valid_targets_min": 450 }, { "epoch": 4.448, "grad_norm": 0.7448045989894762, "learning_rate": 1.4142307307805125e-05, "loss": 0.0864, "loss_nan_ranks": 0, "loss_rank_avg": 0.06524106860160828, "step": 2780, "valid_targets_mean": 857.9, "valid_targets_min": 428 }, { "epoch": 4.456, "grad_norm": 0.8049920591319956, "learning_rate": 1.406605677880416e-05, "loss": 0.0862, "loss_nan_ranks": 0, "loss_rank_avg": 0.09014227241277695, "step": 2785, "valid_targets_mean": 1201.6, "valid_targets_min": 578 }, { "epoch": 4.464, "grad_norm": 0.8404309290675868, "learning_rate": 1.3989900710628313e-05, "loss": 0.0951, "loss_nan_ranks": 0, "loss_rank_avg": 0.09047380089759827, "step": 2790, "valid_targets_mean": 800.4, "valid_targets_min": 473 }, { "epoch": 4.4719999999999995, "grad_norm": 0.9066055301702886, "learning_rate": 1.3913840315585279e-05, "loss": 0.0816, "loss_nan_ranks": 0, "loss_rank_avg": 0.09277326613664627, "step": 2795, "valid_targets_mean": 829.0, "valid_targets_min": 407 }, { "epoch": 4.48, "grad_norm": 0.8598771752202647, "learning_rate": 1.3837876804459765e-05, "loss": 0.1227, "loss_nan_ranks": 0, "loss_rank_avg": 0.08901967108249664, "step": 2800, "valid_targets_mean": 868.4, "valid_targets_min": 479 }, { "epoch": 4.4879999999999995, "grad_norm": 0.783672572024054, "learning_rate": 1.3762011386494191e-05, "loss": 0.0828, "loss_nan_ranks": 0, "loss_rank_avg": 0.07599220424890518, "step": 2805, "valid_targets_mean": 744.5, "valid_targets_min": 537 }, { "epoch": 4.496, "grad_norm": 0.7141310435933745, "learning_rate": 1.3686245269369485e-05, "loss": 0.0915, "loss_nan_ranks": 0, "loss_rank_avg": 0.06869702786207199, "step": 2810, "valid_targets_mean": 940.9, "valid_targets_min": 559 }, { "epoch": 4.504, "grad_norm": 0.9025153880900855, "learning_rate": 1.3610579659185809e-05, "loss": 0.0945, "loss_nan_ranks": 0, "loss_rank_avg": 0.11102897673845291, "step": 2815, "valid_targets_mean": 844.6, "valid_targets_min": 423 }, { "epoch": 4.5120000000000005, "grad_norm": 0.800663468084433, "learning_rate": 1.35350157604434e-05, "loss": 0.085, "loss_nan_ranks": 0, "loss_rank_avg": 0.07541121542453766, "step": 2820, "valid_targets_mean": 790.8, "valid_targets_min": 455 }, { "epoch": 4.52, "grad_norm": 0.8081250786086973, "learning_rate": 1.345955477602337e-05, "loss": 0.0937, "loss_nan_ranks": 0, "loss_rank_avg": 0.08529967069625854, "step": 2825, "valid_targets_mean": 859.4, "valid_targets_min": 523 }, { "epoch": 4.5280000000000005, "grad_norm": 0.8960539536732155, "learning_rate": 1.3384197907168561e-05, "loss": 0.1003, "loss_nan_ranks": 0, "loss_rank_avg": 0.09535820782184601, "step": 2830, "valid_targets_mean": 790.9, "valid_targets_min": 476 }, { "epoch": 4.536, "grad_norm": 0.9356285004464241, "learning_rate": 1.3308946353464438e-05, "loss": 0.1165, "loss_nan_ranks": 0, "loss_rank_avg": 0.10715999454259872, "step": 2835, "valid_targets_mean": 1112.6, "valid_targets_min": 545 }, { "epoch": 4.5440000000000005, "grad_norm": 0.7125180425061677, "learning_rate": 1.3233801312819979e-05, "loss": 0.0882, "loss_nan_ranks": 0, "loss_rank_avg": 0.06889170408248901, "step": 2840, "valid_targets_mean": 868.6, "valid_targets_min": 484 }, { "epoch": 4.552, "grad_norm": 0.9666072232685547, "learning_rate": 1.3158763981448606e-05, "loss": 0.0868, "loss_nan_ranks": 0, "loss_rank_avg": 0.11292714625597, "step": 2845, "valid_targets_mean": 986.9, "valid_targets_min": 523 }, { "epoch": 4.5600000000000005, "grad_norm": 0.9204304591677285, "learning_rate": 1.3083835553849148e-05, "loss": 0.0797, "loss_nan_ranks": 0, "loss_rank_avg": 0.08996661007404327, "step": 2850, "valid_targets_mean": 919.4, "valid_targets_min": 497 }, { "epoch": 4.568, "grad_norm": 0.7098940896911101, "learning_rate": 1.3009017222786828e-05, "loss": 0.0943, "loss_nan_ranks": 0, "loss_rank_avg": 0.07091812789440155, "step": 2855, "valid_targets_mean": 887.1, "valid_targets_min": 487 }, { "epoch": 4.576, "grad_norm": 0.9075241215569958, "learning_rate": 1.2934310179274269e-05, "loss": 0.088, "loss_nan_ranks": 0, "loss_rank_avg": 0.10161441564559937, "step": 2860, "valid_targets_mean": 974.2, "valid_targets_min": 434 }, { "epoch": 4.584, "grad_norm": 0.9322718683943222, "learning_rate": 1.2859715612552541e-05, "loss": 0.105, "loss_nan_ranks": 0, "loss_rank_avg": 0.0803801566362381, "step": 2865, "valid_targets_mean": 951.9, "valid_targets_min": 488 }, { "epoch": 4.592, "grad_norm": 0.8313467086832508, "learning_rate": 1.278523471007223e-05, "loss": 0.091, "loss_nan_ranks": 0, "loss_rank_avg": 0.09433354437351227, "step": 2870, "valid_targets_mean": 1132.4, "valid_targets_min": 458 }, { "epoch": 4.6, "grad_norm": 0.8079314142812989, "learning_rate": 1.271086865747451e-05, "loss": 0.1164, "loss_nan_ranks": 0, "loss_rank_avg": 0.08496357500553131, "step": 2875, "valid_targets_mean": 833.5, "valid_targets_min": 572 }, { "epoch": 4.608, "grad_norm": 1.0236303127288213, "learning_rate": 1.2636618638572316e-05, "loss": 0.0905, "loss_nan_ranks": 0, "loss_rank_avg": 0.12658067047595978, "step": 2880, "valid_targets_mean": 881.5, "valid_targets_min": 472 }, { "epoch": 4.616, "grad_norm": 0.8333427076318738, "learning_rate": 1.2562485835331466e-05, "loss": 0.0808, "loss_nan_ranks": 0, "loss_rank_avg": 0.08028241991996765, "step": 2885, "valid_targets_mean": 748.9, "valid_targets_min": 434 }, { "epoch": 4.624, "grad_norm": 0.9768936929223744, "learning_rate": 1.2488471427851852e-05, "loss": 0.1008, "loss_nan_ranks": 0, "loss_rank_avg": 0.12225892394781113, "step": 2890, "valid_targets_mean": 919.8, "valid_targets_min": 477 }, { "epoch": 4.632, "grad_norm": 0.6897936650252694, "learning_rate": 1.241457659434866e-05, "loss": 0.0978, "loss_nan_ranks": 0, "loss_rank_avg": 0.0659201443195343, "step": 2895, "valid_targets_mean": 821.9, "valid_targets_min": 524 }, { "epoch": 4.64, "grad_norm": 0.8876098353391305, "learning_rate": 1.2340802511133605e-05, "loss": 0.0817, "loss_nan_ranks": 0, "loss_rank_avg": 0.09190486371517181, "step": 2900, "valid_targets_mean": 868.3, "valid_targets_min": 479 }, { "epoch": 4.648, "grad_norm": 1.0077771221444112, "learning_rate": 1.2267150352596216e-05, "loss": 0.1091, "loss_nan_ranks": 0, "loss_rank_avg": 0.1861191987991333, "step": 2905, "valid_targets_mean": 1313.7, "valid_targets_min": 528 }, { "epoch": 4.656, "grad_norm": 0.9368442988921992, "learning_rate": 1.2193621291185132e-05, "loss": 0.1175, "loss_nan_ranks": 0, "loss_rank_avg": 0.20001402497291565, "step": 2910, "valid_targets_mean": 1509.2, "valid_targets_min": 565 }, { "epoch": 4.664, "grad_norm": 0.8626802211102149, "learning_rate": 1.2120216497389446e-05, "loss": 0.1291, "loss_nan_ranks": 0, "loss_rank_avg": 0.13889957964420319, "step": 2915, "valid_targets_mean": 1055.9, "valid_targets_min": 602 }, { "epoch": 4.672, "grad_norm": 0.8381078363134903, "learning_rate": 1.2046937139720068e-05, "loss": 0.0911, "loss_nan_ranks": 0, "loss_rank_avg": 0.11396999657154083, "step": 2920, "valid_targets_mean": 952.6, "valid_targets_min": 457 }, { "epoch": 4.68, "grad_norm": 0.8337297954472803, "learning_rate": 1.1973784384691121e-05, "loss": 0.0975, "loss_nan_ranks": 0, "loss_rank_avg": 0.09346058964729309, "step": 2925, "valid_targets_mean": 856.2, "valid_targets_min": 473 }, { "epoch": 4.688, "grad_norm": 0.7029624221310162, "learning_rate": 1.1900759396801382e-05, "loss": 0.0907, "loss_nan_ranks": 0, "loss_rank_avg": 0.06849217414855957, "step": 2930, "valid_targets_mean": 898.8, "valid_targets_min": 602 }, { "epoch": 4.696, "grad_norm": 0.7818350801821872, "learning_rate": 1.1827863338515741e-05, "loss": 0.092, "loss_nan_ranks": 0, "loss_rank_avg": 0.07988164573907852, "step": 2935, "valid_targets_mean": 707.1, "valid_targets_min": 467 }, { "epoch": 4.704, "grad_norm": 0.7756061520131283, "learning_rate": 1.1755097370246669e-05, "loss": 0.0877, "loss_nan_ranks": 0, "loss_rank_avg": 0.07403562217950821, "step": 2940, "valid_targets_mean": 949.6, "valid_targets_min": 523 }, { "epoch": 4.712, "grad_norm": 0.7173002311933365, "learning_rate": 1.1682462650335791e-05, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.07594731450080872, "step": 2945, "valid_targets_mean": 799.7, "valid_targets_min": 498 }, { "epoch": 4.72, "grad_norm": 0.7896332184742297, "learning_rate": 1.1609960335035423e-05, "loss": 0.0818, "loss_nan_ranks": 0, "loss_rank_avg": 0.08279333263635635, "step": 2950, "valid_targets_mean": 984.9, "valid_targets_min": 502 }, { "epoch": 4.728, "grad_norm": 0.9776144738454718, "learning_rate": 1.1537591578490165e-05, "loss": 0.0998, "loss_nan_ranks": 0, "loss_rank_avg": 0.1456027776002884, "step": 2955, "valid_targets_mean": 1332.9, "valid_targets_min": 469 }, { "epoch": 4.736, "grad_norm": 0.9359507220580169, "learning_rate": 1.146535753271853e-05, "loss": 0.1202, "loss_nan_ranks": 0, "loss_rank_avg": 0.15099552273750305, "step": 2960, "valid_targets_mean": 1355.7, "valid_targets_min": 543 }, { "epoch": 4.744, "grad_norm": 0.7220819850208092, "learning_rate": 1.139325934759461e-05, "loss": 0.0822, "loss_nan_ranks": 0, "loss_rank_avg": 0.09187281131744385, "step": 2965, "valid_targets_mean": 1074.4, "valid_targets_min": 561 }, { "epoch": 4.752, "grad_norm": 0.5905807746665216, "learning_rate": 1.1321298170829768e-05, "loss": 0.0922, "loss_nan_ranks": 0, "loss_rank_avg": 0.10941527783870697, "step": 2970, "valid_targets_mean": 2202.9, "valid_targets_min": 532 }, { "epoch": 4.76, "grad_norm": 1.112799196030323, "learning_rate": 1.1249475147954363e-05, "loss": 0.0824, "loss_nan_ranks": 0, "loss_rank_avg": 0.10790747404098511, "step": 2975, "valid_targets_mean": 882.8, "valid_targets_min": 499 }, { "epoch": 4.768, "grad_norm": 0.6566387025187009, "learning_rate": 1.1177791422299528e-05, "loss": 0.0847, "loss_nan_ranks": 0, "loss_rank_avg": 0.06682457029819489, "step": 2980, "valid_targets_mean": 1061.5, "valid_targets_min": 526 }, { "epoch": 4.776, "grad_norm": 0.9252893663043001, "learning_rate": 1.1106248134978959e-05, "loss": 0.0821, "loss_nan_ranks": 0, "loss_rank_avg": 0.10607043653726578, "step": 2985, "valid_targets_mean": 961.8, "valid_targets_min": 481 }, { "epoch": 4.784, "grad_norm": 0.7026497377797631, "learning_rate": 1.1034846424870744e-05, "loss": 0.0901, "loss_nan_ranks": 0, "loss_rank_avg": 0.06845103204250336, "step": 2990, "valid_targets_mean": 963.6, "valid_targets_min": 622 }, { "epoch": 4.792, "grad_norm": 0.8778334419174398, "learning_rate": 1.0963587428599256e-05, "loss": 0.1168, "loss_nan_ranks": 0, "loss_rank_avg": 0.10423515737056732, "step": 2995, "valid_targets_mean": 910.9, "valid_targets_min": 486 }, { "epoch": 4.8, "grad_norm": 0.7950092737522141, "learning_rate": 1.089247228051704e-05, "loss": 0.0814, "loss_nan_ranks": 0, "loss_rank_avg": 0.08087144792079926, "step": 3000, "valid_targets_mean": 820.4, "valid_targets_min": 477 }, { "epoch": 4.808, "grad_norm": 0.8721682415243375, "learning_rate": 1.0821502112686753e-05, "loss": 0.0887, "loss_nan_ranks": 0, "loss_rank_avg": 0.07225702702999115, "step": 3005, "valid_targets_mean": 717.1, "valid_targets_min": 383 }, { "epoch": 4.816, "grad_norm": 0.9423894585093093, "learning_rate": 1.0750678054863158e-05, "loss": 0.0958, "loss_nan_ranks": 0, "loss_rank_avg": 0.09208646416664124, "step": 3010, "valid_targets_mean": 855.6, "valid_targets_min": 495 }, { "epoch": 4.824, "grad_norm": 0.7766246439543594, "learning_rate": 1.0680001234475127e-05, "loss": 0.1298, "loss_nan_ranks": 0, "loss_rank_avg": 0.08203871548175812, "step": 3015, "valid_targets_mean": 1001.3, "valid_targets_min": 517 }, { "epoch": 4.832, "grad_norm": 0.9878649492586721, "learning_rate": 1.0609472776607715e-05, "loss": 0.0943, "loss_nan_ranks": 0, "loss_rank_avg": 0.14963841438293457, "step": 3020, "valid_targets_mean": 881.9, "valid_targets_min": 467 }, { "epoch": 4.84, "grad_norm": 0.9599009488603156, "learning_rate": 1.0539093803984217e-05, "loss": 0.0789, "loss_nan_ranks": 0, "loss_rank_avg": 0.08646674454212189, "step": 3025, "valid_targets_mean": 1013.7, "valid_targets_min": 532 }, { "epoch": 4.848, "grad_norm": 0.7138781421940567, "learning_rate": 1.046886543694832e-05, "loss": 0.0859, "loss_nan_ranks": 0, "loss_rank_avg": 0.06895513832569122, "step": 3030, "valid_targets_mean": 867.2, "valid_targets_min": 486 }, { "epoch": 4.856, "grad_norm": 0.8515202976827628, "learning_rate": 1.0398788793446263e-05, "loss": 0.1115, "loss_nan_ranks": 0, "loss_rank_avg": 0.09606703370809555, "step": 3035, "valid_targets_mean": 1090.6, "valid_targets_min": 546 }, { "epoch": 4.864, "grad_norm": 1.4988346042882428, "learning_rate": 1.0328864989009037e-05, "loss": 0.1022, "loss_nan_ranks": 0, "loss_rank_avg": 0.07324632257223129, "step": 3040, "valid_targets_mean": 869.6, "valid_targets_min": 518 }, { "epoch": 4.872, "grad_norm": 0.8325983712860322, "learning_rate": 1.0259095136734634e-05, "loss": 0.1326, "loss_nan_ranks": 0, "loss_rank_avg": 0.1295246034860611, "step": 3045, "valid_targets_mean": 1129.4, "valid_targets_min": 439 }, { "epoch": 4.88, "grad_norm": 0.7862743933629226, "learning_rate": 1.0189480347270311e-05, "loss": 0.1005, "loss_nan_ranks": 0, "loss_rank_avg": 0.10198500752449036, "step": 3050, "valid_targets_mean": 923.4, "valid_targets_min": 474 }, { "epoch": 4.888, "grad_norm": 0.787405323498443, "learning_rate": 1.0120021728794938e-05, "loss": 0.0799, "loss_nan_ranks": 0, "loss_rank_avg": 0.08082190155982971, "step": 3055, "valid_targets_mean": 683.2, "valid_targets_min": 449 }, { "epoch": 4.896, "grad_norm": 0.9576234772943834, "learning_rate": 1.0050720387001334e-05, "loss": 0.1027, "loss_nan_ranks": 0, "loss_rank_avg": 0.09182494133710861, "step": 3060, "valid_targets_mean": 887.6, "valid_targets_min": 499 }, { "epoch": 4.904, "grad_norm": 0.9752361907473542, "learning_rate": 9.981577425078672e-06, "loss": 0.0971, "loss_nan_ranks": 0, "loss_rank_avg": 0.15568508207798004, "step": 3065, "valid_targets_mean": 1023.6, "valid_targets_min": 463 }, { "epoch": 4.912, "grad_norm": 0.8911558097766225, "learning_rate": 9.912593943694924e-06, "loss": 0.1129, "loss_nan_ranks": 0, "loss_rank_avg": 0.1109350174665451, "step": 3070, "valid_targets_mean": 1180.3, "valid_targets_min": 568 }, { "epoch": 4.92, "grad_norm": 0.9212926624311334, "learning_rate": 9.843771040979328e-06, "loss": 0.0871, "loss_nan_ranks": 0, "loss_rank_avg": 0.09667422622442245, "step": 3075, "valid_targets_mean": 950.1, "valid_targets_min": 512 }, { "epoch": 4.928, "grad_norm": 0.8573739269127677, "learning_rate": 9.775109812504922e-06, "loss": 0.0957, "loss_nan_ranks": 0, "loss_rank_avg": 0.1107478216290474, "step": 3080, "valid_targets_mean": 971.2, "valid_targets_min": 398 }, { "epoch": 4.936, "grad_norm": 0.8793036060018228, "learning_rate": 9.706611351271088e-06, "loss": 0.0772, "loss_nan_ranks": 0, "loss_rank_avg": 0.09450344741344452, "step": 3085, "valid_targets_mean": 806.3, "valid_targets_min": 502 }, { "epoch": 4.944, "grad_norm": 0.9522517797294872, "learning_rate": 9.638276747686169e-06, "loss": 0.105, "loss_nan_ranks": 0, "loss_rank_avg": 0.1285589337348938, "step": 3090, "valid_targets_mean": 1150.9, "valid_targets_min": 498 }, { "epoch": 4.952, "grad_norm": 0.9313589976377755, "learning_rate": 9.570107089550091e-06, "loss": 0.1143, "loss_nan_ranks": 0, "loss_rank_avg": 0.12102701514959335, "step": 3095, "valid_targets_mean": 858.7, "valid_targets_min": 478 }, { "epoch": 4.96, "grad_norm": 0.9163513791269962, "learning_rate": 9.502103462037074e-06, "loss": 0.1004, "loss_nan_ranks": 0, "loss_rank_avg": 0.1062905564904213, "step": 3100, "valid_targets_mean": 964.9, "valid_targets_min": 484 }, { "epoch": 4.968, "grad_norm": 1.0687818787426926, "learning_rate": 9.434266947678326e-06, "loss": 0.1087, "loss_nan_ranks": 0, "loss_rank_avg": 0.10951408743858337, "step": 3105, "valid_targets_mean": 981.2, "valid_targets_min": 474 }, { "epoch": 4.976, "grad_norm": 0.7184448345349286, "learning_rate": 9.366598626344836e-06, "loss": 0.082, "loss_nan_ranks": 0, "loss_rank_avg": 0.06675899028778076, "step": 3110, "valid_targets_mean": 834.8, "valid_targets_min": 560 }, { "epoch": 4.984, "grad_norm": 0.8654614629854598, "learning_rate": 9.299099575230172e-06, "loss": 0.1179, "loss_nan_ranks": 0, "loss_rank_avg": 0.08472222089767456, "step": 3115, "valid_targets_mean": 834.8, "valid_targets_min": 471 }, { "epoch": 4.992, "grad_norm": 0.5860181708153348, "learning_rate": 9.231770868833334e-06, "loss": 0.0864, "loss_nan_ranks": 0, "loss_rank_avg": 0.10731495916843414, "step": 3120, "valid_targets_mean": 2171.6, "valid_targets_min": 583 }, { "epoch": 5.0, "grad_norm": 0.8324406309143618, "learning_rate": 9.164613578941652e-06, "loss": 0.0861, "loss_nan_ranks": 0, "loss_rank_avg": 0.08765757828950882, "step": 3125, "valid_targets_mean": 918.4, "valid_targets_min": 506 }, { "epoch": 5.008, "grad_norm": 1.0510573946733208, "learning_rate": 9.097628774613732e-06, "loss": 0.0718, "loss_nan_ranks": 0, "loss_rank_avg": 0.06804905831813812, "step": 3130, "valid_targets_mean": 935.7, "valid_targets_min": 549 }, { "epoch": 5.016, "grad_norm": 0.7707638109753482, "learning_rate": 9.030817522162403e-06, "loss": 0.0702, "loss_nan_ranks": 0, "loss_rank_avg": 0.07694844156503677, "step": 3135, "valid_targets_mean": 979.8, "valid_targets_min": 450 }, { "epoch": 5.024, "grad_norm": 0.8982765492579255, "learning_rate": 8.964180885137797e-06, "loss": 0.0986, "loss_nan_ranks": 0, "loss_rank_avg": 0.09304551035165787, "step": 3140, "valid_targets_mean": 924.1, "valid_targets_min": 417 }, { "epoch": 5.032, "grad_norm": 0.8324731032677587, "learning_rate": 8.897719924310375e-06, "loss": 0.0694, "loss_nan_ranks": 0, "loss_rank_avg": 0.06775309890508652, "step": 3145, "valid_targets_mean": 908.8, "valid_targets_min": 512 }, { "epoch": 5.04, "grad_norm": 0.9396107459729739, "learning_rate": 8.831435697654068e-06, "loss": 0.0752, "loss_nan_ranks": 0, "loss_rank_avg": 0.08411605656147003, "step": 3150, "valid_targets_mean": 924.2, "valid_targets_min": 476 }, { "epoch": 5.048, "grad_norm": 0.6785395368878784, "learning_rate": 8.765329260329413e-06, "loss": 0.0698, "loss_nan_ranks": 0, "loss_rank_avg": 0.05708220601081848, "step": 3155, "valid_targets_mean": 869.2, "valid_targets_min": 537 }, { "epoch": 5.056, "grad_norm": 0.8586369742809761, "learning_rate": 8.699401664666774e-06, "loss": 0.0655, "loss_nan_ranks": 0, "loss_rank_avg": 0.06502902507781982, "step": 3160, "valid_targets_mean": 793.8, "valid_targets_min": 487 }, { "epoch": 5.064, "grad_norm": 0.8368101486858089, "learning_rate": 8.633653960149579e-06, "loss": 0.0758, "loss_nan_ranks": 0, "loss_rank_avg": 0.06350349634885788, "step": 3165, "valid_targets_mean": 855.6, "valid_targets_min": 568 }, { "epoch": 5.072, "grad_norm": 0.8412868112255628, "learning_rate": 8.56808719339762e-06, "loss": 0.0698, "loss_nan_ranks": 0, "loss_rank_avg": 0.057309672236442566, "step": 3170, "valid_targets_mean": 939.9, "valid_targets_min": 481 }, { "epoch": 5.08, "grad_norm": 0.9294442133648975, "learning_rate": 8.502702408150391e-06, "loss": 0.0855, "loss_nan_ranks": 0, "loss_rank_avg": 0.10744750499725342, "step": 3175, "valid_targets_mean": 1084.8, "valid_targets_min": 422 }, { "epoch": 5.088, "grad_norm": 0.8337209904777393, "learning_rate": 8.43750064525047e-06, "loss": 0.0809, "loss_nan_ranks": 0, "loss_rank_avg": 0.06538328528404236, "step": 3180, "valid_targets_mean": 731.6, "valid_targets_min": 444 }, { "epoch": 5.096, "grad_norm": 0.8657468914758869, "learning_rate": 8.372482942626952e-06, "loss": 0.064, "loss_nan_ranks": 0, "loss_rank_avg": 0.07184585928916931, "step": 3185, "valid_targets_mean": 859.4, "valid_targets_min": 523 }, { "epoch": 5.104, "grad_norm": 0.7209090350527402, "learning_rate": 8.307650335278927e-06, "loss": 0.0721, "loss_nan_ranks": 0, "loss_rank_avg": 0.05849084258079529, "step": 3190, "valid_targets_mean": 785.9, "valid_targets_min": 474 }, { "epoch": 5.112, "grad_norm": 0.7567765538916866, "learning_rate": 8.243003855259015e-06, "loss": 0.0852, "loss_nan_ranks": 0, "loss_rank_avg": 0.06685122102499008, "step": 3195, "valid_targets_mean": 824.2, "valid_targets_min": 521 }, { "epoch": 5.12, "grad_norm": 0.80639522105524, "learning_rate": 8.178544531656897e-06, "loss": 0.0803, "loss_nan_ranks": 0, "loss_rank_avg": 0.06725746393203735, "step": 3200, "valid_targets_mean": 877.0, "valid_targets_min": 452 }, { "epoch": 5.128, "grad_norm": 0.8589322565030704, "learning_rate": 8.11427339058299e-06, "loss": 0.0883, "loss_nan_ranks": 0, "loss_rank_avg": 0.067558653652668, "step": 3205, "valid_targets_mean": 834.4, "valid_targets_min": 465 }, { "epoch": 5.136, "grad_norm": 0.6812113565653257, "learning_rate": 8.050191455152072e-06, "loss": 0.0685, "loss_nan_ranks": 0, "loss_rank_avg": 0.058748096227645874, "step": 3210, "valid_targets_mean": 787.1, "valid_targets_min": 550 }, { "epoch": 5.144, "grad_norm": 0.7113832171090609, "learning_rate": 7.986299745467013e-06, "loss": 0.0798, "loss_nan_ranks": 0, "loss_rank_avg": 0.07876145839691162, "step": 3215, "valid_targets_mean": 1159.2, "valid_targets_min": 605 }, { "epoch": 5.152, "grad_norm": 0.8160806415654152, "learning_rate": 7.922599278602524e-06, "loss": 0.0671, "loss_nan_ranks": 0, "loss_rank_avg": 0.06620678305625916, "step": 3220, "valid_targets_mean": 763.9, "valid_targets_min": 449 }, { "epoch": 5.16, "grad_norm": 0.7484997198691409, "learning_rate": 7.859091068588987e-06, "loss": 0.0707, "loss_nan_ranks": 0, "loss_rank_avg": 0.05849097669124603, "step": 3225, "valid_targets_mean": 705.9, "valid_targets_min": 485 }, { "epoch": 5.168, "grad_norm": 0.7589106191581299, "learning_rate": 7.795776126396284e-06, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.06466265767812729, "step": 3230, "valid_targets_mean": 914.7, "valid_targets_min": 484 }, { "epoch": 5.176, "grad_norm": 0.9207333350083065, "learning_rate": 7.732655459917726e-06, "loss": 0.0864, "loss_nan_ranks": 0, "loss_rank_avg": 0.07856682687997818, "step": 3235, "valid_targets_mean": 1036.2, "valid_targets_min": 479 }, { "epoch": 5.184, "grad_norm": 0.7936348748602636, "learning_rate": 7.669730073954005e-06, "loss": 0.0711, "loss_nan_ranks": 0, "loss_rank_avg": 0.06282620131969452, "step": 3240, "valid_targets_mean": 904.2, "valid_targets_min": 553 }, { "epoch": 5.192, "grad_norm": 0.7253442301155842, "learning_rate": 7.607000970197194e-06, "loss": 0.094, "loss_nan_ranks": 0, "loss_rank_avg": 0.06407792121171951, "step": 3245, "valid_targets_mean": 831.5, "valid_targets_min": 462 }, { "epoch": 5.2, "grad_norm": 1.1209412847091478, "learning_rate": 7.544469147214797e-06, "loss": 0.0871, "loss_nan_ranks": 0, "loss_rank_avg": 0.13570475578308105, "step": 3250, "valid_targets_mean": 1172.7, "valid_targets_min": 502 }, { "epoch": 5.208, "grad_norm": 1.3916853073197575, "learning_rate": 7.482135600433868e-06, "loss": 0.0912, "loss_nan_ranks": 0, "loss_rank_avg": 0.09390473365783691, "step": 3255, "valid_targets_mean": 915.3, "valid_targets_min": 480 }, { "epoch": 5.216, "grad_norm": 0.824701167491673, "learning_rate": 7.420001322125156e-06, "loss": 0.0684, "loss_nan_ranks": 0, "loss_rank_avg": 0.07643882185220718, "step": 3260, "valid_targets_mean": 956.8, "valid_targets_min": 494 }, { "epoch": 5.224, "grad_norm": 0.7266209275554882, "learning_rate": 7.3580673013872946e-06, "loss": 0.0682, "loss_nan_ranks": 0, "loss_rank_avg": 0.058551251888275146, "step": 3265, "valid_targets_mean": 883.6, "valid_targets_min": 499 }, { "epoch": 5.232, "grad_norm": 0.894891668332706, "learning_rate": 7.2963345241310904e-06, "loss": 0.0803, "loss_nan_ranks": 0, "loss_rank_avg": 0.06584808230400085, "step": 3270, "valid_targets_mean": 772.3, "valid_targets_min": 491 }, { "epoch": 5.24, "grad_norm": 0.7138319729657315, "learning_rate": 7.234803973063797e-06, "loss": 0.0728, "loss_nan_ranks": 0, "loss_rank_avg": 0.060329943895339966, "step": 3275, "valid_targets_mean": 762.9, "valid_targets_min": 516 }, { "epoch": 5.248, "grad_norm": 0.9752596494461916, "learning_rate": 7.173476627673492e-06, "loss": 0.0693, "loss_nan_ranks": 0, "loss_rank_avg": 0.09661838412284851, "step": 3280, "valid_targets_mean": 1086.2, "valid_targets_min": 509 }, { "epoch": 5.256, "grad_norm": 0.9852883612298059, "learning_rate": 7.112353464213477e-06, "loss": 0.0953, "loss_nan_ranks": 0, "loss_rank_avg": 0.08339089155197144, "step": 3285, "valid_targets_mean": 1042.8, "valid_targets_min": 576 }, { "epoch": 5.264, "grad_norm": 0.7852849622597555, "learning_rate": 7.051435455686735e-06, "loss": 0.0651, "loss_nan_ranks": 0, "loss_rank_avg": 0.059544969350099564, "step": 3290, "valid_targets_mean": 856.1, "valid_targets_min": 469 }, { "epoch": 5.272, "grad_norm": 0.8604114481180384, "learning_rate": 6.990723571830438e-06, "loss": 0.0665, "loss_nan_ranks": 0, "loss_rank_avg": 0.0747007355093956, "step": 3295, "valid_targets_mean": 871.5, "valid_targets_min": 527 }, { "epoch": 5.28, "grad_norm": 0.9221542381832124, "learning_rate": 6.93021877910052e-06, "loss": 0.0965, "loss_nan_ranks": 0, "loss_rank_avg": 0.1211557537317276, "step": 3300, "valid_targets_mean": 1022.2, "valid_targets_min": 434 }, { "epoch": 5.288, "grad_norm": 0.7799908275852595, "learning_rate": 6.8699220406562985e-06, "loss": 0.0745, "loss_nan_ranks": 0, "loss_rank_avg": 0.06038517504930496, "step": 3305, "valid_targets_mean": 782.4, "valid_targets_min": 520 }, { "epoch": 5.296, "grad_norm": 0.7720999089439553, "learning_rate": 6.809834316345117e-06, "loss": 0.0666, "loss_nan_ranks": 0, "loss_rank_avg": 0.06718848645687103, "step": 3310, "valid_targets_mean": 683.2, "valid_targets_min": 496 }, { "epoch": 5.304, "grad_norm": 1.003311124904277, "learning_rate": 6.749956562687083e-06, "loss": 0.0664, "loss_nan_ranks": 0, "loss_rank_avg": 0.06412294507026672, "step": 3315, "valid_targets_mean": 765.4, "valid_targets_min": 441 }, { "epoch": 5.312, "grad_norm": 0.9176716702823916, "learning_rate": 6.690289732859841e-06, "loss": 0.0756, "loss_nan_ranks": 0, "loss_rank_avg": 0.06900150328874588, "step": 3320, "valid_targets_mean": 726.8, "valid_targets_min": 527 }, { "epoch": 5.32, "grad_norm": 0.8801976297445904, "learning_rate": 6.630834776683403e-06, "loss": 0.0743, "loss_nan_ranks": 0, "loss_rank_avg": 0.08224289119243622, "step": 3325, "valid_targets_mean": 870.4, "valid_targets_min": 464 }, { "epoch": 5.328, "grad_norm": 0.750674011177151, "learning_rate": 6.571592640605e-06, "loss": 0.0615, "loss_nan_ranks": 0, "loss_rank_avg": 0.056616879999637604, "step": 3330, "valid_targets_mean": 793.8, "valid_targets_min": 456 }, { "epoch": 5.336, "grad_norm": 0.8610292635057717, "learning_rate": 6.512564267684061e-06, "loss": 0.0929, "loss_nan_ranks": 0, "loss_rank_avg": 0.0650147795677185, "step": 3335, "valid_targets_mean": 1461.8, "valid_targets_min": 430 }, { "epoch": 5.344, "grad_norm": 1.0077653627076866, "learning_rate": 6.453750597577167e-06, "loss": 0.0665, "loss_nan_ranks": 0, "loss_rank_avg": 0.08667433261871338, "step": 3340, "valid_targets_mean": 968.2, "valid_targets_min": 560 }, { "epoch": 5.352, "grad_norm": 0.8467418168834477, "learning_rate": 6.395152566523106e-06, "loss": 0.0753, "loss_nan_ranks": 0, "loss_rank_avg": 0.09939449280500412, "step": 3345, "valid_targets_mean": 1185.5, "valid_targets_min": 458 }, { "epoch": 5.36, "grad_norm": 1.1761939041437515, "learning_rate": 6.336771107327966e-06, "loss": 0.0899, "loss_nan_ranks": 0, "loss_rank_avg": 0.12340512871742249, "step": 3350, "valid_targets_mean": 917.4, "valid_targets_min": 508 }, { "epoch": 5.368, "grad_norm": 1.137723815719334, "learning_rate": 6.278607149350289e-06, "loss": 0.072, "loss_nan_ranks": 0, "loss_rank_avg": 0.07936599850654602, "step": 3355, "valid_targets_mean": 831.8, "valid_targets_min": 509 }, { "epoch": 5.376, "grad_norm": 0.7671106717546039, "learning_rate": 6.220661618486268e-06, "loss": 0.0801, "loss_nan_ranks": 0, "loss_rank_avg": 0.06496668606996536, "step": 3360, "valid_targets_mean": 763.8, "valid_targets_min": 508 }, { "epoch": 5.384, "grad_norm": 0.7407932030120938, "learning_rate": 6.162935437155024e-06, "loss": 0.0922, "loss_nan_ranks": 0, "loss_rank_avg": 0.069102942943573, "step": 3365, "valid_targets_mean": 1027.3, "valid_targets_min": 565 }, { "epoch": 5.392, "grad_norm": 0.9869221609265355, "learning_rate": 6.105429524283901e-06, "loss": 0.0993, "loss_nan_ranks": 0, "loss_rank_avg": 0.1857997626066208, "step": 3370, "valid_targets_mean": 1458.3, "valid_targets_min": 402 }, { "epoch": 5.4, "grad_norm": 0.8059030956251092, "learning_rate": 6.04814479529386e-06, "loss": 0.0827, "loss_nan_ranks": 0, "loss_rank_avg": 0.09002557396888733, "step": 3375, "valid_targets_mean": 1050.7, "valid_targets_min": 537 }, { "epoch": 5.408, "grad_norm": 0.955351532425662, "learning_rate": 5.991082162084889e-06, "loss": 0.0731, "loss_nan_ranks": 0, "loss_rank_avg": 0.06369668245315552, "step": 3380, "valid_targets_mean": 1038.1, "valid_targets_min": 545 }, { "epoch": 5.416, "grad_norm": 0.834418232150413, "learning_rate": 5.934242533021499e-06, "loss": 0.1086, "loss_nan_ranks": 0, "loss_rank_avg": 0.05775479972362518, "step": 3385, "valid_targets_mean": 731.1, "valid_targets_min": 454 }, { "epoch": 5.424, "grad_norm": 0.987964590502229, "learning_rate": 5.877626812918258e-06, "loss": 0.0766, "loss_nan_ranks": 0, "loss_rank_avg": 0.13848869502544403, "step": 3390, "valid_targets_mean": 1440.2, "valid_targets_min": 495 }, { "epoch": 5.432, "grad_norm": 0.8334021563707787, "learning_rate": 5.821235903025378e-06, "loss": 0.0978, "loss_nan_ranks": 0, "loss_rank_avg": 0.0656728744506836, "step": 3395, "valid_targets_mean": 819.8, "valid_targets_min": 544 }, { "epoch": 5.44, "grad_norm": 0.7199157847990661, "learning_rate": 5.765070701014391e-06, "loss": 0.0704, "loss_nan_ranks": 0, "loss_rank_avg": 0.06393887847661972, "step": 3400, "valid_targets_mean": 926.3, "valid_targets_min": 464 }, { "epoch": 5.448, "grad_norm": 0.9044046663502227, "learning_rate": 5.709132100963841e-06, "loss": 0.0975, "loss_nan_ranks": 0, "loss_rank_avg": 0.08857779204845428, "step": 3405, "valid_targets_mean": 1049.2, "valid_targets_min": 547 }, { "epoch": 5.456, "grad_norm": 0.8155293751834836, "learning_rate": 5.653420993345062e-06, "loss": 0.0635, "loss_nan_ranks": 0, "loss_rank_avg": 0.07042507827281952, "step": 3410, "valid_targets_mean": 721.9, "valid_targets_min": 443 }, { "epoch": 5.464, "grad_norm": 0.7081990917119335, "learning_rate": 5.597938265007994e-06, "loss": 0.0628, "loss_nan_ranks": 0, "loss_rank_avg": 0.057499922811985016, "step": 3415, "valid_targets_mean": 916.9, "valid_targets_min": 602 }, { "epoch": 5.4719999999999995, "grad_norm": 0.9661217691084136, "learning_rate": 5.542684799167069e-06, "loss": 0.0796, "loss_nan_ranks": 0, "loss_rank_avg": 0.08886539936065674, "step": 3420, "valid_targets_mean": 1082.2, "valid_targets_min": 453 }, { "epoch": 5.48, "grad_norm": 0.8055935580691043, "learning_rate": 5.487661475387152e-06, "loss": 0.0837, "loss_nan_ranks": 0, "loss_rank_avg": 0.06585259735584259, "step": 3425, "valid_targets_mean": 797.6, "valid_targets_min": 470 }, { "epoch": 5.4879999999999995, "grad_norm": 0.8384437110022962, "learning_rate": 5.432869169569541e-06, "loss": 0.0667, "loss_nan_ranks": 0, "loss_rank_avg": 0.0917351022362709, "step": 3430, "valid_targets_mean": 1237.9, "valid_targets_min": 439 }, { "epoch": 5.496, "grad_norm": 0.7055350608763379, "learning_rate": 5.378308753938024e-06, "loss": 0.0672, "loss_nan_ranks": 0, "loss_rank_avg": 0.0545569509267807, "step": 3435, "valid_targets_mean": 821.5, "valid_targets_min": 523 }, { "epoch": 5.504, "grad_norm": 0.7938678596509943, "learning_rate": 5.323981097024986e-06, "loss": 0.0619, "loss_nan_ranks": 0, "loss_rank_avg": 0.05954422429203987, "step": 3440, "valid_targets_mean": 926.8, "valid_targets_min": 541 }, { "epoch": 5.5120000000000005, "grad_norm": 0.710376263121058, "learning_rate": 5.269887063657595e-06, "loss": 0.078, "loss_nan_ranks": 0, "loss_rank_avg": 0.0633171796798706, "step": 3445, "valid_targets_mean": 887.9, "valid_targets_min": 552 }, { "epoch": 5.52, "grad_norm": 0.9352971664606785, "learning_rate": 5.216027514944027e-06, "loss": 0.0789, "loss_nan_ranks": 0, "loss_rank_avg": 0.06568583101034164, "step": 3450, "valid_targets_mean": 782.4, "valid_targets_min": 565 }, { "epoch": 5.5280000000000005, "grad_norm": 1.0299067570372629, "learning_rate": 5.162403308259767e-06, "loss": 0.078, "loss_nan_ranks": 0, "loss_rank_avg": 0.06486299633979797, "step": 3455, "valid_targets_mean": 873.9, "valid_targets_min": 542 }, { "epoch": 5.536, "grad_norm": 0.9662261943295932, "learning_rate": 5.109015297233935e-06, "loss": 0.0773, "loss_nan_ranks": 0, "loss_rank_avg": 0.08379887789487839, "step": 3460, "valid_targets_mean": 1013.6, "valid_targets_min": 511 }, { "epoch": 5.5440000000000005, "grad_norm": 0.7998534820934543, "learning_rate": 5.055864331735736e-06, "loss": 0.0624, "loss_nan_ranks": 0, "loss_rank_avg": 0.06942769885063171, "step": 3465, "valid_targets_mean": 784.6, "valid_targets_min": 487 }, { "epoch": 5.552, "grad_norm": 0.9072820507949081, "learning_rate": 5.002951257860909e-06, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.09650293737649918, "step": 3470, "valid_targets_mean": 872.7, "valid_targets_min": 484 }, { "epoch": 5.5600000000000005, "grad_norm": 0.9556215264384249, "learning_rate": 4.950276917918256e-06, "loss": 0.0885, "loss_nan_ranks": 0, "loss_rank_avg": 0.08272415399551392, "step": 3475, "valid_targets_mean": 919.9, "valid_targets_min": 522 }, { "epoch": 5.568, "grad_norm": 0.7873277603382469, "learning_rate": 4.8978421504162385e-06, "loss": 0.0726, "loss_nan_ranks": 0, "loss_rank_avg": 0.06439163535833359, "step": 3480, "valid_targets_mean": 821.4, "valid_targets_min": 455 }, { "epoch": 5.576, "grad_norm": 0.9309358376083752, "learning_rate": 4.845647790049634e-06, "loss": 0.0777, "loss_nan_ranks": 0, "loss_rank_avg": 0.0956721156835556, "step": 3485, "valid_targets_mean": 1139.1, "valid_targets_min": 458 }, { "epoch": 5.584, "grad_norm": 1.32752398877127, "learning_rate": 4.793694667686244e-06, "loss": 0.0801, "loss_nan_ranks": 0, "loss_rank_avg": 0.11851969361305237, "step": 3490, "valid_targets_mean": 969.3, "valid_targets_min": 531 }, { "epoch": 5.592, "grad_norm": 0.7663546673030264, "learning_rate": 4.741983610353664e-06, "loss": 0.0832, "loss_nan_ranks": 0, "loss_rank_avg": 0.06600794941186905, "step": 3495, "valid_targets_mean": 809.7, "valid_targets_min": 514 }, { "epoch": 5.6, "grad_norm": 0.7934732627314547, "learning_rate": 4.690515441226122e-06, "loss": 0.07, "loss_nan_ranks": 0, "loss_rank_avg": 0.06453859806060791, "step": 3500, "valid_targets_mean": 764.8, "valid_targets_min": 471 }, { "epoch": 5.608, "grad_norm": 1.0745817460030198, "learning_rate": 4.639290979611379e-06, "loss": 0.074, "loss_nan_ranks": 0, "loss_rank_avg": 0.09399828314781189, "step": 3505, "valid_targets_mean": 1305.3, "valid_targets_min": 559 }, { "epoch": 5.616, "grad_norm": 0.8247645705611755, "learning_rate": 4.588311040937683e-06, "loss": 0.072, "loss_nan_ranks": 0, "loss_rank_avg": 0.06673099100589752, "step": 3510, "valid_targets_mean": 943.1, "valid_targets_min": 457 }, { "epoch": 5.624, "grad_norm": 0.9109017424927973, "learning_rate": 4.537576436740783e-06, "loss": 0.0815, "loss_nan_ranks": 0, "loss_rank_avg": 0.09812849014997482, "step": 3515, "valid_targets_mean": 932.1, "valid_targets_min": 558 }, { "epoch": 5.632, "grad_norm": 1.0092470799951956, "learning_rate": 4.487087974651016e-06, "loss": 0.0761, "loss_nan_ranks": 0, "loss_rank_avg": 0.08479701727628708, "step": 3520, "valid_targets_mean": 1049.4, "valid_targets_min": 464 }, { "epoch": 5.64, "grad_norm": 1.1229829135516711, "learning_rate": 4.436846458380455e-06, "loss": 0.074, "loss_nan_ranks": 0, "loss_rank_avg": 0.09786868095397949, "step": 3525, "valid_targets_mean": 964.7, "valid_targets_min": 453 }, { "epoch": 5.648, "grad_norm": 0.6833678630320003, "learning_rate": 4.386852687710104e-06, "loss": 0.062, "loss_nan_ranks": 0, "loss_rank_avg": 0.05484826862812042, "step": 3530, "valid_targets_mean": 882.2, "valid_targets_min": 538 }, { "epoch": 5.656, "grad_norm": 0.752457884182261, "learning_rate": 4.337107458477177e-06, "loss": 0.0777, "loss_nan_ranks": 0, "loss_rank_avg": 0.06667198240756989, "step": 3535, "valid_targets_mean": 829.9, "valid_targets_min": 540 }, { "epoch": 5.664, "grad_norm": 0.8183447240718775, "learning_rate": 4.287611562562422e-06, "loss": 0.0894, "loss_nan_ranks": 0, "loss_rank_avg": 0.06430254876613617, "step": 3540, "valid_targets_mean": 768.8, "valid_targets_min": 504 }, { "epoch": 5.672, "grad_norm": 0.7778594591440714, "learning_rate": 4.238365787877516e-06, "loss": 0.0879, "loss_nan_ranks": 0, "loss_rank_avg": 0.06610207259654999, "step": 3545, "valid_targets_mean": 774.2, "valid_targets_min": 416 }, { "epoch": 5.68, "grad_norm": 0.7408596941120574, "learning_rate": 4.189370918352531e-06, "loss": 0.0655, "loss_nan_ranks": 0, "loss_rank_avg": 0.06291748583316803, "step": 3550, "valid_targets_mean": 825.8, "valid_targets_min": 525 }, { "epoch": 5.688, "grad_norm": 0.9669008112490017, "learning_rate": 4.140627733923439e-06, "loss": 0.0792, "loss_nan_ranks": 0, "loss_rank_avg": 0.1275126039981842, "step": 3555, "valid_targets_mean": 1193.2, "valid_targets_min": 506 }, { "epoch": 5.696, "grad_norm": 0.8347158923249489, "learning_rate": 4.092137010519712e-06, "loss": 0.081, "loss_nan_ranks": 0, "loss_rank_avg": 0.08259904384613037, "step": 3560, "valid_targets_mean": 1022.1, "valid_targets_min": 468 }, { "epoch": 5.704, "grad_norm": 0.7263524739984435, "learning_rate": 4.043899520051964e-06, "loss": 0.0656, "loss_nan_ranks": 0, "loss_rank_avg": 0.062250278890132904, "step": 3565, "valid_targets_mean": 819.1, "valid_targets_min": 507 }, { "epoch": 5.712, "grad_norm": 0.8065410169235507, "learning_rate": 3.995916030399658e-06, "loss": 0.0773, "loss_nan_ranks": 0, "loss_rank_avg": 0.06370916962623596, "step": 3570, "valid_targets_mean": 722.7, "valid_targets_min": 458 }, { "epoch": 5.72, "grad_norm": 0.907021075972577, "learning_rate": 3.948187305398892e-06, "loss": 0.0853, "loss_nan_ranks": 0, "loss_rank_avg": 0.07696904242038727, "step": 3575, "valid_targets_mean": 868.0, "valid_targets_min": 556 }, { "epoch": 5.728, "grad_norm": 0.6402111741227499, "learning_rate": 3.90071410483023e-06, "loss": 0.0692, "loss_nan_ranks": 0, "loss_rank_avg": 0.05004892498254776, "step": 3580, "valid_targets_mean": 926.3, "valid_targets_min": 581 }, { "epoch": 5.736, "grad_norm": 1.113414574317508, "learning_rate": 3.853497184406623e-06, "loss": 0.0928, "loss_nan_ranks": 0, "loss_rank_avg": 0.07702088356018066, "step": 3585, "valid_targets_mean": 949.3, "valid_targets_min": 439 }, { "epoch": 5.744, "grad_norm": 0.6131949560769171, "learning_rate": 3.80653729576135e-06, "loss": 0.0656, "loss_nan_ranks": 0, "loss_rank_avg": 0.07584548741579056, "step": 3590, "valid_targets_mean": 1754.1, "valid_targets_min": 647 }, { "epoch": 5.752, "grad_norm": 15.857350207682247, "learning_rate": 3.7598351864360872e-06, "loss": 0.087, "loss_nan_ranks": 0, "loss_rank_avg": 0.05998915061354637, "step": 3595, "valid_targets_mean": 756.8, "valid_targets_min": 481 }, { "epoch": 5.76, "grad_norm": 0.9328208235959797, "learning_rate": 3.713391599868985e-06, "loss": 0.0693, "loss_nan_ranks": 0, "loss_rank_avg": 0.06560102850198746, "step": 3600, "valid_targets_mean": 1117.0, "valid_targets_min": 510 }, { "epoch": 5.768, "grad_norm": 0.7083644653133527, "learning_rate": 3.6672072753828424e-06, "loss": 0.0627, "loss_nan_ranks": 0, "loss_rank_avg": 0.05883736163377762, "step": 3605, "valid_targets_mean": 871.1, "valid_targets_min": 411 }, { "epoch": 5.776, "grad_norm": 0.7376928312531449, "learning_rate": 3.6212829481733368e-06, "loss": 0.1016, "loss_nan_ranks": 0, "loss_rank_avg": 0.05465541034936905, "step": 3610, "valid_targets_mean": 764.8, "valid_targets_min": 454 }, { "epoch": 5.784, "grad_norm": 0.7297951264279697, "learning_rate": 3.575619349297317e-06, "loss": 0.0709, "loss_nan_ranks": 0, "loss_rank_avg": 0.05742385983467102, "step": 3615, "valid_targets_mean": 872.8, "valid_targets_min": 511 }, { "epoch": 5.792, "grad_norm": 1.1192953705793156, "learning_rate": 3.5302172056611682e-06, "loss": 0.0811, "loss_nan_ranks": 0, "loss_rank_avg": 0.07923439145088196, "step": 3620, "valid_targets_mean": 951.4, "valid_targets_min": 535 }, { "epoch": 5.8, "grad_norm": 1.3256193080215943, "learning_rate": 3.485077240009247e-06, "loss": 0.0711, "loss_nan_ranks": 0, "loss_rank_avg": 0.09199804812669754, "step": 3625, "valid_targets_mean": 917.0, "valid_targets_min": 434 }, { "epoch": 5.808, "grad_norm": 0.8608702612415465, "learning_rate": 3.4402001709123643e-06, "loss": 0.0969, "loss_nan_ranks": 0, "loss_rank_avg": 0.07680759578943253, "step": 3630, "valid_targets_mean": 1030.2, "valid_targets_min": 533 }, { "epoch": 5.816, "grad_norm": 0.8272921235726557, "learning_rate": 3.3955867127563515e-06, "loss": 0.0696, "loss_nan_ranks": 0, "loss_rank_avg": 0.07520703971385956, "step": 3635, "valid_targets_mean": 956.5, "valid_targets_min": 583 }, { "epoch": 5.824, "grad_norm": 0.9595544410014674, "learning_rate": 3.351237575730695e-06, "loss": 0.0761, "loss_nan_ranks": 0, "loss_rank_avg": 0.057514142245054245, "step": 3640, "valid_targets_mean": 808.6, "valid_targets_min": 481 }, { "epoch": 5.832, "grad_norm": 1.0200301521244446, "learning_rate": 3.307153465817219e-06, "loss": 0.0652, "loss_nan_ranks": 0, "loss_rank_avg": 0.0669136494398117, "step": 3645, "valid_targets_mean": 921.6, "valid_targets_min": 451 }, { "epoch": 5.84, "grad_norm": 0.9250628512871966, "learning_rate": 3.263335084778856e-06, "loss": 0.0633, "loss_nan_ranks": 0, "loss_rank_avg": 0.06535211950540543, "step": 3650, "valid_targets_mean": 899.9, "valid_targets_min": 521 }, { "epoch": 5.848, "grad_norm": 0.7333030773657969, "learning_rate": 3.2197831301484816e-06, "loss": 0.0769, "loss_nan_ranks": 0, "loss_rank_avg": 0.058711059391498566, "step": 3655, "valid_targets_mean": 856.2, "valid_targets_min": 449 }, { "epoch": 5.856, "grad_norm": 0.9157327185000818, "learning_rate": 3.1764982952177805e-06, "loss": 0.071, "loss_nan_ranks": 0, "loss_rank_avg": 0.06972061097621918, "step": 3660, "valid_targets_mean": 853.6, "valid_targets_min": 492 }, { "epoch": 5.864, "grad_norm": 0.9031285032639718, "learning_rate": 3.1334812690262507e-06, "loss": 0.0713, "loss_nan_ranks": 0, "loss_rank_avg": 0.07708781957626343, "step": 3665, "valid_targets_mean": 831.5, "valid_targets_min": 496 }, { "epoch": 5.872, "grad_norm": 0.7166417969880111, "learning_rate": 3.0907327363502084e-06, "loss": 0.0687, "loss_nan_ranks": 0, "loss_rank_avg": 0.06776954233646393, "step": 3670, "valid_targets_mean": 950.5, "valid_targets_min": 554 }, { "epoch": 5.88, "grad_norm": 0.8507080895949477, "learning_rate": 3.0482533776918987e-06, "loss": 0.0683, "loss_nan_ranks": 0, "loss_rank_avg": 0.06763128936290741, "step": 3675, "valid_targets_mean": 866.5, "valid_targets_min": 556 }, { "epoch": 5.888, "grad_norm": 0.6964626437196108, "learning_rate": 3.0060438692686533e-06, "loss": 0.0664, "loss_nan_ranks": 0, "loss_rank_avg": 0.05540402978658676, "step": 3680, "valid_targets_mean": 845.1, "valid_targets_min": 512 }, { "epoch": 5.896, "grad_norm": 0.911090063477602, "learning_rate": 2.964104883002139e-06, "loss": 0.0664, "loss_nan_ranks": 0, "loss_rank_avg": 0.07893071323633194, "step": 3685, "valid_targets_mean": 862.6, "valid_targets_min": 479 }, { "epoch": 5.904, "grad_norm": 0.7225504639275782, "learning_rate": 2.9224370865076457e-06, "loss": 0.1052, "loss_nan_ranks": 0, "loss_rank_avg": 0.05786134675145149, "step": 3690, "valid_targets_mean": 898.9, "valid_targets_min": 540 }, { "epoch": 5.912, "grad_norm": 0.8157459567342921, "learning_rate": 2.8810411430834716e-06, "loss": 0.09, "loss_nan_ranks": 0, "loss_rank_avg": 0.10838191211223602, "step": 3695, "valid_targets_mean": 1518.1, "valid_targets_min": 513 }, { "epoch": 5.92, "grad_norm": 2.0440615035308776, "learning_rate": 2.8399177117003595e-06, "loss": 0.0741, "loss_nan_ranks": 0, "loss_rank_avg": 0.10617715120315552, "step": 3700, "valid_targets_mean": 907.9, "valid_targets_min": 465 }, { "epoch": 5.928, "grad_norm": 0.7016965628788885, "learning_rate": 2.7990674469910085e-06, "loss": 0.0627, "loss_nan_ranks": 0, "loss_rank_avg": 0.06436631828546524, "step": 3705, "valid_targets_mean": 888.1, "valid_targets_min": 394 }, { "epoch": 5.936, "grad_norm": 0.8356053222880082, "learning_rate": 2.7584909992396515e-06, "loss": 0.0683, "loss_nan_ranks": 0, "loss_rank_avg": 0.06576769798994064, "step": 3710, "valid_targets_mean": 802.6, "valid_targets_min": 520 }, { "epoch": 5.944, "grad_norm": 0.8700320179379732, "learning_rate": 2.7181890143716995e-06, "loss": 0.0831, "loss_nan_ranks": 0, "loss_rank_avg": 0.1040990948677063, "step": 3715, "valid_targets_mean": 1067.1, "valid_targets_min": 504 }, { "epoch": 5.952, "grad_norm": 0.6909915063769616, "learning_rate": 2.6781621339434717e-06, "loss": 0.0763, "loss_nan_ranks": 0, "loss_rank_avg": 0.0548010990023613, "step": 3720, "valid_targets_mean": 853.2, "valid_targets_min": 510 }, { "epoch": 5.96, "grad_norm": 0.8709969244384487, "learning_rate": 2.638410995131966e-06, "loss": 0.0887, "loss_nan_ranks": 0, "loss_rank_avg": 0.06444862484931946, "step": 3725, "valid_targets_mean": 900.3, "valid_targets_min": 468 }, { "epoch": 5.968, "grad_norm": 0.7819427220253011, "learning_rate": 2.5989362307247313e-06, "loss": 0.0632, "loss_nan_ranks": 0, "loss_rank_avg": 0.059864625334739685, "step": 3730, "valid_targets_mean": 990.4, "valid_targets_min": 479 }, { "epoch": 5.976, "grad_norm": 0.8859049708419192, "learning_rate": 2.5597384691097847e-06, "loss": 0.0778, "loss_nan_ranks": 0, "loss_rank_avg": 0.07407764345407486, "step": 3735, "valid_targets_mean": 701.7, "valid_targets_min": 466 }, { "epoch": 5.984, "grad_norm": 0.7938025501995064, "learning_rate": 2.520818334265611e-06, "loss": 0.0707, "loss_nan_ranks": 0, "loss_rank_avg": 0.06618218123912811, "step": 3740, "valid_targets_mean": 992.5, "valid_targets_min": 515 }, { "epoch": 5.992, "grad_norm": 0.8889342642276553, "learning_rate": 2.482176445751232e-06, "loss": 0.0995, "loss_nan_ranks": 0, "loss_rank_avg": 0.18800382316112518, "step": 3745, "valid_targets_mean": 1662.3, "valid_targets_min": 443 }, { "epoch": 6.0, "grad_norm": 0.7423988202026727, "learning_rate": 2.4438134186963415e-06, "loss": 0.0683, "loss_nan_ranks": 0, "loss_rank_avg": 0.07153650373220444, "step": 3750, "valid_targets_mean": 1792.0, "valid_targets_min": 498 }, { "epoch": 6.008, "grad_norm": 0.6846669028729305, "learning_rate": 2.4057298637915105e-06, "loss": 0.0745, "loss_nan_ranks": 0, "loss_rank_avg": 0.06927240639925003, "step": 3755, "valid_targets_mean": 938.9, "valid_targets_min": 469 }, { "epoch": 6.016, "grad_norm": 0.8174122246512885, "learning_rate": 2.3679263872784717e-06, "loss": 0.0715, "loss_nan_ranks": 0, "loss_rank_avg": 0.094500333070755, "step": 3760, "valid_targets_mean": 937.7, "valid_targets_min": 506 }, { "epoch": 6.024, "grad_norm": 0.731296665772432, "learning_rate": 2.330403590940471e-06, "loss": 0.0603, "loss_nan_ranks": 0, "loss_rank_avg": 0.057936131954193115, "step": 3765, "valid_targets_mean": 927.5, "valid_targets_min": 455 }, { "epoch": 6.032, "grad_norm": 0.7527270039190072, "learning_rate": 2.2931620720926717e-06, "loss": 0.08, "loss_nan_ranks": 0, "loss_rank_avg": 0.05649135261774063, "step": 3770, "valid_targets_mean": 735.4, "valid_targets_min": 365 }, { "epoch": 6.04, "grad_norm": 0.6607880693972132, "learning_rate": 2.256202423572669e-06, "loss": 0.0833, "loss_nan_ranks": 0, "loss_rank_avg": 0.052145324647426605, "step": 3775, "valid_targets_mean": 830.6, "valid_targets_min": 571 }, { "epoch": 6.048, "grad_norm": 0.7674629362054604, "learning_rate": 2.219525233731035e-06, "loss": 0.0829, "loss_nan_ranks": 0, "loss_rank_avg": 0.06597025692462921, "step": 3780, "valid_targets_mean": 860.8, "valid_targets_min": 423 }, { "epoch": 6.056, "grad_norm": 0.8067654335852509, "learning_rate": 2.183131086421961e-06, "loss": 0.0612, "loss_nan_ranks": 0, "loss_rank_avg": 0.06674222648143768, "step": 3785, "valid_targets_mean": 833.1, "valid_targets_min": 509 }, { "epoch": 6.064, "grad_norm": 0.798692828688171, "learning_rate": 2.1470205609939533e-06, "loss": 0.0585, "loss_nan_ranks": 0, "loss_rank_avg": 0.0634201243519783, "step": 3790, "valid_targets_mean": 828.0, "valid_targets_min": 456 }, { "epoch": 6.072, "grad_norm": 0.9309599530401287, "learning_rate": 2.1111942322806335e-06, "loss": 0.0636, "loss_nan_ranks": 0, "loss_rank_avg": 0.11342776566743851, "step": 3795, "valid_targets_mean": 1038.8, "valid_targets_min": 522 }, { "epoch": 6.08, "grad_norm": 0.6895626317148708, "learning_rate": 2.0756526705915635e-06, "loss": 0.0586, "loss_nan_ranks": 0, "loss_rank_avg": 0.05497216805815697, "step": 3800, "valid_targets_mean": 826.1, "valid_targets_min": 489 }, { "epoch": 6.088, "grad_norm": 0.8885058561936798, "learning_rate": 2.0403964417031764e-06, "loss": 0.0745, "loss_nan_ranks": 0, "loss_rank_avg": 0.10158699750900269, "step": 3805, "valid_targets_mean": 920.6, "valid_targets_min": 494 }, { "epoch": 6.096, "grad_norm": 0.6850905802220816, "learning_rate": 2.0054261068497773e-06, "loss": 0.0589, "loss_nan_ranks": 0, "loss_rank_avg": 0.05741908773779869, "step": 3810, "valid_targets_mean": 806.9, "valid_targets_min": 479 }, { "epoch": 6.104, "grad_norm": 0.8697899534749736, "learning_rate": 1.9707422227145922e-06, "loss": 0.066, "loss_nan_ranks": 0, "loss_rank_avg": 0.06361544132232666, "step": 3815, "valid_targets_mean": 1002.5, "valid_targets_min": 512 }, { "epoch": 6.112, "grad_norm": 0.6731926316527731, "learning_rate": 1.936345341420924e-06, "loss": 0.0644, "loss_nan_ranks": 0, "loss_rank_avg": 0.05815357714891434, "step": 3820, "valid_targets_mean": 903.8, "valid_targets_min": 536 }, { "epoch": 6.12, "grad_norm": 0.6944702540923555, "learning_rate": 1.9022360105233507e-06, "loss": 0.0553, "loss_nan_ranks": 0, "loss_rank_avg": 0.052839022129774094, "step": 3825, "valid_targets_mean": 839.6, "valid_targets_min": 549 }, { "epoch": 6.128, "grad_norm": 0.6697596876704403, "learning_rate": 1.8684147729990188e-06, "loss": 0.0628, "loss_nan_ranks": 0, "loss_rank_avg": 0.06421222537755966, "step": 3830, "valid_targets_mean": 1138.3, "valid_targets_min": 578 }, { "epoch": 6.136, "grad_norm": 0.70930236617704, "learning_rate": 1.8348821672389893e-06, "loss": 0.0744, "loss_nan_ranks": 0, "loss_rank_avg": 0.06564471125602722, "step": 3835, "valid_targets_mean": 1003.9, "valid_targets_min": 497 }, { "epoch": 6.144, "grad_norm": 0.7133363267380656, "learning_rate": 1.8016387270396784e-06, "loss": 0.0549, "loss_nan_ranks": 0, "loss_rank_avg": 0.05746018514037132, "step": 3840, "valid_targets_mean": 836.8, "valid_targets_min": 533 }, { "epoch": 6.152, "grad_norm": 1.393721527480361, "learning_rate": 1.7686849815943486e-06, "loss": 0.0612, "loss_nan_ranks": 0, "loss_rank_avg": 0.061105988919734955, "step": 3845, "valid_targets_mean": 855.7, "valid_targets_min": 434 }, { "epoch": 6.16, "grad_norm": 0.8997638717351509, "learning_rate": 1.7360214554847e-06, "loss": 0.0801, "loss_nan_ranks": 0, "loss_rank_avg": 0.06404763460159302, "step": 3850, "valid_targets_mean": 742.3, "valid_targets_min": 445 }, { "epoch": 6.168, "grad_norm": 0.7478734761337703, "learning_rate": 1.703648668672495e-06, "loss": 0.055, "loss_nan_ranks": 0, "loss_rank_avg": 0.05328584462404251, "step": 3855, "valid_targets_mean": 827.1, "valid_targets_min": 462 }, { "epoch": 6.176, "grad_norm": 0.5629299888001849, "learning_rate": 1.6715671364913077e-06, "loss": 0.0599, "loss_nan_ranks": 0, "loss_rank_avg": 0.04512747377157211, "step": 3860, "valid_targets_mean": 956.1, "valid_targets_min": 582 }, { "epoch": 6.184, "grad_norm": 0.7162074927852252, "learning_rate": 1.6397773696383091e-06, "loss": 0.0677, "loss_nan_ranks": 0, "loss_rank_avg": 0.055690545588731766, "step": 3865, "valid_targets_mean": 848.6, "valid_targets_min": 527 }, { "epoch": 6.192, "grad_norm": 0.8245228567205685, "learning_rate": 1.6082798741661321e-06, "loss": 0.0653, "loss_nan_ranks": 0, "loss_rank_avg": 0.05366797372698784, "step": 3870, "valid_targets_mean": 988.2, "valid_targets_min": 537 }, { "epoch": 6.2, "grad_norm": 0.7698977466565333, "learning_rate": 1.5770751514748273e-06, "loss": 0.0645, "loss_nan_ranks": 0, "loss_rank_avg": 0.06601998955011368, "step": 3875, "valid_targets_mean": 886.9, "valid_targets_min": 506 }, { "epoch": 6.208, "grad_norm": 0.861707757423707, "learning_rate": 1.5461636983038686e-06, "loss": 0.0571, "loss_nan_ranks": 0, "loss_rank_avg": 0.06016688793897629, "step": 3880, "valid_targets_mean": 869.3, "valid_targets_min": 573 }, { "epoch": 6.216, "grad_norm": 0.7843893872148268, "learning_rate": 1.5155460067242578e-06, "loss": 0.0579, "loss_nan_ranks": 0, "loss_rank_avg": 0.06555959582328796, "step": 3885, "valid_targets_mean": 822.2, "valid_targets_min": 528 }, { "epoch": 6.224, "grad_norm": 0.7226354746736298, "learning_rate": 1.4852225641306816e-06, "loss": 0.0658, "loss_nan_ranks": 0, "loss_rank_avg": 0.053480908274650574, "step": 3890, "valid_targets_mean": 833.5, "valid_targets_min": 511 }, { "epoch": 6.232, "grad_norm": 0.5802698962131153, "learning_rate": 1.4551938532337607e-06, "loss": 0.0559, "loss_nan_ranks": 0, "loss_rank_avg": 0.05033823102712631, "step": 3895, "valid_targets_mean": 961.6, "valid_targets_min": 475 }, { "epoch": 6.24, "grad_norm": 0.6730808501547918, "learning_rate": 1.4254603520523614e-06, "loss": 0.0643, "loss_nan_ranks": 0, "loss_rank_avg": 0.052310969680547714, "step": 3900, "valid_targets_mean": 1042.4, "valid_targets_min": 653 }, { "epoch": 6.248, "grad_norm": 0.8730341878293636, "learning_rate": 1.3960225339059875e-06, "loss": 0.0747, "loss_nan_ranks": 0, "loss_rank_avg": 0.11478105187416077, "step": 3905, "valid_targets_mean": 1050.5, "valid_targets_min": 506 }, { "epoch": 6.256, "grad_norm": 0.6535203685996509, "learning_rate": 1.3668808674072409e-06, "loss": 0.0657, "loss_nan_ranks": 0, "loss_rank_avg": 0.05904577672481537, "step": 3910, "valid_targets_mean": 892.3, "valid_targets_min": 494 }, { "epoch": 6.264, "grad_norm": 0.7846320544982055, "learning_rate": 1.338035816454375e-06, "loss": 0.0654, "loss_nan_ranks": 0, "loss_rank_avg": 0.05399632081389427, "step": 3915, "valid_targets_mean": 881.1, "valid_targets_min": 528 }, { "epoch": 6.272, "grad_norm": 0.7168968270055656, "learning_rate": 1.3094878402238887e-06, "loss": 0.0768, "loss_nan_ranks": 0, "loss_rank_avg": 0.05075981467962265, "step": 3920, "valid_targets_mean": 851.4, "valid_targets_min": 475 }, { "epoch": 6.28, "grad_norm": 0.8869465898236252, "learning_rate": 1.2812373931632371e-06, "loss": 0.0711, "loss_nan_ranks": 0, "loss_rank_avg": 0.0765652060508728, "step": 3925, "valid_targets_mean": 895.0, "valid_targets_min": 457 }, { "epoch": 6.288, "grad_norm": 0.8358586687918181, "learning_rate": 1.2532849249835932e-06, "loss": 0.0714, "loss_nan_ranks": 0, "loss_rank_avg": 0.06396108120679855, "step": 3930, "valid_targets_mean": 831.7, "valid_targets_min": 459 }, { "epoch": 6.296, "grad_norm": 0.8210165464346263, "learning_rate": 1.2256308806526774e-06, "loss": 0.0599, "loss_nan_ranks": 0, "loss_rank_avg": 0.0579555407166481, "step": 3935, "valid_targets_mean": 654.8, "valid_targets_min": 507 }, { "epoch": 6.304, "grad_norm": 0.6681203570944947, "learning_rate": 1.1982757003876855e-06, "loss": 0.0635, "loss_nan_ranks": 0, "loss_rank_avg": 0.048654213547706604, "step": 3940, "valid_targets_mean": 810.0, "valid_targets_min": 542 }, { "epoch": 6.312, "grad_norm": 0.6799088682049679, "learning_rate": 1.1712198196482793e-06, "loss": 0.0598, "loss_nan_ranks": 0, "loss_rank_avg": 0.05715302377939224, "step": 3945, "valid_targets_mean": 828.6, "valid_targets_min": 537 }, { "epoch": 6.32, "grad_norm": 0.6657347250611597, "learning_rate": 1.1444636691296518e-06, "loss": 0.0774, "loss_nan_ranks": 0, "loss_rank_avg": 0.050616826862096786, "step": 3950, "valid_targets_mean": 813.9, "valid_targets_min": 487 }, { "epoch": 6.328, "grad_norm": 0.7410752665142455, "learning_rate": 1.11800767475567e-06, "loss": 0.087, "loss_nan_ranks": 0, "loss_rank_avg": 0.06187132000923157, "step": 3955, "valid_targets_mean": 1100.0, "valid_targets_min": 431 }, { "epoch": 6.336, "grad_norm": 0.7669300808343251, "learning_rate": 1.0918522576721014e-06, "loss": 0.0587, "loss_nan_ranks": 0, "loss_rank_avg": 0.055733539164066315, "step": 3960, "valid_targets_mean": 838.1, "valid_targets_min": 454 }, { "epoch": 6.344, "grad_norm": 0.8961934706850629, "learning_rate": 1.0659978342399003e-06, "loss": 0.065, "loss_nan_ranks": 0, "loss_rank_avg": 0.08290403336286545, "step": 3965, "valid_targets_mean": 1151.6, "valid_targets_min": 465 }, { "epoch": 6.352, "grad_norm": 0.6391167316161919, "learning_rate": 1.0404448160285897e-06, "loss": 0.0622, "loss_nan_ranks": 0, "loss_rank_avg": 0.051166288554668427, "step": 3970, "valid_targets_mean": 880.2, "valid_targets_min": 519 }, { "epoch": 6.36, "grad_norm": 1.1900372923848188, "learning_rate": 1.0151936098097015e-06, "loss": 0.0932, "loss_nan_ranks": 0, "loss_rank_avg": 0.19820678234100342, "step": 3975, "valid_targets_mean": 1518.1, "valid_targets_min": 479 }, { "epoch": 6.368, "grad_norm": 0.8375789509534456, "learning_rate": 9.902446175503089e-07, "loss": 0.0649, "loss_nan_ranks": 0, "loss_rank_avg": 0.053763311356306076, "step": 3980, "valid_targets_mean": 869.1, "valid_targets_min": 521 }, { "epoch": 6.376, "grad_norm": 0.8871755308243114, "learning_rate": 9.655982364066197e-07, "loss": 0.083, "loss_nan_ranks": 0, "loss_rank_avg": 0.1606995314359665, "step": 3985, "valid_targets_mean": 1649.8, "valid_targets_min": 474 }, { "epoch": 6.384, "grad_norm": 0.8699835456903998, "learning_rate": 9.412548587176595e-07, "loss": 0.0873, "loss_nan_ranks": 0, "loss_rank_avg": 0.1351429671049118, "step": 3990, "valid_targets_mean": 1388.2, "valid_targets_min": 497 }, { "epoch": 6.392, "grad_norm": 0.719732729210347, "learning_rate": 9.172148719990237e-07, "loss": 0.0701, "loss_nan_ranks": 0, "loss_rank_avg": 0.06241889297962189, "step": 3995, "valid_targets_mean": 781.2, "valid_targets_min": 519 }, { "epoch": 6.4, "grad_norm": 0.6986415261986041, "learning_rate": 8.934786589367106e-07, "loss": 0.053, "loss_nan_ranks": 0, "loss_rank_avg": 0.04738050699234009, "step": 4000, "valid_targets_mean": 1008.9, "valid_targets_min": 479 }, { "epoch": 6.408, "grad_norm": 0.722758104299292, "learning_rate": 8.700465973810246e-07, "loss": 0.0595, "loss_nan_ranks": 0, "loss_rank_avg": 0.0537380613386631, "step": 4005, "valid_targets_mean": 900.3, "valid_targets_min": 498 }, { "epoch": 6.416, "grad_norm": 0.8558749091680005, "learning_rate": 8.469190603405719e-07, "loss": 0.0721, "loss_nan_ranks": 0, "loss_rank_avg": 0.0537833496928215, "step": 4010, "valid_targets_mean": 773.1, "valid_targets_min": 483 }, { "epoch": 6.424, "grad_norm": 0.7021990518704619, "learning_rate": 8.240964159763121e-07, "loss": 0.0738, "loss_nan_ranks": 0, "loss_rank_avg": 0.054406896233558655, "step": 4015, "valid_targets_mean": 892.9, "valid_targets_min": 419 }, { "epoch": 6.432, "grad_norm": 0.7542283622774374, "learning_rate": 8.015790275957003e-07, "loss": 0.061, "loss_nan_ranks": 0, "loss_rank_avg": 0.053682684898376465, "step": 4020, "valid_targets_mean": 818.3, "valid_targets_min": 498 }, { "epoch": 6.44, "grad_norm": 0.850281996059853, "learning_rate": 7.793672536469077e-07, "loss": 0.0603, "loss_nan_ranks": 0, "loss_rank_avg": 0.06802020967006683, "step": 4025, "valid_targets_mean": 839.2, "valid_targets_min": 429 }, { "epoch": 6.448, "grad_norm": 0.8076325556915775, "learning_rate": 7.574614477131081e-07, "loss": 0.0766, "loss_nan_ranks": 0, "loss_rank_avg": 0.06391239166259766, "step": 4030, "valid_targets_mean": 813.6, "valid_targets_min": 493 }, { "epoch": 6.456, "grad_norm": 0.9413722677898027, "learning_rate": 7.358619585068583e-07, "loss": 0.0864, "loss_nan_ranks": 0, "loss_rank_avg": 0.11960811913013458, "step": 4035, "valid_targets_mean": 1009.1, "valid_targets_min": 469 }, { "epoch": 6.464, "grad_norm": 0.8084183126035619, "learning_rate": 7.145691298645419e-07, "loss": 0.0586, "loss_nan_ranks": 0, "loss_rank_avg": 0.0709858387708664, "step": 4040, "valid_targets_mean": 964.0, "valid_targets_min": 545 }, { "epoch": 6.4719999999999995, "grad_norm": 0.6245381040239134, "learning_rate": 6.935833007408965e-07, "loss": 0.0537, "loss_nan_ranks": 0, "loss_rank_avg": 0.0447721928358078, "step": 4045, "valid_targets_mean": 1129.9, "valid_targets_min": 440 }, { "epoch": 6.48, "grad_norm": 0.7766133969307153, "learning_rate": 6.729048052036136e-07, "loss": 0.0614, "loss_nan_ranks": 0, "loss_rank_avg": 0.05909734219312668, "step": 4050, "valid_targets_mean": 746.9, "valid_targets_min": 486 }, { "epoch": 6.4879999999999995, "grad_norm": 0.7035951525278532, "learning_rate": 6.52533972428031e-07, "loss": 0.0591, "loss_nan_ranks": 0, "loss_rank_avg": 0.05800524726510048, "step": 4055, "valid_targets_mean": 1011.1, "valid_targets_min": 569 }, { "epoch": 6.496, "grad_norm": 0.8497993216044847, "learning_rate": 6.324711266918826e-07, "loss": 0.0667, "loss_nan_ranks": 0, "loss_rank_avg": 0.06494573503732681, "step": 4060, "valid_targets_mean": 794.0, "valid_targets_min": 512 }, { "epoch": 6.504, "grad_norm": 0.8177891523548564, "learning_rate": 6.127165873701457e-07, "loss": 0.0624, "loss_nan_ranks": 0, "loss_rank_avg": 0.06821263581514359, "step": 4065, "valid_targets_mean": 1191.3, "valid_targets_min": 583 }, { "epoch": 6.5120000000000005, "grad_norm": 0.8433139564163347, "learning_rate": 5.932706689299461e-07, "loss": 0.065, "loss_nan_ranks": 0, "loss_rank_avg": 0.06629593670368195, "step": 4070, "valid_targets_mean": 766.3, "valid_targets_min": 524 }, { "epoch": 6.52, "grad_norm": 0.696116550154397, "learning_rate": 5.741336809255615e-07, "loss": 0.0587, "loss_nan_ranks": 0, "loss_rank_avg": 0.058657385408878326, "step": 4075, "valid_targets_mean": 1665.0, "valid_targets_min": 559 }, { "epoch": 6.5280000000000005, "grad_norm": 0.9008199904333769, "learning_rate": 5.553059279934902e-07, "loss": 0.0643, "loss_nan_ranks": 0, "loss_rank_avg": 0.09079106897115707, "step": 4080, "valid_targets_mean": 1069.2, "valid_targets_min": 456 }, { "epoch": 6.536, "grad_norm": 0.8115733452505096, "learning_rate": 5.36787709847597e-07, "loss": 0.0673, "loss_nan_ranks": 0, "loss_rank_avg": 0.08065254241228104, "step": 4085, "valid_targets_mean": 1330.5, "valid_targets_min": 537 }, { "epoch": 6.5440000000000005, "grad_norm": 0.8006341156338428, "learning_rate": 5.185793212743529e-07, "loss": 0.0716, "loss_nan_ranks": 0, "loss_rank_avg": 0.08026818931102753, "step": 4090, "valid_targets_mean": 861.7, "valid_targets_min": 411 }, { "epoch": 6.552, "grad_norm": 0.6837803065916436, "learning_rate": 5.006810521281335e-07, "loss": 0.0611, "loss_nan_ranks": 0, "loss_rank_avg": 0.04462552070617676, "step": 4095, "valid_targets_mean": 843.3, "valid_targets_min": 490 }, { "epoch": 6.5600000000000005, "grad_norm": 0.9019291713010887, "learning_rate": 4.830931873266065e-07, "loss": 0.1052, "loss_nan_ranks": 0, "loss_rank_avg": 0.11825674772262573, "step": 4100, "valid_targets_mean": 1240.2, "valid_targets_min": 497 }, { "epoch": 6.568, "grad_norm": 0.827696103249896, "learning_rate": 4.658160068462025e-07, "loss": 0.0734, "loss_nan_ranks": 0, "loss_rank_avg": 0.06389281153678894, "step": 4105, "valid_targets_mean": 695.1, "valid_targets_min": 462 }, { "epoch": 6.576, "grad_norm": 0.6539205464048427, "learning_rate": 4.488497857176466e-07, "loss": 0.061, "loss_nan_ranks": 0, "loss_rank_avg": 0.05515346676111221, "step": 4110, "valid_targets_mean": 827.1, "valid_targets_min": 524 }, { "epoch": 6.584, "grad_norm": 0.695414684968317, "learning_rate": 4.321947940215898e-07, "loss": 0.073, "loss_nan_ranks": 0, "loss_rank_avg": 0.05857130512595177, "step": 4115, "valid_targets_mean": 823.6, "valid_targets_min": 531 }, { "epoch": 6.592, "grad_norm": 0.9637059592515878, "learning_rate": 4.1585129688430425e-07, "loss": 0.0554, "loss_nan_ranks": 0, "loss_rank_avg": 0.06732626259326935, "step": 4120, "valid_targets_mean": 868.4, "valid_targets_min": 496 }, { "epoch": 6.6, "grad_norm": 0.7770312997823519, "learning_rate": 3.998195544734706e-07, "loss": 0.0581, "loss_nan_ranks": 0, "loss_rank_avg": 0.050231486558914185, "step": 4125, "valid_targets_mean": 777.0, "valid_targets_min": 452 }, { "epoch": 6.608, "grad_norm": 0.8563576811599827, "learning_rate": 3.840998219940284e-07, "loss": 0.0596, "loss_nan_ranks": 0, "loss_rank_avg": 0.0819033831357956, "step": 4130, "valid_targets_mean": 1120.9, "valid_targets_min": 474 }, { "epoch": 6.616, "grad_norm": 0.7824387896671777, "learning_rate": 3.6869234968411214e-07, "loss": 0.0748, "loss_nan_ranks": 0, "loss_rank_avg": 0.0632677674293518, "step": 4135, "valid_targets_mean": 854.2, "valid_targets_min": 575 }, { "epoch": 6.624, "grad_norm": 0.821997875998823, "learning_rate": 3.5359738281107504e-07, "loss": 0.0573, "loss_nan_ranks": 0, "loss_rank_avg": 0.05922164022922516, "step": 4140, "valid_targets_mean": 923.0, "valid_targets_min": 434 }, { "epoch": 6.632, "grad_norm": 0.8827562626339426, "learning_rate": 3.38815161667585e-07, "loss": 0.0605, "loss_nan_ranks": 0, "loss_rank_avg": 0.062201905995607376, "step": 4145, "valid_targets_mean": 782.9, "valid_targets_min": 487 }, { "epoch": 6.64, "grad_norm": 0.7921463346038955, "learning_rate": 3.24345921567788e-07, "loss": 0.0719, "loss_nan_ranks": 0, "loss_rank_avg": 0.05580519512295723, "step": 4150, "valid_targets_mean": 1073.8, "valid_targets_min": 510 }, { "epoch": 6.648, "grad_norm": 0.7165813690063456, "learning_rate": 3.101898928435754e-07, "loss": 0.0769, "loss_nan_ranks": 0, "loss_rank_avg": 0.08175031840801239, "step": 4155, "valid_targets_mean": 1565.8, "valid_targets_min": 555 }, { "epoch": 6.656, "grad_norm": 0.6656321494894558, "learning_rate": 2.9634730084091343e-07, "loss": 0.0745, "loss_nan_ranks": 0, "loss_rank_avg": 0.05006745457649231, "step": 4160, "valid_targets_mean": 786.4, "valid_targets_min": 503 }, { "epoch": 6.664, "grad_norm": 0.939829256612867, "learning_rate": 2.8281836591624865e-07, "loss": 0.0641, "loss_nan_ranks": 0, "loss_rank_avg": 0.07113055884838104, "step": 4165, "valid_targets_mean": 894.0, "valid_targets_min": 396 }, { "epoch": 6.672, "grad_norm": 0.7696682925719788, "learning_rate": 2.6960330343301033e-07, "loss": 0.063, "loss_nan_ranks": 0, "loss_rank_avg": 0.0579502247273922, "step": 4170, "valid_targets_mean": 786.8, "valid_targets_min": 426 }, { "epoch": 6.68, "grad_norm": 0.9182508899662383, "learning_rate": 2.5670232375817784e-07, "loss": 0.0763, "loss_nan_ranks": 0, "loss_rank_avg": 0.09177736937999725, "step": 4175, "valid_targets_mean": 1085.4, "valid_targets_min": 486 }, { "epoch": 6.688, "grad_norm": 0.8160880687662895, "learning_rate": 2.441156322589322e-07, "loss": 0.1075, "loss_nan_ranks": 0, "loss_rank_avg": 0.11923444271087646, "step": 4180, "valid_targets_mean": 1245.9, "valid_targets_min": 514 }, { "epoch": 6.696, "grad_norm": 0.6590045004958579, "learning_rate": 2.318434292993832e-07, "loss": 0.057, "loss_nan_ranks": 0, "loss_rank_avg": 0.04918127506971359, "step": 4185, "valid_targets_mean": 838.5, "valid_targets_min": 508 }, { "epoch": 6.704, "grad_norm": 0.7931935096125178, "learning_rate": 2.1988591023738514e-07, "loss": 0.0737, "loss_nan_ranks": 0, "loss_rank_avg": 0.05984684079885483, "step": 4190, "valid_targets_mean": 636.6, "valid_targets_min": 526 }, { "epoch": 6.712, "grad_norm": 0.8409861634776767, "learning_rate": 2.0824326542142835e-07, "loss": 0.0752, "loss_nan_ranks": 0, "loss_rank_avg": 0.0892910361289978, "step": 4195, "valid_targets_mean": 1090.1, "valid_targets_min": 420 }, { "epoch": 6.72, "grad_norm": 0.7840113248676499, "learning_rate": 1.9691568018759931e-07, "loss": 0.0794, "loss_nan_ranks": 0, "loss_rank_avg": 0.05649395287036896, "step": 4200, "valid_targets_mean": 911.5, "valid_targets_min": 479 }, { "epoch": 6.728, "grad_norm": 0.8684225809470628, "learning_rate": 1.8590333485664525e-07, "loss": 0.0589, "loss_nan_ranks": 0, "loss_rank_avg": 0.06985966861248016, "step": 4205, "valid_targets_mean": 1026.4, "valid_targets_min": 521 }, { "epoch": 6.736, "grad_norm": 1.273545703399396, "learning_rate": 1.752064047310853e-07, "loss": 0.074, "loss_nan_ranks": 0, "loss_rank_avg": 0.12074144184589386, "step": 4210, "valid_targets_mean": 1021.5, "valid_targets_min": 394 }, { "epoch": 6.744, "grad_norm": 0.843992482033791, "learning_rate": 1.6482506009243949e-07, "loss": 0.0661, "loss_nan_ranks": 0, "loss_rank_avg": 0.06381138414144516, "step": 4215, "valid_targets_mean": 786.6, "valid_targets_min": 536 }, { "epoch": 6.752, "grad_norm": 0.8415630384133468, "learning_rate": 1.5475946619850192e-07, "loss": 0.0668, "loss_nan_ranks": 0, "loss_rank_avg": 0.07749566435813904, "step": 4220, "valid_targets_mean": 989.6, "valid_targets_min": 469 }, { "epoch": 6.76, "grad_norm": 0.7940286483649288, "learning_rate": 1.4500978328071845e-07, "loss": 0.0555, "loss_nan_ranks": 0, "loss_rank_avg": 0.06216109171509743, "step": 4225, "valid_targets_mean": 753.4, "valid_targets_min": 448 }, { "epoch": 6.768, "grad_norm": 0.9055893995439169, "learning_rate": 1.3557616654163775e-07, "loss": 0.0567, "loss_nan_ranks": 0, "loss_rank_avg": 0.06529085338115692, "step": 4230, "valid_targets_mean": 949.4, "valid_targets_min": 482 }, { "epoch": 6.776, "grad_norm": 0.8453635654072907, "learning_rate": 1.264587661524308e-07, "loss": 0.0642, "loss_nan_ranks": 0, "loss_rank_avg": 0.0687166303396225, "step": 4235, "valid_targets_mean": 933.4, "valid_targets_min": 506 }, { "epoch": 6.784, "grad_norm": 1.0395756007158212, "learning_rate": 1.1765772725051084e-07, "loss": 0.0729, "loss_nan_ranks": 0, "loss_rank_avg": 0.08578158169984818, "step": 4240, "valid_targets_mean": 962.8, "valid_targets_min": 459 }, { "epoch": 6.792, "grad_norm": 0.6995744786603656, "learning_rate": 1.0917318993721726e-07, "loss": 0.0724, "loss_nan_ranks": 0, "loss_rank_avg": 0.06010109558701515, "step": 4245, "valid_targets_mean": 1372.9, "valid_targets_min": 615 }, { "epoch": 6.8, "grad_norm": 0.9882137918825006, "learning_rate": 1.0100528927558861e-07, "loss": 0.0947, "loss_nan_ranks": 0, "loss_rank_avg": 0.22277545928955078, "step": 4250, "valid_targets_mean": 1339.2, "valid_targets_min": 443 }, { "epoch": 6.808, "grad_norm": 0.9456999313109349, "learning_rate": 9.31541552882087e-08, "loss": 0.077, "loss_nan_ranks": 0, "loss_rank_avg": 0.11338469386100769, "step": 4255, "valid_targets_mean": 1125.1, "valid_targets_min": 445 }, { "epoch": 6.816, "grad_norm": 0.781828119425798, "learning_rate": 8.561991295514161e-08, "loss": 0.0605, "loss_nan_ranks": 0, "loss_rank_avg": 0.06928466260433197, "step": 4260, "valid_targets_mean": 916.7, "valid_targets_min": 447 }, { "epoch": 6.824, "grad_norm": 0.6906982313965497, "learning_rate": 7.840268221193548e-08, "loss": 0.0662, "loss_nan_ranks": 0, "loss_rank_avg": 0.04883572459220886, "step": 4265, "valid_targets_mean": 926.1, "valid_targets_min": 495 }, { "epoch": 6.832, "grad_norm": 0.6959903444394521, "learning_rate": 7.150257794772186e-08, "loss": 0.1008, "loss_nan_ranks": 0, "loss_rank_avg": 0.048169322311878204, "step": 4270, "valid_targets_mean": 747.8, "valid_targets_min": 442 }, { "epoch": 6.84, "grad_norm": 1.0189241815550472, "learning_rate": 6.491971000337938e-08, "loss": 0.0682, "loss_nan_ranks": 0, "loss_rank_avg": 0.07251983880996704, "step": 4275, "valid_targets_mean": 1026.7, "valid_targets_min": 508 }, { "epoch": 6.848, "grad_norm": 0.7930936377631168, "learning_rate": 5.8654183169788435e-08, "loss": 0.0562, "loss_nan_ranks": 0, "loss_rank_avg": 0.05123412609100342, "step": 4280, "valid_targets_mean": 734.1, "valid_targets_min": 443 }, { "epoch": 6.856, "grad_norm": 0.845901888009316, "learning_rate": 5.270609718616593e-08, "loss": 0.0769, "loss_nan_ranks": 0, "loss_rank_avg": 0.05024448037147522, "step": 4285, "valid_targets_mean": 1004.8, "valid_targets_min": 498 }, { "epoch": 6.864, "grad_norm": 0.7677871662641823, "learning_rate": 4.70755467384687e-08, "loss": 0.0744, "loss_nan_ranks": 0, "loss_rank_avg": 0.058658793568611145, "step": 4290, "valid_targets_mean": 911.6, "valid_targets_min": 533 }, { "epoch": 6.872, "grad_norm": 0.9839547388014913, "learning_rate": 4.176262145789478e-08, "loss": 0.0849, "loss_nan_ranks": 0, "loss_rank_avg": 0.15095025300979614, "step": 4295, "valid_targets_mean": 1179.4, "valid_targets_min": 501 }, { "epoch": 6.88, "grad_norm": 0.6814461361019861, "learning_rate": 3.676740591945782e-08, "loss": 0.0667, "loss_nan_ranks": 0, "loss_rank_avg": 0.05614306405186653, "step": 4300, "valid_targets_mean": 849.9, "valid_targets_min": 443 }, { "epoch": 6.888, "grad_norm": 0.7725328157167015, "learning_rate": 3.208997964062821e-08, "loss": 0.0735, "loss_nan_ranks": 0, "loss_rank_avg": 0.055312514305114746, "step": 4305, "valid_targets_mean": 731.9, "valid_targets_min": 476 }, { "epoch": 6.896, "grad_norm": 0.9230695486583556, "learning_rate": 2.773041708008295e-08, "loss": 0.0654, "loss_nan_ranks": 0, "loss_rank_avg": 0.09205324947834015, "step": 4310, "valid_targets_mean": 1044.9, "valid_targets_min": 555 }, { "epoch": 6.904, "grad_norm": 0.9005169578053986, "learning_rate": 2.3688787636511057e-08, "loss": 0.058, "loss_nan_ranks": 0, "loss_rank_avg": 0.08456471562385559, "step": 4315, "valid_targets_mean": 1237.9, "valid_targets_min": 444 }, { "epoch": 6.912, "grad_norm": 1.7069733059573364, "learning_rate": 1.9965155647507782e-08, "loss": 0.0698, "loss_nan_ranks": 0, "loss_rank_avg": 0.05954921245574951, "step": 4320, "valid_targets_mean": 886.6, "valid_targets_min": 501 }, { "epoch": 6.92, "grad_norm": 0.9240349041779561, "learning_rate": 1.655958038855765e-08, "loss": 0.0886, "loss_nan_ranks": 0, "loss_rank_avg": 0.07848560810089111, "step": 4325, "valid_targets_mean": 1234.1, "valid_targets_min": 558 }, { "epoch": 6.928, "grad_norm": 0.7716220389789027, "learning_rate": 1.3472116072084096e-08, "loss": 0.0569, "loss_nan_ranks": 0, "loss_rank_avg": 0.05979716777801514, "step": 4330, "valid_targets_mean": 837.9, "valid_targets_min": 441 }, { "epoch": 6.936, "grad_norm": 0.7776719180845648, "learning_rate": 1.0702811846590167e-08, "loss": 0.0629, "loss_nan_ranks": 0, "loss_rank_avg": 0.0593021959066391, "step": 4335, "valid_targets_mean": 819.6, "valid_targets_min": 478 }, { "epoch": 6.944, "grad_norm": 0.797242975709299, "learning_rate": 8.251711795876916e-09, "loss": 0.0627, "loss_nan_ranks": 0, "loss_rank_avg": 0.060170456767082214, "step": 4340, "valid_targets_mean": 807.1, "valid_targets_min": 418 }, { "epoch": 6.952, "grad_norm": 0.7339526517022201, "learning_rate": 6.1188549383373044e-09, "loss": 0.0589, "loss_nan_ranks": 0, "loss_rank_avg": 0.06010982766747475, "step": 4345, "valid_targets_mean": 789.3, "valid_targets_min": 474 }, { "epoch": 6.96, "grad_norm": 0.8389381067646968, "learning_rate": 4.304275226338916e-09, "loss": 0.0679, "loss_nan_ranks": 0, "loss_rank_avg": 0.06441819667816162, "step": 4350, "valid_targets_mean": 1073.7, "valid_targets_min": 383 }, { "epoch": 6.968, "grad_norm": 0.7659262300618347, "learning_rate": 2.8080015456799503e-09, "loss": 0.0542, "loss_nan_ranks": 0, "loss_rank_avg": 0.05551183596253395, "step": 4355, "valid_targets_mean": 826.9, "valid_targets_min": 509 }, { "epoch": 6.976, "grad_norm": 0.7634346959399079, "learning_rate": 1.6300577151340257e-09, "loss": 0.0598, "loss_nan_ranks": 0, "loss_rank_avg": 0.05888133496046066, "step": 4360, "valid_targets_mean": 951.8, "valid_targets_min": 417 }, { "epoch": 6.984, "grad_norm": 0.8460671107111588, "learning_rate": 7.70462486070489e-10, "loss": 0.0554, "loss_nan_ranks": 0, "loss_rank_avg": 0.04984348267316818, "step": 4365, "valid_targets_mean": 901.7, "valid_targets_min": 678 }, { "epoch": 6.992, "grad_norm": 0.7850119216540071, "learning_rate": 2.2922954214799065e-10, "loss": 0.0632, "loss_nan_ranks": 0, "loss_rank_avg": 0.06389277428388596, "step": 4370, "valid_targets_mean": 891.9, "valid_targets_min": 583 }, { "epoch": 7.0, "grad_norm": 0.9340733771796962, "learning_rate": 6.367499107984288e-12, "loss": 0.0571, "loss_nan_ranks": 0, "loss_rank_avg": 0.05963999778032303, "step": 4375, "valid_targets_mean": 783.6, "valid_targets_min": 551 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.05963999778032303, "step": 4375, "total_flos": 262473517694976.0, "train_loss": 0.13307374988964626, "train_runtime": 6395.8234, "train_samples_per_second": 10.941, "train_steps_per_second": 0.684, "valid_targets_mean": 783.6, "valid_targets_min": 551 } ], "logging_steps": 5, "max_steps": 4375, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 262473517694976.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }