9673 lines
257 KiB
JSON
9673 lines
257 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 4375,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.008,
|
||
|
|
"grad_norm": 18.4569905168907,
|
||
|
|
"learning_rate": 3.6529680365296803e-07,
|
||
|
|
"loss": 0.7031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6569720506668091,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 1009.4,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.016,
|
||
|
|
"grad_norm": 18.37728431878093,
|
||
|
|
"learning_rate": 8.219178082191781e-07,
|
||
|
|
"loss": 0.6996,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6515499353408813,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 991.0,
|
||
|
|
"valid_targets_min": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.024,
|
||
|
|
"grad_norm": 16.431678071793698,
|
||
|
|
"learning_rate": 1.278538812785388e-06,
|
||
|
|
"loss": 0.6774,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6113685965538025,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 998.6,
|
||
|
|
"valid_targets_min": 562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.032,
|
||
|
|
"grad_norm": 15.86978299079565,
|
||
|
|
"learning_rate": 1.7351598173515982e-06,
|
||
|
|
"loss": 0.6387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.609423816204071,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 895.1,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04,
|
||
|
|
"grad_norm": 9.041629780491458,
|
||
|
|
"learning_rate": 2.191780821917808e-06,
|
||
|
|
"loss": 0.5928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5422854423522949,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 998.1,
|
||
|
|
"valid_targets_min": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.048,
|
||
|
|
"grad_norm": 5.754764448188752,
|
||
|
|
"learning_rate": 2.6484018264840183e-06,
|
||
|
|
"loss": 0.523,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4981729984283447,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 965.2,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.056,
|
||
|
|
"grad_norm": 3.3907319203399213,
|
||
|
|
"learning_rate": 3.1050228310502285e-06,
|
||
|
|
"loss": 0.496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6502364873886108,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 1243.8,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.064,
|
||
|
|
"grad_norm": 3.282314265195484,
|
||
|
|
"learning_rate": 3.5616438356164386e-06,
|
||
|
|
"loss": 0.4792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4102541208267212,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 827.4,
|
||
|
|
"valid_targets_min": 584
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.072,
|
||
|
|
"grad_norm": 2.2441663968283954,
|
||
|
|
"learning_rate": 4.018264840182649e-06,
|
||
|
|
"loss": 0.4362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4924893379211426,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 1066.7,
|
||
|
|
"valid_targets_min": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08,
|
||
|
|
"grad_norm": 2.2276595339792884,
|
||
|
|
"learning_rate": 4.4748858447488585e-06,
|
||
|
|
"loss": 0.4237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33928295969963074,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 683.0,
|
||
|
|
"valid_targets_min": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.088,
|
||
|
|
"grad_norm": 1.5634112203072432,
|
||
|
|
"learning_rate": 4.931506849315069e-06,
|
||
|
|
"loss": 0.3485,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3636552691459656,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 839.6,
|
||
|
|
"valid_targets_min": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.096,
|
||
|
|
"grad_norm": 1.4191405974580216,
|
||
|
|
"learning_rate": 5.388127853881279e-06,
|
||
|
|
"loss": 0.327,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2820422649383545,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 844.1,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.104,
|
||
|
|
"grad_norm": 1.23315150232887,
|
||
|
|
"learning_rate": 5.8447488584474885e-06,
|
||
|
|
"loss": 0.3257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2875695824623108,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 892.1,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.112,
|
||
|
|
"grad_norm": 1.0958845246431541,
|
||
|
|
"learning_rate": 6.301369863013699e-06,
|
||
|
|
"loss": 0.2519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3263506293296814,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 1024.4,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12,
|
||
|
|
"grad_norm": 1.4675627271539373,
|
||
|
|
"learning_rate": 6.757990867579909e-06,
|
||
|
|
"loss": 0.293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28500908613204956,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 928.7,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.128,
|
||
|
|
"grad_norm": 0.9999302683084342,
|
||
|
|
"learning_rate": 7.214611872146119e-06,
|
||
|
|
"loss": 0.3002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28047606348991394,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 969.8,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.136,
|
||
|
|
"grad_norm": 1.2285241043015935,
|
||
|
|
"learning_rate": 7.671232876712329e-06,
|
||
|
|
"loss": 0.2784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26190608739852905,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 707.2,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.144,
|
||
|
|
"grad_norm": 1.1912365240631553,
|
||
|
|
"learning_rate": 8.127853881278539e-06,
|
||
|
|
"loss": 0.3047,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.38986343145370483,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 1021.1,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.152,
|
||
|
|
"grad_norm": 0.9470584016240954,
|
||
|
|
"learning_rate": 8.584474885844748e-06,
|
||
|
|
"loss": 0.2826,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20783933997154236,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 854.7,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16,
|
||
|
|
"grad_norm": 1.0266730827392707,
|
||
|
|
"learning_rate": 9.04109589041096e-06,
|
||
|
|
"loss": 0.2661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.257321834564209,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 810.8,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.168,
|
||
|
|
"grad_norm": 0.9796631800596888,
|
||
|
|
"learning_rate": 9.49771689497717e-06,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2218109667301178,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 875.9,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.176,
|
||
|
|
"grad_norm": 0.9414981219144624,
|
||
|
|
"learning_rate": 9.95433789954338e-06,
|
||
|
|
"loss": 0.2492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2698684632778168,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 966.4,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.184,
|
||
|
|
"grad_norm": 0.9364497568626377,
|
||
|
|
"learning_rate": 1.0410958904109589e-05,
|
||
|
|
"loss": 0.2368,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2731756269931793,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 974.8,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.192,
|
||
|
|
"grad_norm": 1.0965744906125983,
|
||
|
|
"learning_rate": 1.08675799086758e-05,
|
||
|
|
"loss": 0.2206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26202571392059326,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 929.9,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2,
|
||
|
|
"grad_norm": 1.0127475793360836,
|
||
|
|
"learning_rate": 1.132420091324201e-05,
|
||
|
|
"loss": 0.2676,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3210471272468567,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 904.8,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.208,
|
||
|
|
"grad_norm": 0.8732913297809469,
|
||
|
|
"learning_rate": 1.178082191780822e-05,
|
||
|
|
"loss": 0.2073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15821754932403564,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 789.3,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.216,
|
||
|
|
"grad_norm": 1.1510599786111335,
|
||
|
|
"learning_rate": 1.223744292237443e-05,
|
||
|
|
"loss": 0.2252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24508722126483917,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 804.2,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.224,
|
||
|
|
"grad_norm": 1.996701902148517,
|
||
|
|
"learning_rate": 1.2694063926940641e-05,
|
||
|
|
"loss": 0.2402,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2965832054615021,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 902.7,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.232,
|
||
|
|
"grad_norm": 1.0173663666021984,
|
||
|
|
"learning_rate": 1.3150684931506849e-05,
|
||
|
|
"loss": 0.2633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35601651668548584,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 1106.1,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24,
|
||
|
|
"grad_norm": 0.9949771832156836,
|
||
|
|
"learning_rate": 1.360730593607306e-05,
|
||
|
|
"loss": 0.2359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2783992290496826,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 911.1,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.248,
|
||
|
|
"grad_norm": 0.9181491942298239,
|
||
|
|
"learning_rate": 1.406392694063927e-05,
|
||
|
|
"loss": 0.2558,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16628001630306244,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 814.1,
|
||
|
|
"valid_targets_min": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.256,
|
||
|
|
"grad_norm": 0.974921377150203,
|
||
|
|
"learning_rate": 1.4520547945205482e-05,
|
||
|
|
"loss": 0.2168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2327520251274109,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 975.6,
|
||
|
|
"valid_targets_min": 539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.264,
|
||
|
|
"grad_norm": 0.8330947154576838,
|
||
|
|
"learning_rate": 1.497716894977169e-05,
|
||
|
|
"loss": 0.2905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3683410882949829,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 1346.4,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.272,
|
||
|
|
"grad_norm": 1.071730464304723,
|
||
|
|
"learning_rate": 1.54337899543379e-05,
|
||
|
|
"loss": 0.2268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32522037625312805,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 993.7,
|
||
|
|
"valid_targets_min": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28,
|
||
|
|
"grad_norm": 1.0956676683570343,
|
||
|
|
"learning_rate": 1.589041095890411e-05,
|
||
|
|
"loss": 0.229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19170644879341125,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 701.1,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.288,
|
||
|
|
"grad_norm": 0.8852839703904614,
|
||
|
|
"learning_rate": 1.634703196347032e-05,
|
||
|
|
"loss": 0.2205,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1616896688938141,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 836.2,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.296,
|
||
|
|
"grad_norm": 0.9638350793584443,
|
||
|
|
"learning_rate": 1.680365296803653e-05,
|
||
|
|
"loss": 0.1893,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18771511316299438,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 769.9,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.304,
|
||
|
|
"grad_norm": 1.063824391310194,
|
||
|
|
"learning_rate": 1.726027397260274e-05,
|
||
|
|
"loss": 0.2607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22938010096549988,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 948.7,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.312,
|
||
|
|
"grad_norm": 0.9153693008635697,
|
||
|
|
"learning_rate": 1.771689497716895e-05,
|
||
|
|
"loss": 0.2549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30292022228240967,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 1185.6,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32,
|
||
|
|
"grad_norm": 1.2046470357044166,
|
||
|
|
"learning_rate": 1.8173515981735163e-05,
|
||
|
|
"loss": 0.1737,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1771753877401352,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 823.1,
|
||
|
|
"valid_targets_min": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.328,
|
||
|
|
"grad_norm": 0.9946763380079855,
|
||
|
|
"learning_rate": 1.863013698630137e-05,
|
||
|
|
"loss": 0.2218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21841683983802795,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 911.6,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.336,
|
||
|
|
"grad_norm": 0.9553594171492042,
|
||
|
|
"learning_rate": 1.9086757990867582e-05,
|
||
|
|
"loss": 0.2307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17415133118629456,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 757.9,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.344,
|
||
|
|
"grad_norm": 1.0370039920938834,
|
||
|
|
"learning_rate": 1.954337899543379e-05,
|
||
|
|
"loss": 0.2457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17233221232891083,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 731.9,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.352,
|
||
|
|
"grad_norm": 1.0056651249289874,
|
||
|
|
"learning_rate": 2e-05,
|
||
|
|
"loss": 0.216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23098325729370117,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 868.7,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36,
|
||
|
|
"grad_norm": 0.8927780696688123,
|
||
|
|
"learning_rate": 2.045662100456621e-05,
|
||
|
|
"loss": 0.2311,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20995758473873138,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 893.7,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.368,
|
||
|
|
"grad_norm": 0.9329266167157003,
|
||
|
|
"learning_rate": 2.0913242009132424e-05,
|
||
|
|
"loss": 0.1906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24100714921951294,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 965.1,
|
||
|
|
"valid_targets_min": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.376,
|
||
|
|
"grad_norm": 0.9597984786382444,
|
||
|
|
"learning_rate": 2.1369863013698632e-05,
|
||
|
|
"loss": 0.2111,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18597939610481262,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 772.9,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.384,
|
||
|
|
"grad_norm": 0.9528637578725856,
|
||
|
|
"learning_rate": 2.182648401826484e-05,
|
||
|
|
"loss": 0.2189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16806253790855408,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 764.3,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.392,
|
||
|
|
"grad_norm": 1.134206080982455,
|
||
|
|
"learning_rate": 2.2283105022831052e-05,
|
||
|
|
"loss": 0.25,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18358182907104492,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 800.2,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4,
|
||
|
|
"grad_norm": 0.8772583543749906,
|
||
|
|
"learning_rate": 2.2739726027397263e-05,
|
||
|
|
"loss": 0.1896,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1544710397720337,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 854.1,
|
||
|
|
"valid_targets_min": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.408,
|
||
|
|
"grad_norm": 1.1118411973916886,
|
||
|
|
"learning_rate": 2.3196347031963475e-05,
|
||
|
|
"loss": 0.1909,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17419582605361938,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 780.8,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.416,
|
||
|
|
"grad_norm": 0.9732009947953545,
|
||
|
|
"learning_rate": 2.3652968036529683e-05,
|
||
|
|
"loss": 0.2025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18934506177902222,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 709.7,
|
||
|
|
"valid_targets_min": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.424,
|
||
|
|
"grad_norm": 0.7748866871282665,
|
||
|
|
"learning_rate": 2.410958904109589e-05,
|
||
|
|
"loss": 0.2181,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18687203526496887,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 1078.9,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.432,
|
||
|
|
"grad_norm": 0.9657748196334329,
|
||
|
|
"learning_rate": 2.4566210045662106e-05,
|
||
|
|
"loss": 0.221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15517109632492065,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 790.0,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44,
|
||
|
|
"grad_norm": 0.8792635823194007,
|
||
|
|
"learning_rate": 2.5022831050228314e-05,
|
||
|
|
"loss": 0.2554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3027264475822449,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 1207.8,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.448,
|
||
|
|
"grad_norm": 0.9037898067976381,
|
||
|
|
"learning_rate": 2.547945205479452e-05,
|
||
|
|
"loss": 0.2211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2351336032152176,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 988.6,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.456,
|
||
|
|
"grad_norm": 0.8560337432929743,
|
||
|
|
"learning_rate": 2.593607305936073e-05,
|
||
|
|
"loss": 0.1959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18937864899635315,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 1041.7,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.464,
|
||
|
|
"grad_norm": 0.8737234325775536,
|
||
|
|
"learning_rate": 2.6392694063926944e-05,
|
||
|
|
"loss": 0.1977,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18464210629463196,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 934.6,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.472,
|
||
|
|
"grad_norm": 0.905138785163492,
|
||
|
|
"learning_rate": 2.6849315068493153e-05,
|
||
|
|
"loss": 0.2144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2070557177066803,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 959.8,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48,
|
||
|
|
"grad_norm": 0.9681305277619024,
|
||
|
|
"learning_rate": 2.7305936073059364e-05,
|
||
|
|
"loss": 0.2019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1667182743549347,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 756.1,
|
||
|
|
"valid_targets_min": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.488,
|
||
|
|
"grad_norm": 0.7668950133552566,
|
||
|
|
"learning_rate": 2.7762557077625572e-05,
|
||
|
|
"loss": 0.2044,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16860172152519226,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 972.9,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.496,
|
||
|
|
"grad_norm": 0.923077750951701,
|
||
|
|
"learning_rate": 2.8219178082191783e-05,
|
||
|
|
"loss": 0.2591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2699459195137024,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 1024.6,
|
||
|
|
"valid_targets_min": 432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.504,
|
||
|
|
"grad_norm": 0.8907714935859932,
|
||
|
|
"learning_rate": 2.8675799086757995e-05,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26105940341949463,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 979.0,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.512,
|
||
|
|
"grad_norm": 1.0158109379952605,
|
||
|
|
"learning_rate": 2.9132420091324203e-05,
|
||
|
|
"loss": 0.2125,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17170169949531555,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 846.8,
|
||
|
|
"valid_targets_min": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.52,
|
||
|
|
"grad_norm": 0.8773836099643207,
|
||
|
|
"learning_rate": 2.958904109589041e-05,
|
||
|
|
"loss": 0.1986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21758343279361725,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 1011.7,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.528,
|
||
|
|
"grad_norm": 0.8877924719800544,
|
||
|
|
"learning_rate": 3.0045662100456626e-05,
|
||
|
|
"loss": 0.1905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20519107580184937,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 780.4,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.536,
|
||
|
|
"grad_norm": 0.8401077516113821,
|
||
|
|
"learning_rate": 3.0502283105022834e-05,
|
||
|
|
"loss": 0.2333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16374966502189636,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 917.1,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.544,
|
||
|
|
"grad_norm": 0.9699448470038969,
|
||
|
|
"learning_rate": 3.0958904109589045e-05,
|
||
|
|
"loss": 0.2358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2687930762767792,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 944.8,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.552,
|
||
|
|
"grad_norm": 1.0452101409572365,
|
||
|
|
"learning_rate": 3.141552511415525e-05,
|
||
|
|
"loss": 0.2214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23149526119232178,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 867.1,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.56,
|
||
|
|
"grad_norm": 1.1104839128029447,
|
||
|
|
"learning_rate": 3.187214611872147e-05,
|
||
|
|
"loss": 0.2009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16360512375831604,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 791.6,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.568,
|
||
|
|
"grad_norm": 0.9640804406336958,
|
||
|
|
"learning_rate": 3.2328767123287676e-05,
|
||
|
|
"loss": 0.2185,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23180308938026428,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 931.9,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.576,
|
||
|
|
"grad_norm": 0.951770582924293,
|
||
|
|
"learning_rate": 3.2785388127853884e-05,
|
||
|
|
"loss": 0.2156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23412451148033142,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 845.6,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.584,
|
||
|
|
"grad_norm": 0.8066551360509446,
|
||
|
|
"learning_rate": 3.324200913242009e-05,
|
||
|
|
"loss": 0.2161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16541028022766113,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 892.4,
|
||
|
|
"valid_targets_min": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.592,
|
||
|
|
"grad_norm": 0.8950754907204298,
|
||
|
|
"learning_rate": 3.369863013698631e-05,
|
||
|
|
"loss": 0.2108,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2536185681819916,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 959.8,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6,
|
||
|
|
"grad_norm": 0.8557198986382014,
|
||
|
|
"learning_rate": 3.4155251141552515e-05,
|
||
|
|
"loss": 0.1744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14356130361557007,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 779.0,
|
||
|
|
"valid_targets_min": 553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.608,
|
||
|
|
"grad_norm": 0.80119065982916,
|
||
|
|
"learning_rate": 3.461187214611872e-05,
|
||
|
|
"loss": 0.2034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.273703932762146,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 1140.9,
|
||
|
|
"valid_targets_min": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.616,
|
||
|
|
"grad_norm": 0.8725319150358779,
|
||
|
|
"learning_rate": 3.506849315068493e-05,
|
||
|
|
"loss": 0.2366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18104848265647888,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 840.5,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.624,
|
||
|
|
"grad_norm": 0.8081587096181386,
|
||
|
|
"learning_rate": 3.5525114155251146e-05,
|
||
|
|
"loss": 0.2282,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16609951853752136,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 862.6,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.632,
|
||
|
|
"grad_norm": 0.8859866609772926,
|
||
|
|
"learning_rate": 3.5981735159817354e-05,
|
||
|
|
"loss": 0.178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17506490647792816,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 830.5,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.64,
|
||
|
|
"grad_norm": 0.839676869764214,
|
||
|
|
"learning_rate": 3.643835616438356e-05,
|
||
|
|
"loss": 0.218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14028877019882202,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 722.8,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.648,
|
||
|
|
"grad_norm": 0.9365083173383925,
|
||
|
|
"learning_rate": 3.689497716894977e-05,
|
||
|
|
"loss": 0.1808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17637063562870026,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 786.9,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.656,
|
||
|
|
"grad_norm": 0.8793841415231017,
|
||
|
|
"learning_rate": 3.7351598173515985e-05,
|
||
|
|
"loss": 0.2499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18461337685585022,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 849.4,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.664,
|
||
|
|
"grad_norm": 0.8290466124079698,
|
||
|
|
"learning_rate": 3.780821917808219e-05,
|
||
|
|
"loss": 0.1796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1359868049621582,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 780.6,
|
||
|
|
"valid_targets_min": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.672,
|
||
|
|
"grad_norm": 0.8870519393979674,
|
||
|
|
"learning_rate": 3.82648401826484e-05,
|
||
|
|
"loss": 0.2658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.45135176181793213,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 1707.6,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.68,
|
||
|
|
"grad_norm": 0.789933869448683,
|
||
|
|
"learning_rate": 3.8721461187214615e-05,
|
||
|
|
"loss": 0.1907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15668487548828125,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 866.2,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.688,
|
||
|
|
"grad_norm": 0.7871956491176277,
|
||
|
|
"learning_rate": 3.9178082191780823e-05,
|
||
|
|
"loss": 0.2072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20149065554141998,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 987.6,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.696,
|
||
|
|
"grad_norm": 0.6989689379036534,
|
||
|
|
"learning_rate": 3.963470319634704e-05,
|
||
|
|
"loss": 0.2233,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1556272655725479,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 929.2,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.704,
|
||
|
|
"grad_norm": 0.8014076411649944,
|
||
|
|
"learning_rate": 3.99999936325009e-05,
|
||
|
|
"loss": 0.2199,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21174079179763794,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 881.0,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.712,
|
||
|
|
"grad_norm": 0.8201622776606732,
|
||
|
|
"learning_rate": 3.9999770770457856e-05,
|
||
|
|
"loss": 0.2333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22932517528533936,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 1038.4,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.72,
|
||
|
|
"grad_norm": 0.8487813327477935,
|
||
|
|
"learning_rate": 3.9999229537513936e-05,
|
||
|
|
"loss": 0.1749,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15758301317691803,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 783.1,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.728,
|
||
|
|
"grad_norm": 0.8952764089193642,
|
||
|
|
"learning_rate": 3.999836994228487e-05,
|
||
|
|
"loss": 0.1984,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26837146282196045,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 901.9,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.736,
|
||
|
|
"grad_norm": 0.8779288729886306,
|
||
|
|
"learning_rate": 3.999719199845432e-05,
|
||
|
|
"loss": 0.1818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17460289597511292,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 866.8,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.744,
|
||
|
|
"grad_norm": 0.7572742289270304,
|
||
|
|
"learning_rate": 3.999569572477366e-05,
|
||
|
|
"loss": 0.1905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14431582391262054,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 799.2,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.752,
|
||
|
|
"grad_norm": 0.8242200271581344,
|
||
|
|
"learning_rate": 3.999388114506166e-05,
|
||
|
|
"loss": 0.2043,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14205431938171387,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 804.6,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.76,
|
||
|
|
"grad_norm": 0.790064793882924,
|
||
|
|
"learning_rate": 3.999174828820413e-05,
|
||
|
|
"loss": 0.2032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25519803166389465,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 1264.1,
|
||
|
|
"valid_targets_min": 586
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.768,
|
||
|
|
"grad_norm": 0.8293696975789525,
|
||
|
|
"learning_rate": 3.998929718815341e-05,
|
||
|
|
"loss": 0.2135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2700884938240051,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 1072.2,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.776,
|
||
|
|
"grad_norm": 0.7187946308165557,
|
||
|
|
"learning_rate": 3.998652788392792e-05,
|
||
|
|
"loss": 0.1796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16771897673606873,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 765.6,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.784,
|
||
|
|
"grad_norm": 1.2061081366763156,
|
||
|
|
"learning_rate": 3.9983440419611445e-05,
|
||
|
|
"loss": 0.1842,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.203563392162323,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 874.2,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.792,
|
||
|
|
"grad_norm": 0.8127919624289628,
|
||
|
|
"learning_rate": 3.9980034844352494e-05,
|
||
|
|
"loss": 0.1818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22209608554840088,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 968.9,
|
||
|
|
"valid_targets_min": 493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8,
|
||
|
|
"grad_norm": 0.7264376380855042,
|
||
|
|
"learning_rate": 3.9976311212363495e-05,
|
||
|
|
"loss": 0.2122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19169259071350098,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 931.7,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.808,
|
||
|
|
"grad_norm": 0.7044398835028907,
|
||
|
|
"learning_rate": 3.997226958291992e-05,
|
||
|
|
"loss": 0.162,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19893905520439148,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 1076.0,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.816,
|
||
|
|
"grad_norm": 0.9410261251551351,
|
||
|
|
"learning_rate": 3.996791002035937e-05,
|
||
|
|
"loss": 0.2024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21313059329986572,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 912.6,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.824,
|
||
|
|
"grad_norm": 0.8084364192923831,
|
||
|
|
"learning_rate": 3.996323259408055e-05,
|
||
|
|
"loss": 0.1981,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17545118927955627,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 806.0,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.832,
|
||
|
|
"grad_norm": 0.7177048085138111,
|
||
|
|
"learning_rate": 3.995823737854211e-05,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17910686135292053,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 897.8,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.84,
|
||
|
|
"grad_norm": 0.7475898854985661,
|
||
|
|
"learning_rate": 3.9952924453261534e-05,
|
||
|
|
"loss": 0.209,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17634199559688568,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 926.7,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.848,
|
||
|
|
"grad_norm": 0.7546628398117033,
|
||
|
|
"learning_rate": 3.994729390281384e-05,
|
||
|
|
"loss": 0.2101,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13323059678077698,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 780.4,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.856,
|
||
|
|
"grad_norm": 0.8075157474709176,
|
||
|
|
"learning_rate": 3.994134581683021e-05,
|
||
|
|
"loss": 0.2108,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36277538537979126,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 1380.0,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.864,
|
||
|
|
"grad_norm": 0.7426841895384861,
|
||
|
|
"learning_rate": 3.9935080289996626e-05,
|
||
|
|
"loss": 0.166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1710471212863922,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 933.2,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.872,
|
||
|
|
"grad_norm": 0.7543785500593825,
|
||
|
|
"learning_rate": 3.992849742205228e-05,
|
||
|
|
"loss": 0.1949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14548061788082123,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 746.6,
|
||
|
|
"valid_targets_min": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.88,
|
||
|
|
"grad_norm": 0.733905548864994,
|
||
|
|
"learning_rate": 3.9921597317788065e-05,
|
||
|
|
"loss": 0.2205,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15254877507686615,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 755.8,
|
||
|
|
"valid_targets_min": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.888,
|
||
|
|
"grad_norm": 0.5814823219075821,
|
||
|
|
"learning_rate": 3.991438008704486e-05,
|
||
|
|
"loss": 0.1882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2061724066734314,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 1354.9,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.896,
|
||
|
|
"grad_norm": 0.8896697436915326,
|
||
|
|
"learning_rate": 3.990684584471179e-05,
|
||
|
|
"loss": 0.2223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28819096088409424,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 1018.8,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.904,
|
||
|
|
"grad_norm": 0.7027146233089345,
|
||
|
|
"learning_rate": 3.989899471072441e-05,
|
||
|
|
"loss": 0.2032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22908379137516022,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 1084.9,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.912,
|
||
|
|
"grad_norm": 0.7297407172134633,
|
||
|
|
"learning_rate": 3.9890826810062784e-05,
|
||
|
|
"loss": 0.2577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.162847101688385,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 836.3,
|
||
|
|
"valid_targets_min": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.92,
|
||
|
|
"grad_norm": 0.7246071620305752,
|
||
|
|
"learning_rate": 3.988234227274949e-05,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17553573846817017,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 854.9,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.928,
|
||
|
|
"grad_norm": 0.7329086051551755,
|
||
|
|
"learning_rate": 3.987354123384757e-05,
|
||
|
|
"loss": 0.1694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17792247235774994,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 827.2,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.936,
|
||
|
|
"grad_norm": 0.7630014106827517,
|
||
|
|
"learning_rate": 3.9864423833458364e-05,
|
||
|
|
"loss": 0.218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18731489777565002,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 843.2,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.944,
|
||
|
|
"grad_norm": 0.7339005237798694,
|
||
|
|
"learning_rate": 3.9854990216719285e-05,
|
||
|
|
"loss": 0.1951,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19711682200431824,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 838.9,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.952,
|
||
|
|
"grad_norm": 0.7090853251170041,
|
||
|
|
"learning_rate": 3.98452405338015e-05,
|
||
|
|
"loss": 0.1735,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18280717730522156,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 835.9,
|
||
|
|
"valid_targets_min": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.96,
|
||
|
|
"grad_norm": 0.6730331550707466,
|
||
|
|
"learning_rate": 3.983517493990756e-05,
|
||
|
|
"loss": 0.2228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16648909449577332,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 987.8,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.968,
|
||
|
|
"grad_norm": 0.6671466020202659,
|
||
|
|
"learning_rate": 3.982479359526892e-05,
|
||
|
|
"loss": 0.2181,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12919144332408905,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 851.2,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.976,
|
||
|
|
"grad_norm": 0.7468131098588212,
|
||
|
|
"learning_rate": 3.981409666514336e-05,
|
||
|
|
"loss": 0.2178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24386389553546906,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 1015.8,
|
||
|
|
"valid_targets_min": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.984,
|
||
|
|
"grad_norm": 0.7165572848472425,
|
||
|
|
"learning_rate": 3.98030843198124e-05,
|
||
|
|
"loss": 0.16,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17434345185756683,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 874.8,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.992,
|
||
|
|
"grad_norm": 0.7155565677019673,
|
||
|
|
"learning_rate": 3.979175673457858e-05,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16234704852104187,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 787.7,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.7163938049205832,
|
||
|
|
"learning_rate": 3.9780114089762616e-05,
|
||
|
|
"loss": 0.1584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.156229630112648,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 925.1,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.008,
|
||
|
|
"grad_norm": 0.7163952448739522,
|
||
|
|
"learning_rate": 3.976815657070062e-05,
|
||
|
|
"loss": 0.2132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13931968808174133,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 825.9,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.016,
|
||
|
|
"grad_norm": 0.8471080789416268,
|
||
|
|
"learning_rate": 3.975588436774107e-05,
|
||
|
|
"loss": 0.179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14833077788352966,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 767.4,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.024,
|
||
|
|
"grad_norm": 0.8554813868151919,
|
||
|
|
"learning_rate": 3.9743297676241826e-05,
|
||
|
|
"loss": 0.188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21545778214931488,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 992.5,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.032,
|
||
|
|
"grad_norm": 0.8308020439403382,
|
||
|
|
"learning_rate": 3.9730396696566994e-05,
|
||
|
|
"loss": 0.1479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1487816572189331,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 791.4,
|
||
|
|
"valid_targets_min": 538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.04,
|
||
|
|
"grad_norm": 0.7933045581970818,
|
||
|
|
"learning_rate": 3.971718163408375e-05,
|
||
|
|
"loss": 0.1591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16302749514579773,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 819.8,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.048,
|
||
|
|
"grad_norm": 0.7252431785653002,
|
||
|
|
"learning_rate": 3.9703652699159093e-05,
|
||
|
|
"loss": 0.2051,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13497385382652283,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 881.0,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.056,
|
||
|
|
"grad_norm": 0.7663840270716306,
|
||
|
|
"learning_rate": 3.9689810107156425e-05,
|
||
|
|
"loss": 0.1767,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27258244156837463,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 1186.0,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.064,
|
||
|
|
"grad_norm": 0.7650802553886993,
|
||
|
|
"learning_rate": 3.967565407843222e-05,
|
||
|
|
"loss": 0.2198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1957072615623474,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 877.8,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.072,
|
||
|
|
"grad_norm": 0.7555776836490888,
|
||
|
|
"learning_rate": 3.966118483833242e-05,
|
||
|
|
"loss": 0.1928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1382647156715393,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 735.2,
|
||
|
|
"valid_targets_min": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.08,
|
||
|
|
"grad_norm": 0.7202017893256426,
|
||
|
|
"learning_rate": 3.964640261718893e-05,
|
||
|
|
"loss": 0.16,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13862335681915283,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 868.3,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.088,
|
||
|
|
"grad_norm": 0.6865668130814834,
|
||
|
|
"learning_rate": 3.963130765031589e-05,
|
||
|
|
"loss": 0.2171,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1527758687734604,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 822.8,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.096,
|
||
|
|
"grad_norm": 0.7466684411513757,
|
||
|
|
"learning_rate": 3.961590017800598e-05,
|
||
|
|
"loss": 0.2372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13745608925819397,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 807.3,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.104,
|
||
|
|
"grad_norm": 0.730266233475475,
|
||
|
|
"learning_rate": 3.960018044552653e-05,
|
||
|
|
"loss": 0.1958,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15390340983867645,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 899.9,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.112,
|
||
|
|
"grad_norm": 0.7389053816779148,
|
||
|
|
"learning_rate": 3.9584148703115704e-05,
|
||
|
|
"loss": 0.1544,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14118877053260803,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 842.4,
|
||
|
|
"valid_targets_min": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.12,
|
||
|
|
"grad_norm": 0.7579424093822917,
|
||
|
|
"learning_rate": 3.956780520597842e-05,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16270402073860168,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 919.3,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1280000000000001,
|
||
|
|
"grad_norm": 0.7565287647746204,
|
||
|
|
"learning_rate": 3.955115021428236e-05,
|
||
|
|
"loss": 0.1604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18007123470306396,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 873.1,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1360000000000001,
|
||
|
|
"grad_norm": 0.7677756015390184,
|
||
|
|
"learning_rate": 3.95341839931538e-05,
|
||
|
|
"loss": 0.1956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24875490367412567,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 1183.9,
|
||
|
|
"valid_targets_min": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.144,
|
||
|
|
"grad_norm": 0.7396285700137841,
|
||
|
|
"learning_rate": 3.95169068126734e-05,
|
||
|
|
"loss": 0.1873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19427287578582764,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 889.6,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.152,
|
||
|
|
"grad_norm": 0.7465845123157784,
|
||
|
|
"learning_rate": 3.949931894787187e-05,
|
||
|
|
"loss": 0.2645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18916454911231995,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 892.0,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.16,
|
||
|
|
"grad_norm": 0.8513718868816662,
|
||
|
|
"learning_rate": 3.948142067872565e-05,
|
||
|
|
"loss": 0.1771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18964636325836182,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 874.4,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.168,
|
||
|
|
"grad_norm": 0.8462870831677214,
|
||
|
|
"learning_rate": 3.946321229015241e-05,
|
||
|
|
"loss": 0.1604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2392217218875885,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 990.5,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.176,
|
||
|
|
"grad_norm": 0.7205212704585172,
|
||
|
|
"learning_rate": 3.944469407200652e-05,
|
||
|
|
"loss": 0.1474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1376579999923706,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 763.5,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.184,
|
||
|
|
"grad_norm": 0.9037787051653652,
|
||
|
|
"learning_rate": 3.942586631907444e-05,
|
||
|
|
"loss": 0.1952,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2241126149892807,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 978.2,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.192,
|
||
|
|
"grad_norm": 0.7386674551344024,
|
||
|
|
"learning_rate": 3.9406729331070054e-05,
|
||
|
|
"loss": 0.1863,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19754649698734283,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 854.7,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2,
|
||
|
|
"grad_norm": 0.7729001315896438,
|
||
|
|
"learning_rate": 3.938728341262985e-05,
|
||
|
|
"loss": 0.1973,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2784135341644287,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 1191.6,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.208,
|
||
|
|
"grad_norm": 0.7838951426901545,
|
||
|
|
"learning_rate": 3.936752887330812e-05,
|
||
|
|
"loss": 0.1662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21011705696582794,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 876.6,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.216,
|
||
|
|
"grad_norm": 0.6669012023304911,
|
||
|
|
"learning_rate": 3.9347466027571975e-05,
|
||
|
|
"loss": 0.1624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1563045084476471,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 1017.4,
|
||
|
|
"valid_targets_min": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.224,
|
||
|
|
"grad_norm": 0.6694182335646301,
|
||
|
|
"learning_rate": 3.932709519479639e-05,
|
||
|
|
"loss": 0.153,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.160549595952034,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 932.6,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.232,
|
||
|
|
"grad_norm": 0.7915346441577366,
|
||
|
|
"learning_rate": 3.930641669925911e-05,
|
||
|
|
"loss": 0.1804,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18584375083446503,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 1022.6,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.24,
|
||
|
|
"grad_norm": 0.7002389484036419,
|
||
|
|
"learning_rate": 3.928543087013546e-05,
|
||
|
|
"loss": 0.1638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15864118933677673,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 941.6,
|
||
|
|
"valid_targets_min": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.248,
|
||
|
|
"grad_norm": 0.8419494208862676,
|
||
|
|
"learning_rate": 3.926413804149315e-05,
|
||
|
|
"loss": 0.17,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14137229323387146,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 681.3,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.256,
|
||
|
|
"grad_norm": 0.8364682773807548,
|
||
|
|
"learning_rate": 3.9242538552286894e-05,
|
||
|
|
"loss": 0.1559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17337577044963837,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 968.3,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.264,
|
||
|
|
"grad_norm": 0.6961189538122279,
|
||
|
|
"learning_rate": 3.9220632746353096e-05,
|
||
|
|
"loss": 0.1708,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13661912083625793,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 863.1,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.272,
|
||
|
|
"grad_norm": 0.715740620782587,
|
||
|
|
"learning_rate": 3.91984209724043e-05,
|
||
|
|
"loss": 0.1912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14010174572467804,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 782.7,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.28,
|
||
|
|
"grad_norm": 0.7346689256256704,
|
||
|
|
"learning_rate": 3.917590358402369e-05,
|
||
|
|
"loss": 0.195,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28644707798957825,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 1305.2,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.288,
|
||
|
|
"grad_norm": 0.7385141102122526,
|
||
|
|
"learning_rate": 3.915308093965943e-05,
|
||
|
|
"loss": 0.1888,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14231915771961212,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 743.3,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.296,
|
||
|
|
"grad_norm": 0.7629780064003469,
|
||
|
|
"learning_rate": 3.9129953402618976e-05,
|
||
|
|
"loss": 0.1511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18741099536418915,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 986.6,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.304,
|
||
|
|
"grad_norm": 0.7269492317726601,
|
||
|
|
"learning_rate": 3.91065213410633e-05,
|
||
|
|
"loss": 0.1635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25146985054016113,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 1202.8,
|
||
|
|
"valid_targets_min": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.312,
|
||
|
|
"grad_norm": 0.7343886255914776,
|
||
|
|
"learning_rate": 3.908278512800098e-05,
|
||
|
|
"loss": 0.2026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14359501004219055,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 782.1,
|
||
|
|
"valid_targets_min": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.32,
|
||
|
|
"grad_norm": 0.6799273008618566,
|
||
|
|
"learning_rate": 3.905874514128235e-05,
|
||
|
|
"loss": 0.1859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2700965702533722,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 1318.8,
|
||
|
|
"valid_targets_min": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.328,
|
||
|
|
"grad_norm": 0.6929688539379448,
|
||
|
|
"learning_rate": 3.903440176359338e-05,
|
||
|
|
"loss": 0.2076,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1730683296918869,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 943.1,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.336,
|
||
|
|
"grad_norm": 0.7245659858484952,
|
||
|
|
"learning_rate": 3.90097553824497e-05,
|
||
|
|
"loss": 0.1678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17537246644496918,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 894.2,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3439999999999999,
|
||
|
|
"grad_norm": 0.7823444209829755,
|
||
|
|
"learning_rate": 3.8984806390190304e-05,
|
||
|
|
"loss": 0.1484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12793993949890137,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 716.6,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3519999999999999,
|
||
|
|
"grad_norm": 0.7109404586284385,
|
||
|
|
"learning_rate": 3.895955518397141e-05,
|
||
|
|
"loss": 0.1502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15655386447906494,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 845.7,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3599999999999999,
|
||
|
|
"grad_norm": 0.6871711965474857,
|
||
|
|
"learning_rate": 3.893400216576011e-05,
|
||
|
|
"loss": 0.2019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15780571103096008,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 861.4,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3679999999999999,
|
||
|
|
"grad_norm": 0.5783865771624349,
|
||
|
|
"learning_rate": 3.89081477423279e-05,
|
||
|
|
"loss": 0.2152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1397193968296051,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 965.9,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.376,
|
||
|
|
"grad_norm": 0.7111643170486737,
|
||
|
|
"learning_rate": 3.888199232524434e-05,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1263478398323059,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 860.1,
|
||
|
|
"valid_targets_min": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.384,
|
||
|
|
"grad_norm": 0.6880233126552526,
|
||
|
|
"learning_rate": 3.8855536330870354e-05,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12957754731178284,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 769.5,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.392,
|
||
|
|
"grad_norm": 0.7196773389821709,
|
||
|
|
"learning_rate": 3.882878018035173e-05,
|
||
|
|
"loss": 0.1792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23339521884918213,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 1114.1,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4,
|
||
|
|
"grad_norm": 0.5175206065306407,
|
||
|
|
"learning_rate": 3.880172429961232e-05,
|
||
|
|
"loss": 0.1784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19581802189350128,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 1740.6,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.408,
|
||
|
|
"grad_norm": 0.7027122734134265,
|
||
|
|
"learning_rate": 3.877436911934733e-05,
|
||
|
|
"loss": 0.1567,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21533304452896118,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 1119.6,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.416,
|
||
|
|
"grad_norm": 0.6879782852818223,
|
||
|
|
"learning_rate": 3.874671507501641e-05,
|
||
|
|
"loss": 0.1779,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2163260579109192,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 985.2,
|
||
|
|
"valid_targets_min": 562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.424,
|
||
|
|
"grad_norm": 0.6932470125732457,
|
||
|
|
"learning_rate": 3.871876260683677e-05,
|
||
|
|
"loss": 0.1687,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1334119737148285,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 764.4,
|
||
|
|
"valid_targets_min": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.432,
|
||
|
|
"grad_norm": 0.6148520291402507,
|
||
|
|
"learning_rate": 3.869051215977612e-05,
|
||
|
|
"loss": 0.1503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16043812036514282,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 1053.5,
|
||
|
|
"valid_targets_min": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.44,
|
||
|
|
"grad_norm": 0.5982261878444286,
|
||
|
|
"learning_rate": 3.8661964183545634e-05,
|
||
|
|
"loss": 0.16,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15015794336795807,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 862.0,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.448,
|
||
|
|
"grad_norm": 0.713865426155975,
|
||
|
|
"learning_rate": 3.863311913259276e-05,
|
||
|
|
"loss": 0.1671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16693344712257385,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 970.3,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.456,
|
||
|
|
"grad_norm": 0.7241685968365615,
|
||
|
|
"learning_rate": 3.860397746609402e-05,
|
||
|
|
"loss": 0.1539,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13500142097473145,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 716.9,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.464,
|
||
|
|
"grad_norm": 0.7054645907135928,
|
||
|
|
"learning_rate": 3.857453964794764e-05,
|
||
|
|
"loss": 0.1913,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14917078614234924,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 762.1,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.472,
|
||
|
|
"grad_norm": 0.667178052044947,
|
||
|
|
"learning_rate": 3.854480614676624e-05,
|
||
|
|
"loss": 0.1551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12918347120285034,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 716.3,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.48,
|
||
|
|
"grad_norm": 0.6908927945761476,
|
||
|
|
"learning_rate": 3.851477743586932e-05,
|
||
|
|
"loss": 0.1873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22470583021640778,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 1062.8,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.488,
|
||
|
|
"grad_norm": 0.778072768317643,
|
||
|
|
"learning_rate": 3.8484453993275746e-05,
|
||
|
|
"loss": 0.1841,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19133061170578003,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 816.6,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.496,
|
||
|
|
"grad_norm": 0.6708195314357452,
|
||
|
|
"learning_rate": 3.8453836301696134e-05,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14336860179901123,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 908.8,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.504,
|
||
|
|
"grad_norm": 0.6726051866296197,
|
||
|
|
"learning_rate": 3.842292484852518e-05,
|
||
|
|
"loss": 0.1715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2406589686870575,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 1354.1,
|
||
|
|
"valid_targets_min": 539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.512,
|
||
|
|
"grad_norm": 0.8150253436875796,
|
||
|
|
"learning_rate": 3.8391720125833875e-05,
|
||
|
|
"loss": 0.2038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2769927978515625,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 1018.1,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.52,
|
||
|
|
"grad_norm": 0.6891761844067167,
|
||
|
|
"learning_rate": 3.83602226303617e-05,
|
||
|
|
"loss": 0.1629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15847039222717285,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 838.3,
|
||
|
|
"valid_targets_min": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.528,
|
||
|
|
"grad_norm": 0.6608491869189769,
|
||
|
|
"learning_rate": 3.83284328635087e-05,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15309180319309235,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 971.8,
|
||
|
|
"valid_targets_min": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.536,
|
||
|
|
"grad_norm": 0.6554841706714233,
|
||
|
|
"learning_rate": 3.829635133132751e-05,
|
||
|
|
"loss": 0.1507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17172548174858093,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 1017.0,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.544,
|
||
|
|
"grad_norm": 0.6466612083633514,
|
||
|
|
"learning_rate": 3.8263978544515304e-05,
|
||
|
|
"loss": 0.2526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16498011350631714,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 910.4,
|
||
|
|
"valid_targets_min": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.552,
|
||
|
|
"grad_norm": 0.631142435859698,
|
||
|
|
"learning_rate": 3.823131501840565e-05,
|
||
|
|
"loss": 0.1688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10579565167427063,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 761.9,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.56,
|
||
|
|
"grad_norm": 0.6819743081635528,
|
||
|
|
"learning_rate": 3.819836127296032e-05,
|
||
|
|
"loss": 0.1907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12957951426506042,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 872.1,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.568,
|
||
|
|
"grad_norm": 0.674719954457241,
|
||
|
|
"learning_rate": 3.8165117832761016e-05,
|
||
|
|
"loss": 0.179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2050054520368576,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 971.7,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.576,
|
||
|
|
"grad_norm": 0.9896649808276848,
|
||
|
|
"learning_rate": 3.813158522700098e-05,
|
||
|
|
"loss": 0.1757,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18996283411979675,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 987.2,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.584,
|
||
|
|
"grad_norm": 0.661148717635557,
|
||
|
|
"learning_rate": 3.809776398947665e-05,
|
||
|
|
"loss": 0.1917,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11855830997228622,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 741.8,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.592,
|
||
|
|
"grad_norm": 0.7254948219099878,
|
||
|
|
"learning_rate": 3.806365465857908e-05,
|
||
|
|
"loss": 0.1584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16585005819797516,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 789.2,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6,
|
||
|
|
"grad_norm": 0.7277818146668404,
|
||
|
|
"learning_rate": 3.802925777728541e-05,
|
||
|
|
"loss": 0.1855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2311524599790573,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 1143.7,
|
||
|
|
"valid_targets_min": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.608,
|
||
|
|
"grad_norm": 0.7923440526648224,
|
||
|
|
"learning_rate": 3.799457389315023e-05,
|
||
|
|
"loss": 0.1982,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33600425720214844,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 1205.4,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.616,
|
||
|
|
"grad_norm": 0.657240434703054,
|
||
|
|
"learning_rate": 3.795960355829683e-05,
|
||
|
|
"loss": 0.1901,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15497495234012604,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 873.0,
|
||
|
|
"valid_targets_min": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.624,
|
||
|
|
"grad_norm": 0.7919659404982559,
|
||
|
|
"learning_rate": 3.7924347329408444e-05,
|
||
|
|
"loss": 0.1779,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13609406352043152,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 794.4,
|
||
|
|
"valid_targets_min": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6320000000000001,
|
||
|
|
"grad_norm": 0.7020518413812868,
|
||
|
|
"learning_rate": 3.788880576771937e-05,
|
||
|
|
"loss": 0.1663,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23722708225250244,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 1074.6,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6400000000000001,
|
||
|
|
"grad_norm": 0.7170819930313959,
|
||
|
|
"learning_rate": 3.785297943900605e-05,
|
||
|
|
"loss": 0.1814,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20049814879894257,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 979.7,
|
||
|
|
"valid_targets_min": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6480000000000001,
|
||
|
|
"grad_norm": 0.5529666573219981,
|
||
|
|
"learning_rate": 3.7816868913578044e-05,
|
||
|
|
"loss": 0.1388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12410497665405273,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 946.4,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6560000000000001,
|
||
|
|
"grad_norm": 0.7104488195204124,
|
||
|
|
"learning_rate": 3.778047476626897e-05,
|
||
|
|
"loss": 0.1799,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13769274950027466,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 825.1,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6640000000000001,
|
||
|
|
"grad_norm": 0.7224254406062387,
|
||
|
|
"learning_rate": 3.7743797576427335e-05,
|
||
|
|
"loss": 0.1809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1998235285282135,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 992.2,
|
||
|
|
"valid_targets_min": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6720000000000002,
|
||
|
|
"grad_norm": 0.6372839611313066,
|
||
|
|
"learning_rate": 3.770683792790733e-05,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1310766339302063,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 862.4,
|
||
|
|
"valid_targets_min": 463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6800000000000002,
|
||
|
|
"grad_norm": 0.7426837827375632,
|
||
|
|
"learning_rate": 3.766959640905954e-05,
|
||
|
|
"loss": 0.1647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13092140853405,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 804.3,
|
||
|
|
"valid_targets_min": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.688,
|
||
|
|
"grad_norm": 0.700407597454358,
|
||
|
|
"learning_rate": 3.763207361272153e-05,
|
||
|
|
"loss": 0.1827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16554605960845947,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 919.0,
|
||
|
|
"valid_targets_min": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.696,
|
||
|
|
"grad_norm": 0.701310339235854,
|
||
|
|
"learning_rate": 3.759427013620849e-05,
|
||
|
|
"loss": 0.1768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14189761877059937,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 728.5,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.704,
|
||
|
|
"grad_norm": 0.7706015818198613,
|
||
|
|
"learning_rate": 3.755618658130366e-05,
|
||
|
|
"loss": 0.1961,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2144859880208969,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 979.9,
|
||
|
|
"valid_targets_min": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.712,
|
||
|
|
"grad_norm": 0.7389928113492108,
|
||
|
|
"learning_rate": 3.751782355424877e-05,
|
||
|
|
"loss": 0.1719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1575872302055359,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 858.1,
|
||
|
|
"valid_targets_min": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.72,
|
||
|
|
"grad_norm": 0.7535959567378652,
|
||
|
|
"learning_rate": 3.7479181665734395e-05,
|
||
|
|
"loss": 0.1748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2564152777194977,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 1000.1,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.728,
|
||
|
|
"grad_norm": 0.6333921431272092,
|
||
|
|
"learning_rate": 3.7440261530890213e-05,
|
||
|
|
"loss": 0.1629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12830719351768494,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 871.8,
|
||
|
|
"valid_targets_min": 438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.736,
|
||
|
|
"grad_norm": 0.9380322383200947,
|
||
|
|
"learning_rate": 3.740106376927527e-05,
|
||
|
|
"loss": 0.2729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.37827736139297485,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 1459.2,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.744,
|
||
|
|
"grad_norm": 0.7438467929313883,
|
||
|
|
"learning_rate": 3.7361589004868035e-05,
|
||
|
|
"loss": 0.1951,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26724135875701904,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 1121.4,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.752,
|
||
|
|
"grad_norm": 0.6105440147560874,
|
||
|
|
"learning_rate": 3.7321837866056535e-05,
|
||
|
|
"loss": 0.1603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14079777896404266,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 904.3,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.76,
|
||
|
|
"grad_norm": 0.5593597328714256,
|
||
|
|
"learning_rate": 3.728181098562831e-05,
|
||
|
|
"loss": 0.1658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1442725956439972,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 900.8,
|
||
|
|
"valid_targets_min": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.768,
|
||
|
|
"grad_norm": 0.64740837468299,
|
||
|
|
"learning_rate": 3.7241509000760355e-05,
|
||
|
|
"loss": 0.1876,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14557820558547974,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 851.6,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.776,
|
||
|
|
"grad_norm": 0.6640831943887205,
|
||
|
|
"learning_rate": 3.720093255300899e-05,
|
||
|
|
"loss": 0.1871,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1812448352575302,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 1060.0,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.784,
|
||
|
|
"grad_norm": 0.5691045293886683,
|
||
|
|
"learning_rate": 3.7160082288299645e-05,
|
||
|
|
"loss": 0.1827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13262201845645905,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 823.4,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.792,
|
||
|
|
"grad_norm": 0.6836751894395403,
|
||
|
|
"learning_rate": 3.7118958856916534e-05,
|
||
|
|
"loss": 0.1915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1724395751953125,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 890.5,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8,
|
||
|
|
"grad_norm": 0.7712927602843659,
|
||
|
|
"learning_rate": 3.707756291349237e-05,
|
||
|
|
"loss": 0.1824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2417829930782318,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 1004.9,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.808,
|
||
|
|
"grad_norm": 0.6092761609855996,
|
||
|
|
"learning_rate": 3.703589511699787e-05,
|
||
|
|
"loss": 0.1424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11884502321481705,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 842.1,
|
||
|
|
"valid_targets_min": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8159999999999998,
|
||
|
|
"grad_norm": 0.6943283284639649,
|
||
|
|
"learning_rate": 3.6993956130731355e-05,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1568162441253662,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 893.6,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8239999999999998,
|
||
|
|
"grad_norm": 0.5735978116252018,
|
||
|
|
"learning_rate": 3.6951746622308106e-05,
|
||
|
|
"loss": 0.2083,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23380722105503082,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 1723.0,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8319999999999999,
|
||
|
|
"grad_norm": 0.7198338854179046,
|
||
|
|
"learning_rate": 3.69092672636498e-05,
|
||
|
|
"loss": 0.1459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19796811044216156,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 849.2,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8399999999999999,
|
||
|
|
"grad_norm": 0.7408908195363425,
|
||
|
|
"learning_rate": 3.686651873097375e-05,
|
||
|
|
"loss": 0.1735,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19666478037834167,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 850.2,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8479999999999999,
|
||
|
|
"grad_norm": 0.6762739154058124,
|
||
|
|
"learning_rate": 3.682350170478223e-05,
|
||
|
|
"loss": 0.1381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15073895454406738,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 817.7,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8559999999999999,
|
||
|
|
"grad_norm": 0.6793788519811231,
|
||
|
|
"learning_rate": 3.678021686985153e-05,
|
||
|
|
"loss": 0.1448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16136187314987183,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 838.0,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8639999999999999,
|
||
|
|
"grad_norm": 0.6810417595416728,
|
||
|
|
"learning_rate": 3.6736664915221144e-05,
|
||
|
|
"loss": 0.1848,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18239787220954895,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 936.2,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8719999999999999,
|
||
|
|
"grad_norm": 0.6866199170569222,
|
||
|
|
"learning_rate": 3.669284653418278e-05,
|
||
|
|
"loss": 0.1585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18783831596374512,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 939.8,
|
||
|
|
"valid_targets_min": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.88,
|
||
|
|
"grad_norm": 0.6419374253103914,
|
||
|
|
"learning_rate": 3.6648762424269306e-05,
|
||
|
|
"loss": 0.2158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20050100982189178,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 994.1,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.888,
|
||
|
|
"grad_norm": 0.6102929503441015,
|
||
|
|
"learning_rate": 3.660441328724365e-05,
|
||
|
|
"loss": 0.1407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1384953111410141,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 914.1,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.896,
|
||
|
|
"grad_norm": 0.9113657887405613,
|
||
|
|
"learning_rate": 3.655979982908764e-05,
|
||
|
|
"loss": 0.1608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19064557552337646,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 976.2,
|
||
|
|
"valid_targets_min": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.904,
|
||
|
|
"grad_norm": 0.6209355511318521,
|
||
|
|
"learning_rate": 3.6514922759990756e-05,
|
||
|
|
"loss": 0.1455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12727031111717224,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 743.3,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.912,
|
||
|
|
"grad_norm": 0.7706200982444233,
|
||
|
|
"learning_rate": 3.646978279433883e-05,
|
||
|
|
"loss": 0.221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18883904814720154,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 913.3,
|
||
|
|
"valid_targets_min": 461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.92,
|
||
|
|
"grad_norm": 0.6451225901538964,
|
||
|
|
"learning_rate": 3.6424380650702685e-05,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12537574768066406,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 807.4,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.928,
|
||
|
|
"grad_norm": 0.7540037123180529,
|
||
|
|
"learning_rate": 3.637871705182667e-05,
|
||
|
|
"loss": 0.1824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19827613234519958,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 1028.9,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.936,
|
||
|
|
"grad_norm": 0.6307659588381288,
|
||
|
|
"learning_rate": 3.633279272461717e-05,
|
||
|
|
"loss": 0.159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14449161291122437,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 832.3,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.944,
|
||
|
|
"grad_norm": 0.5870419604308534,
|
||
|
|
"learning_rate": 3.628660840013102e-05,
|
||
|
|
"loss": 0.148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12478692829608917,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 798.2,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.952,
|
||
|
|
"grad_norm": 0.682646779731463,
|
||
|
|
"learning_rate": 3.624016481356392e-05,
|
||
|
|
"loss": 0.2556,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2125765085220337,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 1070.2,
|
||
|
|
"valid_targets_min": 621
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.96,
|
||
|
|
"grad_norm": 0.6057329961642336,
|
||
|
|
"learning_rate": 3.619346270423866e-05,
|
||
|
|
"loss": 0.1598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13245423138141632,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 852.5,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.968,
|
||
|
|
"grad_norm": 0.6869070863200438,
|
||
|
|
"learning_rate": 3.6146502815593384e-05,
|
||
|
|
"loss": 0.1348,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15041041374206543,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 780.8,
|
||
|
|
"valid_targets_min": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.976,
|
||
|
|
"grad_norm": 0.6438842436430051,
|
||
|
|
"learning_rate": 3.609928589516977e-05,
|
||
|
|
"loss": 0.1615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17005938291549683,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 904.0,
|
||
|
|
"valid_targets_min": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.984,
|
||
|
|
"grad_norm": 0.8188644878480772,
|
||
|
|
"learning_rate": 3.6051812694601114e-05,
|
||
|
|
"loss": 0.1386,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11581555008888245,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 744.3,
|
||
|
|
"valid_targets_min": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.992,
|
||
|
|
"grad_norm": 0.6677337021986096,
|
||
|
|
"learning_rate": 3.6004083969600346e-05,
|
||
|
|
"loss": 0.2029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14588750898838043,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 892.2,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.675639545335633,
|
||
|
|
"learning_rate": 3.595610047994804e-05,
|
||
|
|
"loss": 0.1651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14435610175132751,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 788.2,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.008,
|
||
|
|
"grad_norm": 0.6671909434793415,
|
||
|
|
"learning_rate": 3.5907862989480285e-05,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13127434253692627,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 808.4,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.016,
|
||
|
|
"grad_norm": 0.6695140708226522,
|
||
|
|
"learning_rate": 3.585937226607656e-05,
|
||
|
|
"loss": 0.1381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1698671281337738,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 1084.6,
|
||
|
|
"valid_targets_min": 611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.024,
|
||
|
|
"grad_norm": 0.812996750262902,
|
||
|
|
"learning_rate": 3.5810629081647476e-05,
|
||
|
|
"loss": 0.1701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20692437887191772,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 963.8,
|
||
|
|
"valid_targets_min": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.032,
|
||
|
|
"grad_norm": 0.6142695293984198,
|
||
|
|
"learning_rate": 3.576163421212249e-05,
|
||
|
|
"loss": 0.1688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.109217070043087,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 769.1,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.04,
|
||
|
|
"grad_norm": 0.704398806152235,
|
||
|
|
"learning_rate": 3.5712388437437576e-05,
|
||
|
|
"loss": 0.1204,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12265762686729431,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 816.1,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.048,
|
||
|
|
"grad_norm": 0.777803434971521,
|
||
|
|
"learning_rate": 3.566289254152283e-05,
|
||
|
|
"loss": 0.1641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15761694312095642,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 829.4,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.056,
|
||
|
|
"grad_norm": 0.6217244928443194,
|
||
|
|
"learning_rate": 3.56131473122899e-05,
|
||
|
|
"loss": 0.1208,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11285565793514252,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 900.2,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.064,
|
||
|
|
"grad_norm": 0.6970991090986864,
|
||
|
|
"learning_rate": 3.556315354161955e-05,
|
||
|
|
"loss": 0.146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1326259821653366,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 820.5,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.072,
|
||
|
|
"grad_norm": 0.6227996898139345,
|
||
|
|
"learning_rate": 3.551291202534899e-05,
|
||
|
|
"loss": 0.1331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12502652406692505,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 941.1,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.08,
|
||
|
|
"grad_norm": 0.6163809129412077,
|
||
|
|
"learning_rate": 3.546242356325922e-05,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11981011182069778,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 909.6,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.088,
|
||
|
|
"grad_norm": 0.7139872878534804,
|
||
|
|
"learning_rate": 3.5411688959062323e-05,
|
||
|
|
"loss": 0.1211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13094274699687958,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 752.4,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.096,
|
||
|
|
"grad_norm": 0.701035601155758,
|
||
|
|
"learning_rate": 3.5360709020388625e-05,
|
||
|
|
"loss": 0.1514,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12007572501897812,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 811.1,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.104,
|
||
|
|
"grad_norm": 0.6739881647901436,
|
||
|
|
"learning_rate": 3.530948455877388e-05,
|
||
|
|
"loss": 0.1313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12018238753080368,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 749.4,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.112,
|
||
|
|
"grad_norm": 0.6968055383560433,
|
||
|
|
"learning_rate": 3.525801638964634e-05,
|
||
|
|
"loss": 0.1377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1314522922039032,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 842.8,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.12,
|
||
|
|
"grad_norm": 0.6624716484945574,
|
||
|
|
"learning_rate": 3.520630533231376e-05,
|
||
|
|
"loss": 0.1439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13978487253189087,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 907.9,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.128,
|
||
|
|
"grad_norm": 0.6585719327478531,
|
||
|
|
"learning_rate": 3.5154352209950376e-05,
|
||
|
|
"loss": 0.1217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11899503320455551,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 748.1,
|
||
|
|
"valid_targets_min": 419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.136,
|
||
|
|
"grad_norm": 0.7568797476611201,
|
||
|
|
"learning_rate": 3.510215784958376e-05,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1821085512638092,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 1131.6,
|
||
|
|
"valid_targets_min": 629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.144,
|
||
|
|
"grad_norm": 0.6989537757915661,
|
||
|
|
"learning_rate": 3.5049723082081755e-05,
|
||
|
|
"loss": 0.1648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11788707226514816,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 832.4,
|
||
|
|
"valid_targets_min": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.152,
|
||
|
|
"grad_norm": 0.6341811561632761,
|
||
|
|
"learning_rate": 3.49970487421391e-05,
|
||
|
|
"loss": 0.1367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19004957377910614,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 1656.5,
|
||
|
|
"valid_targets_min": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.16,
|
||
|
|
"grad_norm": 0.6634817410417032,
|
||
|
|
"learning_rate": 3.494413566826427e-05,
|
||
|
|
"loss": 0.1594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14211371541023254,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 1018.9,
|
||
|
|
"valid_targets_min": 612
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.168,
|
||
|
|
"grad_norm": 0.6959937182985245,
|
||
|
|
"learning_rate": 3.489098470276608e-05,
|
||
|
|
"loss": 0.1342,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12617823481559753,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 830.5,
|
||
|
|
"valid_targets_min": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.176,
|
||
|
|
"grad_norm": 0.6581171678360322,
|
||
|
|
"learning_rate": 3.483759669174024e-05,
|
||
|
|
"loss": 0.1462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11195407807826996,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 854.4,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.184,
|
||
|
|
"grad_norm": 0.8403619558072984,
|
||
|
|
"learning_rate": 3.478397248505598e-05,
|
||
|
|
"loss": 0.1919,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2636897563934326,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 1145.6,
|
||
|
|
"valid_targets_min": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.192,
|
||
|
|
"grad_norm": 0.6544919553846851,
|
||
|
|
"learning_rate": 3.473011293634241e-05,
|
||
|
|
"loss": 0.1444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16438347101211548,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 1205.9,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2,
|
||
|
|
"grad_norm": 0.715642039819752,
|
||
|
|
"learning_rate": 3.467601890297502e-05,
|
||
|
|
"loss": 0.1883,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12462947517633438,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 835.9,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.208,
|
||
|
|
"grad_norm": 0.5887672811038482,
|
||
|
|
"learning_rate": 3.4621691246061976e-05,
|
||
|
|
"loss": 0.1289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09311959147453308,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 925.4,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.216,
|
||
|
|
"grad_norm": 0.722356368405542,
|
||
|
|
"learning_rate": 3.456713083043046e-05,
|
||
|
|
"loss": 0.1398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12428110092878342,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 813.9,
|
||
|
|
"valid_targets_min": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.224,
|
||
|
|
"grad_norm": 0.6761216165323534,
|
||
|
|
"learning_rate": 3.451233852461285e-05,
|
||
|
|
"loss": 0.1336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12943808734416962,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 883.8,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.232,
|
||
|
|
"grad_norm": 0.6108070076628657,
|
||
|
|
"learning_rate": 3.4457315200832935e-05,
|
||
|
|
"loss": 0.1182,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10476785898208618,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 756.9,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.24,
|
||
|
|
"grad_norm": 0.7036553476885127,
|
||
|
|
"learning_rate": 3.440206173499201e-05,
|
||
|
|
"loss": 0.1575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15050138533115387,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 900.4,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.248,
|
||
|
|
"grad_norm": 0.7032647871620187,
|
||
|
|
"learning_rate": 3.4346579006654945e-05,
|
||
|
|
"loss": 0.1304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1485375314950943,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 894.5,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2560000000000002,
|
||
|
|
"grad_norm": 0.7955675404987136,
|
||
|
|
"learning_rate": 3.4290867899036166e-05,
|
||
|
|
"loss": 0.2338,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31520774960517883,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 1497.8,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2640000000000002,
|
||
|
|
"grad_norm": 0.805423853291883,
|
||
|
|
"learning_rate": 3.4234929298985614e-05,
|
||
|
|
"loss": 0.1588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2818862795829773,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 1244.6,
|
||
|
|
"valid_targets_min": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2720000000000002,
|
||
|
|
"grad_norm": 0.7128978513934985,
|
||
|
|
"learning_rate": 3.417876409697463e-05,
|
||
|
|
"loss": 0.1383,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18899387121200562,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 1032.2,
|
||
|
|
"valid_targets_min": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2800000000000002,
|
||
|
|
"grad_norm": 0.6379976195130551,
|
||
|
|
"learning_rate": 3.412237318708175e-05,
|
||
|
|
"loss": 0.1311,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11107778549194336,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 791.8,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.288,
|
||
|
|
"grad_norm": 0.6338543183979924,
|
||
|
|
"learning_rate": 3.4065757466978504e-05,
|
||
|
|
"loss": 0.1812,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1256306767463684,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 817.4,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.296,
|
||
|
|
"grad_norm": 0.702151402068189,
|
||
|
|
"learning_rate": 3.400891783791511e-05,
|
||
|
|
"loss": 0.146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11320549249649048,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 712.9,
|
||
|
|
"valid_targets_min": 435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.304,
|
||
|
|
"grad_norm": 0.6217875950926312,
|
||
|
|
"learning_rate": 3.395185520470614e-05,
|
||
|
|
"loss": 0.1639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11201746016740799,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 803.2,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.312,
|
||
|
|
"grad_norm": 0.706308661700129,
|
||
|
|
"learning_rate": 3.38945704757161e-05,
|
||
|
|
"loss": 0.1624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13947395980358124,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 937.0,
|
||
|
|
"valid_targets_min": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.32,
|
||
|
|
"grad_norm": 0.6919298828515554,
|
||
|
|
"learning_rate": 3.383706456284498e-05,
|
||
|
|
"loss": 0.143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1306343674659729,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 843.5,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.328,
|
||
|
|
"grad_norm": 0.6440894176240278,
|
||
|
|
"learning_rate": 3.377933838151374e-05,
|
||
|
|
"loss": 0.124,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11203815042972565,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 847.4,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.336,
|
||
|
|
"grad_norm": 0.6348699227329601,
|
||
|
|
"learning_rate": 3.3721392850649714e-05,
|
||
|
|
"loss": 0.1758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1289135068655014,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 991.3,
|
||
|
|
"valid_targets_min": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.344,
|
||
|
|
"grad_norm": 0.7253148487683199,
|
||
|
|
"learning_rate": 3.3663228892672034e-05,
|
||
|
|
"loss": 0.1489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12253601104021072,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 866.7,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.352,
|
||
|
|
"grad_norm": 0.6968645665141834,
|
||
|
|
"learning_rate": 3.36048474334769e-05,
|
||
|
|
"loss": 0.1543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1570267379283905,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 999.6,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.36,
|
||
|
|
"grad_norm": 0.7237159999675643,
|
||
|
|
"learning_rate": 3.3546249402422834e-05,
|
||
|
|
"loss": 0.1562,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1736963391304016,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 937.2,
|
||
|
|
"valid_targets_min": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.368,
|
||
|
|
"grad_norm": 0.5043207572468628,
|
||
|
|
"learning_rate": 3.3487435732315944e-05,
|
||
|
|
"loss": 0.1653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16285204887390137,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 2031.6,
|
||
|
|
"valid_targets_min": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.376,
|
||
|
|
"grad_norm": 0.7677480497490647,
|
||
|
|
"learning_rate": 3.342840735939501e-05,
|
||
|
|
"loss": 0.226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.306096613407135,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 1291.9,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.384,
|
||
|
|
"grad_norm": 0.6822127897762736,
|
||
|
|
"learning_rate": 3.33691652233166e-05,
|
||
|
|
"loss": 0.1422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13742084801197052,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 876.8,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.392,
|
||
|
|
"grad_norm": 0.6428337691702128,
|
||
|
|
"learning_rate": 3.330971026714016e-05,
|
||
|
|
"loss": 0.146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11387015879154205,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 922.8,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4,
|
||
|
|
"grad_norm": 0.7714905951446593,
|
||
|
|
"learning_rate": 3.325004343731292e-05,
|
||
|
|
"loss": 0.1405,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15051744878292084,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 778.1,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.408,
|
||
|
|
"grad_norm": 0.6537243906589456,
|
||
|
|
"learning_rate": 3.3190165683654885e-05,
|
||
|
|
"loss": 0.1362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12786319851875305,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 924.9,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.416,
|
||
|
|
"grad_norm": 0.7181334150669801,
|
||
|
|
"learning_rate": 3.31300779593437e-05,
|
||
|
|
"loss": 0.1587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1835220456123352,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 1048.0,
|
||
|
|
"valid_targets_min": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.424,
|
||
|
|
"grad_norm": 0.7200024616684744,
|
||
|
|
"learning_rate": 3.306978122089948e-05,
|
||
|
|
"loss": 0.1273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1197834461927414,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 701.9,
|
||
|
|
"valid_targets_min": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.432,
|
||
|
|
"grad_norm": 0.76193388728556,
|
||
|
|
"learning_rate": 3.300927642816957e-05,
|
||
|
|
"loss": 0.1289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1792105734348297,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 1012.2,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.44,
|
||
|
|
"grad_norm": 0.6802775591904766,
|
||
|
|
"learning_rate": 3.294856454431328e-05,
|
||
|
|
"loss": 0.1377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17906546592712402,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 1050.2,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.448,
|
||
|
|
"grad_norm": 1.127644705809151,
|
||
|
|
"learning_rate": 3.288764653578653e-05,
|
||
|
|
"loss": 0.1336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.098813995718956,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 773.1,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.456,
|
||
|
|
"grad_norm": 0.6447834377487103,
|
||
|
|
"learning_rate": 3.2826523372326516e-05,
|
||
|
|
"loss": 0.1288,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1183435320854187,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 878.8,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.464,
|
||
|
|
"grad_norm": 0.7130623789996078,
|
||
|
|
"learning_rate": 3.276519602693621e-05,
|
||
|
|
"loss": 0.1456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1491391956806183,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 800.8,
|
||
|
|
"valid_targets_min": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.472,
|
||
|
|
"grad_norm": 0.620842798883401,
|
||
|
|
"learning_rate": 3.270366547586892e-05,
|
||
|
|
"loss": 0.1398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1251269280910492,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 798.4,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.48,
|
||
|
|
"grad_norm": 0.6742982523505523,
|
||
|
|
"learning_rate": 3.2641932698612715e-05,
|
||
|
|
"loss": 0.1554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11616584658622742,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 786.2,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.488,
|
||
|
|
"grad_norm": 0.7167899223398502,
|
||
|
|
"learning_rate": 3.2579998677874855e-05,
|
||
|
|
"loss": 0.1798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17860479652881622,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 1121.8,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.496,
|
||
|
|
"grad_norm": 0.7001792061426741,
|
||
|
|
"learning_rate": 3.251786439956614e-05,
|
||
|
|
"loss": 0.1352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1385573148727417,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 875.4,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.504,
|
||
|
|
"grad_norm": 0.6570116776180441,
|
||
|
|
"learning_rate": 3.2455530852785206e-05,
|
||
|
|
"loss": 0.1457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1328207403421402,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 897.6,
|
||
|
|
"valid_targets_min": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.512,
|
||
|
|
"grad_norm": 0.611927697245731,
|
||
|
|
"learning_rate": 3.239299902980281e-05,
|
||
|
|
"loss": 0.1409,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11006229370832443,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 858.8,
|
||
|
|
"valid_targets_min": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.52,
|
||
|
|
"grad_norm": 0.6484285085659105,
|
||
|
|
"learning_rate": 3.2330269926046e-05,
|
||
|
|
"loss": 0.1542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19695384800434113,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 1437.2,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.528,
|
||
|
|
"grad_norm": 0.5964550781287618,
|
||
|
|
"learning_rate": 3.2267344540082284e-05,
|
||
|
|
"loss": 0.1296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12830229103565216,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 1032.3,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.536,
|
||
|
|
"grad_norm": 0.6517306987311609,
|
||
|
|
"learning_rate": 3.220422387360373e-05,
|
||
|
|
"loss": 0.1364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10732553899288177,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 722.9,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.544,
|
||
|
|
"grad_norm": 0.7032594046384518,
|
||
|
|
"learning_rate": 3.2140908931411026e-05,
|
||
|
|
"loss": 0.1505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12496422231197357,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 762.9,
|
||
|
|
"valid_targets_min": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.552,
|
||
|
|
"grad_norm": 0.647513049077345,
|
||
|
|
"learning_rate": 3.207740072139748e-05,
|
||
|
|
"loss": 0.1864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14673733711242676,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 907.1,
|
||
|
|
"valid_targets_min": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.56,
|
||
|
|
"grad_norm": 0.6265580167463557,
|
||
|
|
"learning_rate": 3.2013700254532996e-05,
|
||
|
|
"loss": 0.1396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11060081422328949,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 896.2,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.568,
|
||
|
|
"grad_norm": 0.6798658233952004,
|
||
|
|
"learning_rate": 3.194980854484794e-05,
|
||
|
|
"loss": 0.1523,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15329161286354065,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 882.2,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.576,
|
||
|
|
"grad_norm": 0.6635462419744681,
|
||
|
|
"learning_rate": 3.188572660941702e-05,
|
||
|
|
"loss": 0.1335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11939306557178497,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 808.9,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.584,
|
||
|
|
"grad_norm": 0.6617445706954177,
|
||
|
|
"learning_rate": 3.182145546834311e-05,
|
||
|
|
"loss": 0.1415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12312141805887222,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 789.6,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.592,
|
||
|
|
"grad_norm": 0.7205574844014744,
|
||
|
|
"learning_rate": 3.1756996144740994e-05,
|
||
|
|
"loss": 0.1463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22105172276496887,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 1439.3,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6,
|
||
|
|
"grad_norm": 0.7133516783301649,
|
||
|
|
"learning_rate": 3.1692349664721074e-05,
|
||
|
|
"loss": 0.1518,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11445567011833191,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 795.9,
|
||
|
|
"valid_targets_min": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.608,
|
||
|
|
"grad_norm": 0.6228609586291742,
|
||
|
|
"learning_rate": 3.1627517057373046e-05,
|
||
|
|
"loss": 0.1301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14198367297649384,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 1275.9,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.616,
|
||
|
|
"grad_norm": 0.7777392370038086,
|
||
|
|
"learning_rate": 3.156249935474953e-05,
|
||
|
|
"loss": 0.1471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12163381278514862,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 905.9,
|
||
|
|
"valid_targets_min": 623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.624,
|
||
|
|
"grad_norm": 0.7122943324469065,
|
||
|
|
"learning_rate": 3.1497297591849614e-05,
|
||
|
|
"loss": 0.1338,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15949739515781403,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 939.0,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.632,
|
||
|
|
"grad_norm": 0.5904037247920326,
|
||
|
|
"learning_rate": 3.143191280660238e-05,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18335631489753723,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 1674.0,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.64,
|
||
|
|
"grad_norm": 0.6644180963489857,
|
||
|
|
"learning_rate": 3.1366346039850424e-05,
|
||
|
|
"loss": 0.152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11574030667543411,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 785.8,
|
||
|
|
"valid_targets_min": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.648,
|
||
|
|
"grad_norm": 0.7622292992036905,
|
||
|
|
"learning_rate": 3.130059833533323e-05,
|
||
|
|
"loss": 0.1454,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13806472718715668,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 757.1,
|
||
|
|
"valid_targets_min": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.656,
|
||
|
|
"grad_norm": 0.7666468087153077,
|
||
|
|
"learning_rate": 3.123467073967059e-05,
|
||
|
|
"loss": 0.1468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24357299506664276,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 1023.1,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.664,
|
||
|
|
"grad_norm": 0.7620880026977072,
|
||
|
|
"learning_rate": 3.116856430234594e-05,
|
||
|
|
"loss": 0.1795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20035260915756226,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 1098.9,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.672,
|
||
|
|
"grad_norm": 0.7693275033385583,
|
||
|
|
"learning_rate": 3.110228007568963e-05,
|
||
|
|
"loss": 0.159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.270406037569046,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 1265.4,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.68,
|
||
|
|
"grad_norm": 0.6810833402248885,
|
||
|
|
"learning_rate": 3.103581911486221e-05,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14812511205673218,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 970.6,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6879999999999997,
|
||
|
|
"grad_norm": 0.7047758447085022,
|
||
|
|
"learning_rate": 3.0969182477837604e-05,
|
||
|
|
"loss": 0.1252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16393721103668213,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 882.7,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6959999999999997,
|
||
|
|
"grad_norm": 0.6996051941190828,
|
||
|
|
"learning_rate": 3.090237122538628e-05,
|
||
|
|
"loss": 0.1189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12491434812545776,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 850.1,
|
||
|
|
"valid_targets_min": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7039999999999997,
|
||
|
|
"grad_norm": 0.825415715952181,
|
||
|
|
"learning_rate": 3.0835386421058345e-05,
|
||
|
|
"loss": 0.1543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20368003845214844,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 1022.1,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7119999999999997,
|
||
|
|
"grad_norm": 0.7550799135353304,
|
||
|
|
"learning_rate": 3.0768229131166664e-05,
|
||
|
|
"loss": 0.1732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20968970656394958,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 1147.6,
|
||
|
|
"valid_targets_min": 553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7199999999999998,
|
||
|
|
"grad_norm": 0.6583647661605726,
|
||
|
|
"learning_rate": 3.070090042476983e-05,
|
||
|
|
"loss": 0.1816,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1478102207183838,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 989.4,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7279999999999998,
|
||
|
|
"grad_norm": 0.6240340960504206,
|
||
|
|
"learning_rate": 3.063340137365517e-05,
|
||
|
|
"loss": 0.1446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1631350815296173,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 1248.9,
|
||
|
|
"valid_targets_min": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7359999999999998,
|
||
|
|
"grad_norm": 0.6965617320231037,
|
||
|
|
"learning_rate": 3.0565733052321674e-05,
|
||
|
|
"loss": 0.1823,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16352877020835876,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 1014.0,
|
||
|
|
"valid_targets_min": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7439999999999998,
|
||
|
|
"grad_norm": 0.6546126474286746,
|
||
|
|
"learning_rate": 3.0497896537962924e-05,
|
||
|
|
"loss": 0.1295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10371782630681992,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 816.9,
|
||
|
|
"valid_targets_min": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.752,
|
||
|
|
"grad_norm": 0.6863948280952433,
|
||
|
|
"learning_rate": 3.042989291044991e-05,
|
||
|
|
"loss": 0.1424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1213807612657547,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 907.7,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.76,
|
||
|
|
"grad_norm": 0.6196156050560603,
|
||
|
|
"learning_rate": 3.036172325231383e-05,
|
||
|
|
"loss": 0.1265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10670541971921921,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 817.1,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.768,
|
||
|
|
"grad_norm": 0.6885198422874407,
|
||
|
|
"learning_rate": 3.0293388648728908e-05,
|
||
|
|
"loss": 0.1732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14460545778274536,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 836.1,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.776,
|
||
|
|
"grad_norm": 0.6716979655013353,
|
||
|
|
"learning_rate": 3.022489018749508e-05,
|
||
|
|
"loss": 0.1438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.128073051571846,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 822.5,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.784,
|
||
|
|
"grad_norm": 0.5905720717331144,
|
||
|
|
"learning_rate": 3.015622895902068e-05,
|
||
|
|
"loss": 0.1289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10219383239746094,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 1018.3,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.792,
|
||
|
|
"grad_norm": 0.6538619633756149,
|
||
|
|
"learning_rate": 3.008740605630508e-05,
|
||
|
|
"loss": 0.1127,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11181123554706573,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 890.4,
|
||
|
|
"valid_targets_min": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8,
|
||
|
|
"grad_norm": 0.5951374209934471,
|
||
|
|
"learning_rate": 3.0018422574921337e-05,
|
||
|
|
"loss": 0.136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10837581753730774,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 899.2,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.808,
|
||
|
|
"grad_norm": 0.7052329434080948,
|
||
|
|
"learning_rate": 2.9949279612998673e-05,
|
||
|
|
"loss": 0.137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13119608163833618,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 898.5,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.816,
|
||
|
|
"grad_norm": 0.8015749810161719,
|
||
|
|
"learning_rate": 2.9879978271205064e-05,
|
||
|
|
"loss": 0.158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.34896156191825867,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 1340.1,
|
||
|
|
"valid_targets_min": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.824,
|
||
|
|
"grad_norm": 0.6290954626710966,
|
||
|
|
"learning_rate": 2.9810519652729692e-05,
|
||
|
|
"loss": 0.1473,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14061371982097626,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 915.2,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.832,
|
||
|
|
"grad_norm": 0.7261508576120159,
|
||
|
|
"learning_rate": 2.9740904863265378e-05,
|
||
|
|
"loss": 0.1687,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17512479424476624,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 943.8,
|
||
|
|
"valid_targets_min": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.84,
|
||
|
|
"grad_norm": 0.8060494791243035,
|
||
|
|
"learning_rate": 2.967113501099097e-05,
|
||
|
|
"loss": 0.1405,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2883511781692505,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 1182.6,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.848,
|
||
|
|
"grad_norm": 0.6654915787262201,
|
||
|
|
"learning_rate": 2.9601211206553745e-05,
|
||
|
|
"loss": 0.1193,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1242620199918747,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 882.2,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.856,
|
||
|
|
"grad_norm": 0.7346953920740353,
|
||
|
|
"learning_rate": 2.9531134563051686e-05,
|
||
|
|
"loss": 0.1239,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11942558735609055,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 803.6,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.864,
|
||
|
|
"grad_norm": 0.65907236959137,
|
||
|
|
"learning_rate": 2.946090619601579e-05,
|
||
|
|
"loss": 0.1313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13639883697032928,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 874.3,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.872,
|
||
|
|
"grad_norm": 0.6397246748417049,
|
||
|
|
"learning_rate": 2.9390527223392292e-05,
|
||
|
|
"loss": 0.1411,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12321165949106216,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 806.2,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.88,
|
||
|
|
"grad_norm": 1.0747048389216176,
|
||
|
|
"learning_rate": 2.931999876552488e-05,
|
||
|
|
"loss": 0.1455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13217294216156006,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 878.2,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.888,
|
||
|
|
"grad_norm": 0.6288006656918863,
|
||
|
|
"learning_rate": 2.9249321945136854e-05,
|
||
|
|
"loss": 0.1347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11415164917707443,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 833.4,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.896,
|
||
|
|
"grad_norm": 0.6493401209466289,
|
||
|
|
"learning_rate": 2.9178497887313257e-05,
|
||
|
|
"loss": 0.1533,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1355169266462326,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 1076.9,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.904,
|
||
|
|
"grad_norm": 0.6826145785690596,
|
||
|
|
"learning_rate": 2.9107527719482968e-05,
|
||
|
|
"loss": 0.1278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14696922898292542,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 1041.2,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.912,
|
||
|
|
"grad_norm": 0.6714528039918809,
|
||
|
|
"learning_rate": 2.9036412571400747e-05,
|
||
|
|
"loss": 0.1557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12627488374710083,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 791.7,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.92,
|
||
|
|
"grad_norm": 0.669057548346749,
|
||
|
|
"learning_rate": 2.8965153575129255e-05,
|
||
|
|
"loss": 0.1216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11150173842906952,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 713.1,
|
||
|
|
"valid_targets_min": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.928,
|
||
|
|
"grad_norm": 6.262537715901929,
|
||
|
|
"learning_rate": 2.8893751865021044e-05,
|
||
|
|
"loss": 0.1561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1828661561012268,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 1074.8,
|
||
|
|
"valid_targets_min": 438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.936,
|
||
|
|
"grad_norm": 0.8329681709717811,
|
||
|
|
"learning_rate": 2.8822208577700473e-05,
|
||
|
|
"loss": 0.1798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3160470426082611,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 1414.8,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.944,
|
||
|
|
"grad_norm": 0.7357974139181813,
|
||
|
|
"learning_rate": 2.8750524852045642e-05,
|
||
|
|
"loss": 0.1211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1282620131969452,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 820.2,
|
||
|
|
"valid_targets_min": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.952,
|
||
|
|
"grad_norm": 0.6775803708281112,
|
||
|
|
"learning_rate": 2.867870182917024e-05,
|
||
|
|
"loss": 0.1477,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1339489221572876,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 790.2,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.96,
|
||
|
|
"grad_norm": 0.6597044199311293,
|
||
|
|
"learning_rate": 2.8606740652405394e-05,
|
||
|
|
"loss": 0.1651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15459227561950684,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 954.8,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.968,
|
||
|
|
"grad_norm": 0.5869493252220873,
|
||
|
|
"learning_rate": 2.853464246728147e-05,
|
||
|
|
"loss": 0.2023,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2387658804655075,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 1852.9,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.976,
|
||
|
|
"grad_norm": 0.7270659719435554,
|
||
|
|
"learning_rate": 2.846240842150984e-05,
|
||
|
|
"loss": 0.1283,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16146603226661682,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 959.2,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.984,
|
||
|
|
"grad_norm": 0.6700866512360112,
|
||
|
|
"learning_rate": 2.839003966496458e-05,
|
||
|
|
"loss": 0.1535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13563427329063416,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 851.7,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.992,
|
||
|
|
"grad_norm": 0.5765343337231628,
|
||
|
|
"learning_rate": 2.8317537349664215e-05,
|
||
|
|
"loss": 0.1295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10503876209259033,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 913.2,
|
||
|
|
"valid_targets_min": 619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.6634220547526659,
|
||
|
|
"learning_rate": 2.824490262975334e-05,
|
||
|
|
"loss": 0.1364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15313103795051575,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 1032.9,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.008,
|
||
|
|
"grad_norm": 0.6340931383149534,
|
||
|
|
"learning_rate": 2.817213666148427e-05,
|
||
|
|
"loss": 0.1325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11905479431152344,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 989.5,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.016,
|
||
|
|
"grad_norm": 0.7397667131418036,
|
||
|
|
"learning_rate": 2.809924060319862e-05,
|
||
|
|
"loss": 0.1533,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09949196875095367,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 925.4,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.024,
|
||
|
|
"grad_norm": 0.7891254837207506,
|
||
|
|
"learning_rate": 2.802621561530888e-05,
|
||
|
|
"loss": 0.1245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14507977664470673,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 951.9,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.032,
|
||
|
|
"grad_norm": 0.7167855246712508,
|
||
|
|
"learning_rate": 2.7953062860279937e-05,
|
||
|
|
"loss": 0.1173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16226643323898315,
|
||
|
|
"step": 1895,
|
||
|
|
"valid_targets_mean": 1303.5,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.04,
|
||
|
|
"grad_norm": 0.8560753101693489,
|
||
|
|
"learning_rate": 2.7879783502610557e-05,
|
||
|
|
"loss": 0.1404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1734582930803299,
|
||
|
|
"step": 1900,
|
||
|
|
"valid_targets_mean": 1111.7,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.048,
|
||
|
|
"grad_norm": 0.6097438352541962,
|
||
|
|
"learning_rate": 2.7806378708814875e-05,
|
||
|
|
"loss": 0.1176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07898702472448349,
|
||
|
|
"step": 1905,
|
||
|
|
"valid_targets_mean": 814.9,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.056,
|
||
|
|
"grad_norm": 0.8334738917846108,
|
||
|
|
"learning_rate": 2.773284964740379e-05,
|
||
|
|
"loss": 0.1172,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1159522607922554,
|
||
|
|
"step": 1910,
|
||
|
|
"valid_targets_mean": 1074.0,
|
||
|
|
"valid_targets_min": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.064,
|
||
|
|
"grad_norm": 0.73709069608429,
|
||
|
|
"learning_rate": 2.7659197488866403e-05,
|
||
|
|
"loss": 0.1148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09380116313695908,
|
||
|
|
"step": 1915,
|
||
|
|
"valid_targets_mean": 831.4,
|
||
|
|
"valid_targets_min": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.072,
|
||
|
|
"grad_norm": 0.7306469497568479,
|
||
|
|
"learning_rate": 2.7585423405651347e-05,
|
||
|
|
"loss": 0.1478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1965886950492859,
|
||
|
|
"step": 1920,
|
||
|
|
"valid_targets_mean": 1309.8,
|
||
|
|
"valid_targets_min": 551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.08,
|
||
|
|
"grad_norm": 0.6295094975865887,
|
||
|
|
"learning_rate": 2.7511528572148153e-05,
|
||
|
|
"loss": 0.1062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08431434631347656,
|
||
|
|
"step": 1925,
|
||
|
|
"valid_targets_mean": 857.9,
|
||
|
|
"valid_targets_min": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.088,
|
||
|
|
"grad_norm": 0.6649810415419259,
|
||
|
|
"learning_rate": 2.7437514164668536e-05,
|
||
|
|
"loss": 0.1123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08847647160291672,
|
||
|
|
"step": 1930,
|
||
|
|
"valid_targets_mean": 897.8,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.096,
|
||
|
|
"grad_norm": 0.8373666738680312,
|
||
|
|
"learning_rate": 2.7363381361427692e-05,
|
||
|
|
"loss": 0.122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15449827909469604,
|
||
|
|
"step": 1935,
|
||
|
|
"valid_targets_mean": 964.7,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.104,
|
||
|
|
"grad_norm": 0.7253704386655504,
|
||
|
|
"learning_rate": 2.72891313425255e-05,
|
||
|
|
"loss": 0.1011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09447682648897171,
|
||
|
|
"step": 1940,
|
||
|
|
"valid_targets_mean": 813.2,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.112,
|
||
|
|
"grad_norm": 0.7999319248633481,
|
||
|
|
"learning_rate": 2.7214765289927777e-05,
|
||
|
|
"loss": 0.1154,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12414933741092682,
|
||
|
|
"step": 1945,
|
||
|
|
"valid_targets_mean": 857.9,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.12,
|
||
|
|
"grad_norm": 0.6796572603097303,
|
||
|
|
"learning_rate": 2.714028438744746e-05,
|
||
|
|
"loss": 0.1068,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10158456861972809,
|
||
|
|
"step": 1950,
|
||
|
|
"valid_targets_mean": 1028.1,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.128,
|
||
|
|
"grad_norm": 0.7284877435667799,
|
||
|
|
"learning_rate": 2.706568982072573e-05,
|
||
|
|
"loss": 0.0905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10231685638427734,
|
||
|
|
"step": 1955,
|
||
|
|
"valid_targets_mean": 862.3,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.136,
|
||
|
|
"grad_norm": 0.7786727324959434,
|
||
|
|
"learning_rate": 2.6990982777213174e-05,
|
||
|
|
"loss": 0.1375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13553225994110107,
|
||
|
|
"step": 1960,
|
||
|
|
"valid_targets_mean": 991.1,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.144,
|
||
|
|
"grad_norm": 0.6242705242724281,
|
||
|
|
"learning_rate": 2.691616444615085e-05,
|
||
|
|
"loss": 0.106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08428169786930084,
|
||
|
|
"step": 1965,
|
||
|
|
"valid_targets_mean": 833.7,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.152,
|
||
|
|
"grad_norm": 0.797000991363135,
|
||
|
|
"learning_rate": 2.6841236018551402e-05,
|
||
|
|
"loss": 0.1187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11778293550014496,
|
||
|
|
"step": 1970,
|
||
|
|
"valid_targets_mean": 869.9,
|
||
|
|
"valid_targets_min": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.16,
|
||
|
|
"grad_norm": 0.7417976078950065,
|
||
|
|
"learning_rate": 2.6766198687180028e-05,
|
||
|
|
"loss": 0.1305,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10415495932102203,
|
||
|
|
"step": 1975,
|
||
|
|
"valid_targets_mean": 898.8,
|
||
|
|
"valid_targets_min": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.168,
|
||
|
|
"grad_norm": 0.8089478184209437,
|
||
|
|
"learning_rate": 2.6691053646535564e-05,
|
||
|
|
"loss": 0.1623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11006303131580353,
|
||
|
|
"step": 1980,
|
||
|
|
"valid_targets_mean": 914.4,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.176,
|
||
|
|
"grad_norm": 0.7090781876298934,
|
||
|
|
"learning_rate": 2.6615802092831446e-05,
|
||
|
|
"loss": 0.1047,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12003593146800995,
|
||
|
|
"step": 1985,
|
||
|
|
"valid_targets_mean": 847.3,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.184,
|
||
|
|
"grad_norm": 0.8798620396082656,
|
||
|
|
"learning_rate": 2.6540445223976637e-05,
|
||
|
|
"loss": 0.0974,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12840867042541504,
|
||
|
|
"step": 1990,
|
||
|
|
"valid_targets_mean": 868.6,
|
||
|
|
"valid_targets_min": 576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.192,
|
||
|
|
"grad_norm": 0.7587296343097659,
|
||
|
|
"learning_rate": 2.6464984239556602e-05,
|
||
|
|
"loss": 0.1094,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08521630614995956,
|
||
|
|
"step": 1995,
|
||
|
|
"valid_targets_mean": 819.2,
|
||
|
|
"valid_targets_min": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2,
|
||
|
|
"grad_norm": 0.7323881770409771,
|
||
|
|
"learning_rate": 2.63894203408142e-05,
|
||
|
|
"loss": 0.1192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10320921987295151,
|
||
|
|
"step": 2000,
|
||
|
|
"valid_targets_mean": 838.9,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.208,
|
||
|
|
"grad_norm": 0.7810349295950669,
|
||
|
|
"learning_rate": 2.6313754730630528e-05,
|
||
|
|
"loss": 0.139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12769360840320587,
|
||
|
|
"step": 2005,
|
||
|
|
"valid_targets_mean": 969.1,
|
||
|
|
"valid_targets_min": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.216,
|
||
|
|
"grad_norm": 0.6965562856943036,
|
||
|
|
"learning_rate": 2.623798861350582e-05,
|
||
|
|
"loss": 0.1298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11534813791513443,
|
||
|
|
"step": 2010,
|
||
|
|
"valid_targets_mean": 912.4,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.224,
|
||
|
|
"grad_norm": 0.721081345451744,
|
||
|
|
"learning_rate": 2.6162123195540247e-05,
|
||
|
|
"loss": 0.1231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09381479024887085,
|
||
|
|
"step": 2015,
|
||
|
|
"valid_targets_mean": 704.2,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.232,
|
||
|
|
"grad_norm": 0.8473383362522039,
|
||
|
|
"learning_rate": 2.6086159684414726e-05,
|
||
|
|
"loss": 0.1359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2669636309146881,
|
||
|
|
"step": 2020,
|
||
|
|
"valid_targets_mean": 1619.2,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.24,
|
||
|
|
"grad_norm": 0.688237535569076,
|
||
|
|
"learning_rate": 2.6010099289371694e-05,
|
||
|
|
"loss": 0.1218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11964704096317291,
|
||
|
|
"step": 2025,
|
||
|
|
"valid_targets_mean": 1055.5,
|
||
|
|
"valid_targets_min": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.248,
|
||
|
|
"grad_norm": 0.6929958820942029,
|
||
|
|
"learning_rate": 2.5933943221195844e-05,
|
||
|
|
"loss": 0.1295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08900363743305206,
|
||
|
|
"step": 2030,
|
||
|
|
"valid_targets_mean": 793.6,
|
||
|
|
"valid_targets_min": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2560000000000002,
|
||
|
|
"grad_norm": 0.7775109141995964,
|
||
|
|
"learning_rate": 2.5857692692194884e-05,
|
||
|
|
"loss": 0.1055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10303100943565369,
|
||
|
|
"step": 2035,
|
||
|
|
"valid_targets_mean": 882.3,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2640000000000002,
|
||
|
|
"grad_norm": 0.7091217128524674,
|
||
|
|
"learning_rate": 2.5781348916180195e-05,
|
||
|
|
"loss": 0.0998,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.098441481590271,
|
||
|
|
"step": 2040,
|
||
|
|
"valid_targets_mean": 1006.2,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2720000000000002,
|
||
|
|
"grad_norm": 0.8897445923890558,
|
||
|
|
"learning_rate": 2.570491310844755e-05,
|
||
|
|
"loss": 0.1389,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15889273583889008,
|
||
|
|
"step": 2045,
|
||
|
|
"valid_targets_mean": 1019.2,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2800000000000002,
|
||
|
|
"grad_norm": 0.7107182186416007,
|
||
|
|
"learning_rate": 2.562838648575774e-05,
|
||
|
|
"loss": 0.118,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11557357013225555,
|
||
|
|
"step": 2050,
|
||
|
|
"valid_targets_mean": 1069.4,
|
||
|
|
"valid_targets_min": 546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.288,
|
||
|
|
"grad_norm": 0.7129277983851455,
|
||
|
|
"learning_rate": 2.5551770266317224e-05,
|
||
|
|
"loss": 0.1252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10616722702980042,
|
||
|
|
"step": 2055,
|
||
|
|
"valid_targets_mean": 885.9,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.296,
|
||
|
|
"grad_norm": 0.677295678050319,
|
||
|
|
"learning_rate": 2.5475065669758713e-05,
|
||
|
|
"loss": 0.1164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08337932825088501,
|
||
|
|
"step": 2060,
|
||
|
|
"valid_targets_mean": 820.1,
|
||
|
|
"valid_targets_min": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.304,
|
||
|
|
"grad_norm": 0.7824949930209248,
|
||
|
|
"learning_rate": 2.5398273917121786e-05,
|
||
|
|
"loss": 0.0995,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09587116539478302,
|
||
|
|
"step": 2065,
|
||
|
|
"valid_targets_mean": 799.9,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.312,
|
||
|
|
"grad_norm": 1.0827163695354118,
|
||
|
|
"learning_rate": 2.532139623083342e-05,
|
||
|
|
"loss": 0.1472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23146377503871918,
|
||
|
|
"step": 2070,
|
||
|
|
"valid_targets_mean": 1284.2,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.32,
|
||
|
|
"grad_norm": 0.7876540426113231,
|
||
|
|
"learning_rate": 2.5244433834688552e-05,
|
||
|
|
"loss": 0.1476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2188844531774521,
|
||
|
|
"step": 2075,
|
||
|
|
"valid_targets_mean": 1293.9,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.328,
|
||
|
|
"grad_norm": 0.8039211820025277,
|
||
|
|
"learning_rate": 2.5167387953830602e-05,
|
||
|
|
"loss": 0.1103,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1387537121772766,
|
||
|
|
"step": 2080,
|
||
|
|
"valid_targets_mean": 830.8,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.336,
|
||
|
|
"grad_norm": 0.8150464261801589,
|
||
|
|
"learning_rate": 2.5090259814731946e-05,
|
||
|
|
"loss": 0.1188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08860965073108673,
|
||
|
|
"step": 2085,
|
||
|
|
"valid_targets_mean": 765.8,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.344,
|
||
|
|
"grad_norm": 0.7721377677499426,
|
||
|
|
"learning_rate": 2.5013050645174414e-05,
|
||
|
|
"loss": 0.108,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12573741376399994,
|
||
|
|
"step": 2090,
|
||
|
|
"valid_targets_mean": 910.7,
|
||
|
|
"valid_targets_min": 562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.352,
|
||
|
|
"grad_norm": 1.051076408856922,
|
||
|
|
"learning_rate": 2.4935761674229735e-05,
|
||
|
|
"loss": 0.136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17311853170394897,
|
||
|
|
"step": 2095,
|
||
|
|
"valid_targets_mean": 1102.4,
|
||
|
|
"valid_targets_min": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.36,
|
||
|
|
"grad_norm": 0.9010163902542431,
|
||
|
|
"learning_rate": 2.4858394132239982e-05,
|
||
|
|
"loss": 0.1274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15161563456058502,
|
||
|
|
"step": 2100,
|
||
|
|
"valid_targets_mean": 930.6,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.368,
|
||
|
|
"grad_norm": 0.6627204250441622,
|
||
|
|
"learning_rate": 2.4780949250797964e-05,
|
||
|
|
"loss": 0.1309,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08458087593317032,
|
||
|
|
"step": 2105,
|
||
|
|
"valid_targets_mean": 834.9,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.376,
|
||
|
|
"grad_norm": 0.83019951899081,
|
||
|
|
"learning_rate": 2.4703428262727656e-05,
|
||
|
|
"loss": 0.1335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10285459458827972,
|
||
|
|
"step": 2110,
|
||
|
|
"valid_targets_mean": 800.4,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.384,
|
||
|
|
"grad_norm": 0.8268436558039793,
|
||
|
|
"learning_rate": 2.4625832402064525e-05,
|
||
|
|
"loss": 0.1005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1025954931974411,
|
||
|
|
"step": 2115,
|
||
|
|
"valid_targets_mean": 846.8,
|
||
|
|
"valid_targets_min": 539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.392,
|
||
|
|
"grad_norm": 0.8038334993067265,
|
||
|
|
"learning_rate": 2.454816290403595e-05,
|
||
|
|
"loss": 0.1267,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11060905456542969,
|
||
|
|
"step": 2120,
|
||
|
|
"valid_targets_mean": 811.8,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4,
|
||
|
|
"grad_norm": 0.6898907065545627,
|
||
|
|
"learning_rate": 2.4470421005041492e-05,
|
||
|
|
"loss": 0.1041,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1327819973230362,
|
||
|
|
"step": 2125,
|
||
|
|
"valid_targets_mean": 1047.2,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.408,
|
||
|
|
"grad_norm": 4.330284104552601,
|
||
|
|
"learning_rate": 2.4392607942633263e-05,
|
||
|
|
"loss": 0.1015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11577026546001434,
|
||
|
|
"step": 2130,
|
||
|
|
"valid_targets_mean": 1057.9,
|
||
|
|
"valid_targets_min": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.416,
|
||
|
|
"grad_norm": 0.678009922465366,
|
||
|
|
"learning_rate": 2.43147249554962e-05,
|
||
|
|
"loss": 0.1616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08846744149923325,
|
||
|
|
"step": 2135,
|
||
|
|
"valid_targets_mean": 794.7,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.424,
|
||
|
|
"grad_norm": 0.7518169977411372,
|
||
|
|
"learning_rate": 2.423677328342835e-05,
|
||
|
|
"loss": 0.1038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09468118846416473,
|
||
|
|
"step": 2140,
|
||
|
|
"valid_targets_mean": 853.1,
|
||
|
|
"valid_targets_min": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.432,
|
||
|
|
"grad_norm": 0.7984169304764213,
|
||
|
|
"learning_rate": 2.415875416732113e-05,
|
||
|
|
"loss": 0.1174,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1375335156917572,
|
||
|
|
"step": 2145,
|
||
|
|
"valid_targets_mean": 894.8,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.44,
|
||
|
|
"grad_norm": 0.8317134998385687,
|
||
|
|
"learning_rate": 2.4080668849139603e-05,
|
||
|
|
"loss": 0.118,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11628472805023193,
|
||
|
|
"step": 2150,
|
||
|
|
"valid_targets_mean": 955.4,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.448,
|
||
|
|
"grad_norm": 0.8125522664200509,
|
||
|
|
"learning_rate": 2.4002518571902665e-05,
|
||
|
|
"loss": 0.1242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1283692717552185,
|
||
|
|
"step": 2155,
|
||
|
|
"valid_targets_mean": 873.8,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.456,
|
||
|
|
"grad_norm": 0.9200367233165645,
|
||
|
|
"learning_rate": 2.392430457966328e-05,
|
||
|
|
"loss": 0.1109,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1673322319984436,
|
||
|
|
"step": 2160,
|
||
|
|
"valid_targets_mean": 972.5,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.464,
|
||
|
|
"grad_norm": 0.7357592949714992,
|
||
|
|
"learning_rate": 2.3846028117488686e-05,
|
||
|
|
"loss": 0.1079,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1122480109333992,
|
||
|
|
"step": 2165,
|
||
|
|
"valid_targets_mean": 1010.8,
|
||
|
|
"valid_targets_min": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.472,
|
||
|
|
"grad_norm": 0.720650181938497,
|
||
|
|
"learning_rate": 2.3767690431440533e-05,
|
||
|
|
"loss": 0.1212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09994740039110184,
|
||
|
|
"step": 2170,
|
||
|
|
"valid_targets_mean": 817.9,
|
||
|
|
"valid_targets_min": 427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.48,
|
||
|
|
"grad_norm": 0.823217288866293,
|
||
|
|
"learning_rate": 2.368929276855512e-05,
|
||
|
|
"loss": 0.1106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14170852303504944,
|
||
|
|
"step": 2175,
|
||
|
|
"valid_targets_mean": 876.6,
|
||
|
|
"valid_targets_min": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.488,
|
||
|
|
"grad_norm": 1.058760731084061,
|
||
|
|
"learning_rate": 2.361083637682347e-05,
|
||
|
|
"loss": 0.132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10451669245958328,
|
||
|
|
"step": 2180,
|
||
|
|
"valid_targets_mean": 871.3,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.496,
|
||
|
|
"grad_norm": 0.7717649786690791,
|
||
|
|
"learning_rate": 2.3532322505171502e-05,
|
||
|
|
"loss": 0.1136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11375638842582703,
|
||
|
|
"step": 2185,
|
||
|
|
"valid_targets_mean": 828.2,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.504,
|
||
|
|
"grad_norm": 0.7183572277157173,
|
||
|
|
"learning_rate": 2.3453752403440147e-05,
|
||
|
|
"loss": 0.139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1301645189523697,
|
||
|
|
"step": 2190,
|
||
|
|
"valid_targets_mean": 972.3,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.512,
|
||
|
|
"grad_norm": 0.7728123534193143,
|
||
|
|
"learning_rate": 2.337512732236545e-05,
|
||
|
|
"loss": 0.1058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10168056190013885,
|
||
|
|
"step": 2195,
|
||
|
|
"valid_targets_mean": 756.6,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.52,
|
||
|
|
"grad_norm": 1.4339255219311247,
|
||
|
|
"learning_rate": 2.3296448513558628e-05,
|
||
|
|
"loss": 0.1062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09760090708732605,
|
||
|
|
"step": 2200,
|
||
|
|
"valid_targets_mean": 897.6,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.528,
|
||
|
|
"grad_norm": 0.6868233777905957,
|
||
|
|
"learning_rate": 2.321771722948622e-05,
|
||
|
|
"loss": 0.1484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1449117660522461,
|
||
|
|
"step": 2205,
|
||
|
|
"valid_targets_mean": 1630.0,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.536,
|
||
|
|
"grad_norm": 0.6806906401608229,
|
||
|
|
"learning_rate": 2.3138934723450074e-05,
|
||
|
|
"loss": 0.1165,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08565478026866913,
|
||
|
|
"step": 2210,
|
||
|
|
"valid_targets_mean": 856.7,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.544,
|
||
|
|
"grad_norm": 0.7627319393583615,
|
||
|
|
"learning_rate": 2.306010224956744e-05,
|
||
|
|
"loss": 0.1205,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1330558806657791,
|
||
|
|
"step": 2215,
|
||
|
|
"valid_targets_mean": 999.3,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.552,
|
||
|
|
"grad_norm": 0.8247596543816615,
|
||
|
|
"learning_rate": 2.2981221062750986e-05,
|
||
|
|
"loss": 0.1604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14138635993003845,
|
||
|
|
"step": 2220,
|
||
|
|
"valid_targets_mean": 964.9,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.56,
|
||
|
|
"grad_norm": 0.7095954426144264,
|
||
|
|
"learning_rate": 2.290229241868882e-05,
|
||
|
|
"loss": 0.1188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10400180518627167,
|
||
|
|
"step": 2225,
|
||
|
|
"valid_targets_mean": 905.8,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.568,
|
||
|
|
"grad_norm": 0.7841252716092892,
|
||
|
|
"learning_rate": 2.282331757382454e-05,
|
||
|
|
"loss": 0.1637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12375243008136749,
|
||
|
|
"step": 2230,
|
||
|
|
"valid_targets_mean": 1084.1,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.576,
|
||
|
|
"grad_norm": 0.6258147218913532,
|
||
|
|
"learning_rate": 2.2744297785337155e-05,
|
||
|
|
"loss": 0.1236,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08287594467401505,
|
||
|
|
"step": 2235,
|
||
|
|
"valid_targets_mean": 893.2,
|
||
|
|
"valid_targets_min": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.584,
|
||
|
|
"grad_norm": 0.7826941670558715,
|
||
|
|
"learning_rate": 2.2665234311121155e-05,
|
||
|
|
"loss": 0.1209,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11382591724395752,
|
||
|
|
"step": 2240,
|
||
|
|
"valid_targets_mean": 926.1,
|
||
|
|
"valid_targets_min": 562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.592,
|
||
|
|
"grad_norm": 0.7325008965964993,
|
||
|
|
"learning_rate": 2.258612840976645e-05,
|
||
|
|
"loss": 0.1066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09644539654254913,
|
||
|
|
"step": 2245,
|
||
|
|
"valid_targets_mean": 766.8,
|
||
|
|
"valid_targets_min": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6,
|
||
|
|
"grad_norm": 0.682578876278063,
|
||
|
|
"learning_rate": 2.2506981340538315e-05,
|
||
|
|
"loss": 0.1069,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09101128578186035,
|
||
|
|
"step": 2250,
|
||
|
|
"valid_targets_mean": 852.6,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.608,
|
||
|
|
"grad_norm": 0.5290399948663445,
|
||
|
|
"learning_rate": 2.2427794363357384e-05,
|
||
|
|
"loss": 0.1158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11653144657611847,
|
||
|
|
"step": 2255,
|
||
|
|
"valid_targets_mean": 2054.8,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.616,
|
||
|
|
"grad_norm": 0.7175384669599434,
|
||
|
|
"learning_rate": 2.2348568738779566e-05,
|
||
|
|
"loss": 0.1301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10867810249328613,
|
||
|
|
"step": 2260,
|
||
|
|
"valid_targets_mean": 813.8,
|
||
|
|
"valid_targets_min": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.624,
|
||
|
|
"grad_norm": 0.8469177584073655,
|
||
|
|
"learning_rate": 2.2269305727975993e-05,
|
||
|
|
"loss": 0.1317,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14089643955230713,
|
||
|
|
"step": 2265,
|
||
|
|
"valid_targets_mean": 977.0,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.632,
|
||
|
|
"grad_norm": 0.7775610010446571,
|
||
|
|
"learning_rate": 2.2190006592712927e-05,
|
||
|
|
"loss": 0.1193,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13051855564117432,
|
||
|
|
"step": 2270,
|
||
|
|
"valid_targets_mean": 1009.5,
|
||
|
|
"valid_targets_min": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.64,
|
||
|
|
"grad_norm": 0.7374918588799454,
|
||
|
|
"learning_rate": 2.2110672595331698e-05,
|
||
|
|
"loss": 0.1192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09962435066699982,
|
||
|
|
"step": 2275,
|
||
|
|
"valid_targets_mean": 892.7,
|
||
|
|
"valid_targets_min": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.648,
|
||
|
|
"grad_norm": 0.7705278585713413,
|
||
|
|
"learning_rate": 2.2031304998728587e-05,
|
||
|
|
"loss": 0.1043,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11635805666446686,
|
||
|
|
"step": 2280,
|
||
|
|
"valid_targets_mean": 872.3,
|
||
|
|
"valid_targets_min": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.656,
|
||
|
|
"grad_norm": 0.7795818552117938,
|
||
|
|
"learning_rate": 2.1951905066334737e-05,
|
||
|
|
"loss": 0.103,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10012821108102798,
|
||
|
|
"step": 2285,
|
||
|
|
"valid_targets_mean": 758.8,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.664,
|
||
|
|
"grad_norm": 4.9483858153946025,
|
||
|
|
"learning_rate": 2.1872474062096046e-05,
|
||
|
|
"loss": 0.1076,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09752249717712402,
|
||
|
|
"step": 2290,
|
||
|
|
"valid_targets_mean": 850.1,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.672,
|
||
|
|
"grad_norm": 0.8700115929950681,
|
||
|
|
"learning_rate": 2.179301325045301e-05,
|
||
|
|
"loss": 0.1273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19074232876300812,
|
||
|
|
"step": 2295,
|
||
|
|
"valid_targets_mean": 1016.4,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.68,
|
||
|
|
"grad_norm": 1.5334691662476867,
|
||
|
|
"learning_rate": 2.1713523896320647e-05,
|
||
|
|
"loss": 0.1042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08753467351198196,
|
||
|
|
"step": 2300,
|
||
|
|
"valid_targets_mean": 775.1,
|
||
|
|
"valid_targets_min": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6879999999999997,
|
||
|
|
"grad_norm": 0.7194518070589961,
|
||
|
|
"learning_rate": 2.163400726506832e-05,
|
||
|
|
"loss": 0.1049,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08830483257770538,
|
||
|
|
"step": 2305,
|
||
|
|
"valid_targets_mean": 729.5,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6959999999999997,
|
||
|
|
"grad_norm": 0.8815939342282404,
|
||
|
|
"learning_rate": 2.155446462249961e-05,
|
||
|
|
"loss": 0.1391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1877199411392212,
|
||
|
|
"step": 2310,
|
||
|
|
"valid_targets_mean": 1014.9,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7039999999999997,
|
||
|
|
"grad_norm": 0.7630816705563278,
|
||
|
|
"learning_rate": 2.147489723483217e-05,
|
||
|
|
"loss": 0.1023,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10068291425704956,
|
||
|
|
"step": 2315,
|
||
|
|
"valid_targets_mean": 772.1,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7119999999999997,
|
||
|
|
"grad_norm": 0.7382517531107952,
|
||
|
|
"learning_rate": 2.139530636867757e-05,
|
||
|
|
"loss": 0.1038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.097693532705307,
|
||
|
|
"step": 2320,
|
||
|
|
"valid_targets_mean": 845.9,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7199999999999998,
|
||
|
|
"grad_norm": 0.6952681638720215,
|
||
|
|
"learning_rate": 2.1315693291021114e-05,
|
||
|
|
"loss": 0.1044,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09656163305044174,
|
||
|
|
"step": 2325,
|
||
|
|
"valid_targets_mean": 779.2,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7279999999999998,
|
||
|
|
"grad_norm": 0.7011125947204983,
|
||
|
|
"learning_rate": 2.1236059269201686e-05,
|
||
|
|
"loss": 0.0912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08665567636489868,
|
||
|
|
"step": 2330,
|
||
|
|
"valid_targets_mean": 903.8,
|
||
|
|
"valid_targets_min": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7359999999999998,
|
||
|
|
"grad_norm": 0.7596491570380602,
|
||
|
|
"learning_rate": 2.1156405570891584e-05,
|
||
|
|
"loss": 0.106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16104748845100403,
|
||
|
|
"step": 2335,
|
||
|
|
"valid_targets_mean": 1337.7,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7439999999999998,
|
||
|
|
"grad_norm": 0.7837544123142385,
|
||
|
|
"learning_rate": 2.1076733464076322e-05,
|
||
|
|
"loss": 0.1224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10330668836832047,
|
||
|
|
"step": 2340,
|
||
|
|
"valid_targets_mean": 799.4,
|
||
|
|
"valid_targets_min": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.752,
|
||
|
|
"grad_norm": 0.7653711981254095,
|
||
|
|
"learning_rate": 2.0997044217034462e-05,
|
||
|
|
"loss": 0.1066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1075320616364479,
|
||
|
|
"step": 2345,
|
||
|
|
"valid_targets_mean": 756.2,
|
||
|
|
"valid_targets_min": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.76,
|
||
|
|
"grad_norm": 0.8264468923738882,
|
||
|
|
"learning_rate": 2.0917339098317405e-05,
|
||
|
|
"loss": 0.1316,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1973426342010498,
|
||
|
|
"step": 2350,
|
||
|
|
"valid_targets_mean": 1276.9,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.768,
|
||
|
|
"grad_norm": 0.7585885229626271,
|
||
|
|
"learning_rate": 2.083761937672922e-05,
|
||
|
|
"loss": 0.1064,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09834052622318268,
|
||
|
|
"step": 2355,
|
||
|
|
"valid_targets_mean": 793.8,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.776,
|
||
|
|
"grad_norm": 0.743265105231493,
|
||
|
|
"learning_rate": 2.0757886321306433e-05,
|
||
|
|
"loss": 0.1122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10782724618911743,
|
||
|
|
"step": 2360,
|
||
|
|
"valid_targets_mean": 869.9,
|
||
|
|
"valid_targets_min": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.784,
|
||
|
|
"grad_norm": 0.8016117024296018,
|
||
|
|
"learning_rate": 2.0678141201297827e-05,
|
||
|
|
"loss": 0.11,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12636038661003113,
|
||
|
|
"step": 2365,
|
||
|
|
"valid_targets_mean": 1038.0,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.792,
|
||
|
|
"grad_norm": 0.7710823001060885,
|
||
|
|
"learning_rate": 2.059838528614423e-05,
|
||
|
|
"loss": 0.1223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10119035840034485,
|
||
|
|
"step": 2370,
|
||
|
|
"valid_targets_mean": 893.8,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8,
|
||
|
|
"grad_norm": 0.7460626197721942,
|
||
|
|
"learning_rate": 2.0518619845458322e-05,
|
||
|
|
"loss": 0.1604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10346980392932892,
|
||
|
|
"step": 2375,
|
||
|
|
"valid_targets_mean": 807.0,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.808,
|
||
|
|
"grad_norm": 0.8123087104330138,
|
||
|
|
"learning_rate": 2.0438846149004426e-05,
|
||
|
|
"loss": 0.144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1365559697151184,
|
||
|
|
"step": 2380,
|
||
|
|
"valid_targets_mean": 992.1,
|
||
|
|
"valid_targets_min": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.816,
|
||
|
|
"grad_norm": 0.7412259924373668,
|
||
|
|
"learning_rate": 2.0359065466678268e-05,
|
||
|
|
"loss": 0.0945,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09502339363098145,
|
||
|
|
"step": 2385,
|
||
|
|
"valid_targets_mean": 788.4,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.824,
|
||
|
|
"grad_norm": 0.73069039856101,
|
||
|
|
"learning_rate": 2.0279279068486795e-05,
|
||
|
|
"loss": 0.1077,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1051429808139801,
|
||
|
|
"step": 2390,
|
||
|
|
"valid_targets_mean": 861.6,
|
||
|
|
"valid_targets_min": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.832,
|
||
|
|
"grad_norm": 0.6985213047175839,
|
||
|
|
"learning_rate": 2.019948822452794e-05,
|
||
|
|
"loss": 0.1115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09317454695701599,
|
||
|
|
"step": 2395,
|
||
|
|
"valid_targets_mean": 854.2,
|
||
|
|
"valid_targets_min": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.84,
|
||
|
|
"grad_norm": 0.6558227489154802,
|
||
|
|
"learning_rate": 2.0119694204970393e-05,
|
||
|
|
"loss": 0.1105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12951532006263733,
|
||
|
|
"step": 2400,
|
||
|
|
"valid_targets_mean": 1170.6,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.848,
|
||
|
|
"grad_norm": 0.7244860226360714,
|
||
|
|
"learning_rate": 2.0039898280033414e-05,
|
||
|
|
"loss": 0.1045,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10344484448432922,
|
||
|
|
"step": 2405,
|
||
|
|
"valid_targets_mean": 888.8,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.856,
|
||
|
|
"grad_norm": 0.6853248190879865,
|
||
|
|
"learning_rate": 1.9960101719966592e-05,
|
||
|
|
"loss": 0.1229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12488465756177902,
|
||
|
|
"step": 2410,
|
||
|
|
"valid_targets_mean": 1122.1,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.864,
|
||
|
|
"grad_norm": 0.6472647160190197,
|
||
|
|
"learning_rate": 1.9880305795029617e-05,
|
||
|
|
"loss": 0.0977,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10344689339399338,
|
||
|
|
"step": 2415,
|
||
|
|
"valid_targets_mean": 1005.9,
|
||
|
|
"valid_targets_min": 645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.872,
|
||
|
|
"grad_norm": 0.7842785828649766,
|
||
|
|
"learning_rate": 1.980051177547207e-05,
|
||
|
|
"loss": 0.1287,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10108557343482971,
|
||
|
|
"step": 2420,
|
||
|
|
"valid_targets_mean": 900.6,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.88,
|
||
|
|
"grad_norm": 0.8140599499363703,
|
||
|
|
"learning_rate": 1.9720720931513212e-05,
|
||
|
|
"loss": 0.1084,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12927132844924927,
|
||
|
|
"step": 2425,
|
||
|
|
"valid_targets_mean": 966.9,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.888,
|
||
|
|
"grad_norm": 0.7386140762653786,
|
||
|
|
"learning_rate": 1.9640934533321735e-05,
|
||
|
|
"loss": 0.1165,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11146294325590134,
|
||
|
|
"step": 2430,
|
||
|
|
"valid_targets_mean": 841.1,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.896,
|
||
|
|
"grad_norm": 0.9449195329909676,
|
||
|
|
"learning_rate": 1.9561153850995577e-05,
|
||
|
|
"loss": 0.1333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1926388293504715,
|
||
|
|
"step": 2435,
|
||
|
|
"valid_targets_mean": 1120.4,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.904,
|
||
|
|
"grad_norm": 0.7623301589954754,
|
||
|
|
"learning_rate": 1.948138015454168e-05,
|
||
|
|
"loss": 0.1303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1025683581829071,
|
||
|
|
"step": 2440,
|
||
|
|
"valid_targets_mean": 899.2,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.912,
|
||
|
|
"grad_norm": 0.8466799342788941,
|
||
|
|
"learning_rate": 1.9401614713855775e-05,
|
||
|
|
"loss": 0.114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1362592577934265,
|
||
|
|
"step": 2445,
|
||
|
|
"valid_targets_mean": 1037.2,
|
||
|
|
"valid_targets_min": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.92,
|
||
|
|
"grad_norm": 1.5796336446869077,
|
||
|
|
"learning_rate": 1.932185879870218e-05,
|
||
|
|
"loss": 0.1257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12985458970069885,
|
||
|
|
"step": 2450,
|
||
|
|
"valid_targets_mean": 1039.4,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.928,
|
||
|
|
"grad_norm": 0.829075498076157,
|
||
|
|
"learning_rate": 1.924211367869357e-05,
|
||
|
|
"loss": 0.1156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11858369410037994,
|
||
|
|
"step": 2455,
|
||
|
|
"valid_targets_mean": 892.6,
|
||
|
|
"valid_targets_min": 438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.936,
|
||
|
|
"grad_norm": 1.0985300935297633,
|
||
|
|
"learning_rate": 1.9162380623270783e-05,
|
||
|
|
"loss": 0.1139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12260019034147263,
|
||
|
|
"step": 2460,
|
||
|
|
"valid_targets_mean": 891.1,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.944,
|
||
|
|
"grad_norm": 0.8323078229026958,
|
||
|
|
"learning_rate": 1.90826609016826e-05,
|
||
|
|
"loss": 0.1101,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1549152135848999,
|
||
|
|
"step": 2465,
|
||
|
|
"valid_targets_mean": 966.0,
|
||
|
|
"valid_targets_min": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.952,
|
||
|
|
"grad_norm": 0.7646826806626748,
|
||
|
|
"learning_rate": 1.9002955782965548e-05,
|
||
|
|
"loss": 0.1011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10078634321689606,
|
||
|
|
"step": 2470,
|
||
|
|
"valid_targets_mean": 918.1,
|
||
|
|
"valid_targets_min": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.96,
|
||
|
|
"grad_norm": 0.9739262577751554,
|
||
|
|
"learning_rate": 1.8923266535923688e-05,
|
||
|
|
"loss": 0.1162,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15166492760181427,
|
||
|
|
"step": 2475,
|
||
|
|
"valid_targets_mean": 941.0,
|
||
|
|
"valid_targets_min": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.968,
|
||
|
|
"grad_norm": 0.7603308818023086,
|
||
|
|
"learning_rate": 1.8843594429108426e-05,
|
||
|
|
"loss": 0.1379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16866816580295563,
|
||
|
|
"step": 2480,
|
||
|
|
"valid_targets_mean": 1221.4,
|
||
|
|
"valid_targets_min": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.976,
|
||
|
|
"grad_norm": 0.7914420062717339,
|
||
|
|
"learning_rate": 1.8763940730798324e-05,
|
||
|
|
"loss": 0.1063,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1709909737110138,
|
||
|
|
"step": 2485,
|
||
|
|
"valid_targets_mean": 1715.1,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.984,
|
||
|
|
"grad_norm": 0.6954145735909032,
|
||
|
|
"learning_rate": 1.8684306708978896e-05,
|
||
|
|
"loss": 0.129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0953080952167511,
|
||
|
|
"step": 2490,
|
||
|
|
"valid_targets_mean": 783.3,
|
||
|
|
"valid_targets_min": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.992,
|
||
|
|
"grad_norm": 0.796723667327091,
|
||
|
|
"learning_rate": 1.8604693631322433e-05,
|
||
|
|
"loss": 0.1194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1432575285434723,
|
||
|
|
"step": 2495,
|
||
|
|
"valid_targets_mean": 931.0,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0,
|
||
|
|
"grad_norm": 0.7452630637139206,
|
||
|
|
"learning_rate": 1.852510276516783e-05,
|
||
|
|
"loss": 0.107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09386100620031357,
|
||
|
|
"step": 2500,
|
||
|
|
"valid_targets_mean": 808.5,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.008,
|
||
|
|
"grad_norm": 0.6039705377236406,
|
||
|
|
"learning_rate": 1.8445535377500393e-05,
|
||
|
|
"loss": 0.0829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07107095420360565,
|
||
|
|
"step": 2505,
|
||
|
|
"valid_targets_mean": 872.4,
|
||
|
|
"valid_targets_min": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.016,
|
||
|
|
"grad_norm": 0.7217484500470416,
|
||
|
|
"learning_rate": 1.8365992734931686e-05,
|
||
|
|
"loss": 0.0826,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06790906190872192,
|
||
|
|
"step": 2510,
|
||
|
|
"valid_targets_mean": 872.8,
|
||
|
|
"valid_targets_min": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.024,
|
||
|
|
"grad_norm": 0.9824074436234963,
|
||
|
|
"learning_rate": 1.8286476103679356e-05,
|
||
|
|
"loss": 0.0941,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0929323211312294,
|
||
|
|
"step": 2515,
|
||
|
|
"valid_targets_mean": 864.1,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.032,
|
||
|
|
"grad_norm": 1.0904545035112219,
|
||
|
|
"learning_rate": 1.8206986749546992e-05,
|
||
|
|
"loss": 0.1004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07187691330909729,
|
||
|
|
"step": 2520,
|
||
|
|
"valid_targets_mean": 899.8,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.04,
|
||
|
|
"grad_norm": 0.6390439813848777,
|
||
|
|
"learning_rate": 1.8127525937903957e-05,
|
||
|
|
"loss": 0.0945,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06667788326740265,
|
||
|
|
"step": 2525,
|
||
|
|
"valid_targets_mean": 1012.2,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.048,
|
||
|
|
"grad_norm": 0.8624151009076726,
|
||
|
|
"learning_rate": 1.8048094933665262e-05,
|
||
|
|
"loss": 0.0796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07868341356515884,
|
||
|
|
"step": 2530,
|
||
|
|
"valid_targets_mean": 920.8,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.056,
|
||
|
|
"grad_norm": 0.9692779244154147,
|
||
|
|
"learning_rate": 1.7968695001271416e-05,
|
||
|
|
"loss": 0.0808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06797877699136734,
|
||
|
|
"step": 2535,
|
||
|
|
"valid_targets_mean": 882.9,
|
||
|
|
"valid_targets_min": 538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.064,
|
||
|
|
"grad_norm": 0.7879509422386841,
|
||
|
|
"learning_rate": 1.7889327404668316e-05,
|
||
|
|
"loss": 0.0889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06811286509037018,
|
||
|
|
"step": 2540,
|
||
|
|
"valid_targets_mean": 657.0,
|
||
|
|
"valid_targets_min": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.072,
|
||
|
|
"grad_norm": 0.8343540443495483,
|
||
|
|
"learning_rate": 1.7809993407287083e-05,
|
||
|
|
"loss": 0.1143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0867060124874115,
|
||
|
|
"step": 2545,
|
||
|
|
"valid_targets_mean": 962.3,
|
||
|
|
"valid_targets_min": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.08,
|
||
|
|
"grad_norm": 0.6517667540067708,
|
||
|
|
"learning_rate": 1.7730694272024018e-05,
|
||
|
|
"loss": 0.0755,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07166572660207748,
|
||
|
|
"step": 2550,
|
||
|
|
"valid_targets_mean": 792.6,
|
||
|
|
"valid_targets_min": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.088,
|
||
|
|
"grad_norm": 0.9409298974066815,
|
||
|
|
"learning_rate": 1.765143126122044e-05,
|
||
|
|
"loss": 0.0808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07075785845518112,
|
||
|
|
"step": 2555,
|
||
|
|
"valid_targets_mean": 701.7,
|
||
|
|
"valid_targets_min": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.096,
|
||
|
|
"grad_norm": 0.9303683290460381,
|
||
|
|
"learning_rate": 1.7572205636642622e-05,
|
||
|
|
"loss": 0.0924,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.077401302754879,
|
||
|
|
"step": 2560,
|
||
|
|
"valid_targets_mean": 734.2,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.104,
|
||
|
|
"grad_norm": 0.8323596502058777,
|
||
|
|
"learning_rate": 1.749301865946169e-05,
|
||
|
|
"loss": 0.1043,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07804550230503082,
|
||
|
|
"step": 2565,
|
||
|
|
"valid_targets_mean": 860.5,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.112,
|
||
|
|
"grad_norm": 0.8559619720793094,
|
||
|
|
"learning_rate": 1.7413871590233557e-05,
|
||
|
|
"loss": 0.0925,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0837281197309494,
|
||
|
|
"step": 2570,
|
||
|
|
"valid_targets_mean": 873.4,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.12,
|
||
|
|
"grad_norm": 0.7121674580406296,
|
||
|
|
"learning_rate": 1.7334765688878848e-05,
|
||
|
|
"loss": 0.0933,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06879082322120667,
|
||
|
|
"step": 2575,
|
||
|
|
"valid_targets_mean": 798.9,
|
||
|
|
"valid_targets_min": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.128,
|
||
|
|
"grad_norm": 0.9971120495500967,
|
||
|
|
"learning_rate": 1.7255702214662852e-05,
|
||
|
|
"loss": 0.086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09851626306772232,
|
||
|
|
"step": 2580,
|
||
|
|
"valid_targets_mean": 781.4,
|
||
|
|
"valid_targets_min": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.136,
|
||
|
|
"grad_norm": 0.9219539955019028,
|
||
|
|
"learning_rate": 1.7176682426175468e-05,
|
||
|
|
"loss": 0.1081,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0875345915555954,
|
||
|
|
"step": 2585,
|
||
|
|
"valid_targets_mean": 974.8,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.144,
|
||
|
|
"grad_norm": 1.2717092381532147,
|
||
|
|
"learning_rate": 1.709770758131118e-05,
|
||
|
|
"loss": 0.0926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1317518949508667,
|
||
|
|
"step": 2590,
|
||
|
|
"valid_targets_mean": 1149.6,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.152,
|
||
|
|
"grad_norm": 0.8800123242601117,
|
||
|
|
"learning_rate": 1.7018778937249017e-05,
|
||
|
|
"loss": 0.0876,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10916811227798462,
|
||
|
|
"step": 2595,
|
||
|
|
"valid_targets_mean": 989.7,
|
||
|
|
"valid_targets_min": 657
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.16,
|
||
|
|
"grad_norm": 0.9112036604014394,
|
||
|
|
"learning_rate": 1.6939897750432562e-05,
|
||
|
|
"loss": 0.098,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09577897936105728,
|
||
|
|
"step": 2600,
|
||
|
|
"valid_targets_mean": 955.5,
|
||
|
|
"valid_targets_min": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.168,
|
||
|
|
"grad_norm": 0.8387591446603542,
|
||
|
|
"learning_rate": 1.6861065276549933e-05,
|
||
|
|
"loss": 0.0936,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07986228913068771,
|
||
|
|
"step": 2605,
|
||
|
|
"valid_targets_mean": 803.5,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.176,
|
||
|
|
"grad_norm": 0.7204257329644403,
|
||
|
|
"learning_rate": 1.6782282770513788e-05,
|
||
|
|
"loss": 0.098,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07463609427213669,
|
||
|
|
"step": 2610,
|
||
|
|
"valid_targets_mean": 854.4,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.184,
|
||
|
|
"grad_norm": 0.8612927199672735,
|
||
|
|
"learning_rate": 1.6703551486441382e-05,
|
||
|
|
"loss": 0.1031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11520431935787201,
|
||
|
|
"step": 2615,
|
||
|
|
"valid_targets_mean": 975.1,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.192,
|
||
|
|
"grad_norm": 0.8664320213490005,
|
||
|
|
"learning_rate": 1.6624872677634565e-05,
|
||
|
|
"loss": 0.0913,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08751243352890015,
|
||
|
|
"step": 2620,
|
||
|
|
"valid_targets_mean": 972.4,
|
||
|
|
"valid_targets_min": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2,
|
||
|
|
"grad_norm": 0.9681275417079883,
|
||
|
|
"learning_rate": 1.654624759655986e-05,
|
||
|
|
"loss": 0.0984,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09813649952411652,
|
||
|
|
"step": 2625,
|
||
|
|
"valid_targets_mean": 886.8,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.208,
|
||
|
|
"grad_norm": 1.0407521644002597,
|
||
|
|
"learning_rate": 1.64676774948285e-05,
|
||
|
|
"loss": 0.0926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09159094095230103,
|
||
|
|
"step": 2630,
|
||
|
|
"valid_targets_mean": 846.4,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.216,
|
||
|
|
"grad_norm": 0.7173554015192248,
|
||
|
|
"learning_rate": 1.6389163623176536e-05,
|
||
|
|
"loss": 0.1215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0797024741768837,
|
||
|
|
"step": 2635,
|
||
|
|
"valid_targets_mean": 901.9,
|
||
|
|
"valid_targets_min": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.224,
|
||
|
|
"grad_norm": 0.8592769401558998,
|
||
|
|
"learning_rate": 1.6310707231444884e-05,
|
||
|
|
"loss": 0.085,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07951955497264862,
|
||
|
|
"step": 2640,
|
||
|
|
"valid_targets_mean": 827.6,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.232,
|
||
|
|
"grad_norm": 0.8306119399846523,
|
||
|
|
"learning_rate": 1.623230956855947e-05,
|
||
|
|
"loss": 0.0723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09050008654594421,
|
||
|
|
"step": 2645,
|
||
|
|
"valid_targets_mean": 761.6,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.24,
|
||
|
|
"grad_norm": 0.7665148049485115,
|
||
|
|
"learning_rate": 1.6153971882511324e-05,
|
||
|
|
"loss": 0.079,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07178527861833572,
|
||
|
|
"step": 2650,
|
||
|
|
"valid_targets_mean": 760.4,
|
||
|
|
"valid_targets_min": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.248,
|
||
|
|
"grad_norm": 0.7692520199355617,
|
||
|
|
"learning_rate": 1.6075695420336724e-05,
|
||
|
|
"loss": 0.1043,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07028958946466446,
|
||
|
|
"step": 2655,
|
||
|
|
"valid_targets_mean": 810.2,
|
||
|
|
"valid_targets_min": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.256,
|
||
|
|
"grad_norm": 0.7316939881648934,
|
||
|
|
"learning_rate": 1.5997481428097338e-05,
|
||
|
|
"loss": 0.1044,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13221722841262817,
|
||
|
|
"step": 2660,
|
||
|
|
"valid_targets_mean": 1782.2,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.264,
|
||
|
|
"grad_norm": 0.8770025162268091,
|
||
|
|
"learning_rate": 1.5919331150860396e-05,
|
||
|
|
"loss": 0.1006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1358993798494339,
|
||
|
|
"step": 2665,
|
||
|
|
"valid_targets_mean": 1218.9,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.272,
|
||
|
|
"grad_norm": 0.746194385306678,
|
||
|
|
"learning_rate": 1.5841245832678873e-05,
|
||
|
|
"loss": 0.0975,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.080272376537323,
|
||
|
|
"step": 2670,
|
||
|
|
"valid_targets_mean": 788.8,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.28,
|
||
|
|
"grad_norm": 0.7789493922430735,
|
||
|
|
"learning_rate": 1.576322671657166e-05,
|
||
|
|
"loss": 0.0842,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07402622699737549,
|
||
|
|
"step": 2675,
|
||
|
|
"valid_targets_mean": 819.1,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.288,
|
||
|
|
"grad_norm": 0.766158481713601,
|
||
|
|
"learning_rate": 1.5685275044503804e-05,
|
||
|
|
"loss": 0.0954,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0761728435754776,
|
||
|
|
"step": 2680,
|
||
|
|
"valid_targets_mean": 1043.2,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.296,
|
||
|
|
"grad_norm": 0.9738329205245164,
|
||
|
|
"learning_rate": 1.560739205736674e-05,
|
||
|
|
"loss": 0.0907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08560563623905182,
|
||
|
|
"step": 2685,
|
||
|
|
"valid_targets_mean": 881.9,
|
||
|
|
"valid_targets_min": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.304,
|
||
|
|
"grad_norm": 0.917401924751551,
|
||
|
|
"learning_rate": 1.552957899495851e-05,
|
||
|
|
"loss": 0.1094,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15787991881370544,
|
||
|
|
"step": 2690,
|
||
|
|
"valid_targets_mean": 976.4,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.312,
|
||
|
|
"grad_norm": 0.8916162194334754,
|
||
|
|
"learning_rate": 1.5451837095964054e-05,
|
||
|
|
"loss": 0.0902,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09921112656593323,
|
||
|
|
"step": 2695,
|
||
|
|
"valid_targets_mean": 1003.8,
|
||
|
|
"valid_targets_min": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.32,
|
||
|
|
"grad_norm": 0.8708766115420277,
|
||
|
|
"learning_rate": 1.5374167597935478e-05,
|
||
|
|
"loss": 0.1017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08169998228549957,
|
||
|
|
"step": 2700,
|
||
|
|
"valid_targets_mean": 932.9,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.328,
|
||
|
|
"grad_norm": 0.776023741893538,
|
||
|
|
"learning_rate": 1.5296571737272354e-05,
|
||
|
|
"loss": 0.0734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08336486667394638,
|
||
|
|
"step": 2705,
|
||
|
|
"valid_targets_mean": 815.6,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.336,
|
||
|
|
"grad_norm": 0.9542252355636845,
|
||
|
|
"learning_rate": 1.5219050749202037e-05,
|
||
|
|
"loss": 0.1095,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19591785967350006,
|
||
|
|
"step": 2710,
|
||
|
|
"valid_targets_mean": 1389.9,
|
||
|
|
"valid_targets_min": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.344,
|
||
|
|
"grad_norm": 0.8864125490442216,
|
||
|
|
"learning_rate": 1.5141605867760021e-05,
|
||
|
|
"loss": 0.0798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08587196469306946,
|
||
|
|
"step": 2715,
|
||
|
|
"valid_targets_mean": 822.1,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.352,
|
||
|
|
"grad_norm": 0.7527185671619987,
|
||
|
|
"learning_rate": 1.5064238325770267e-05,
|
||
|
|
"loss": 0.0789,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07073096930980682,
|
||
|
|
"step": 2720,
|
||
|
|
"valid_targets_mean": 861.9,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.36,
|
||
|
|
"grad_norm": 0.7512586587741376,
|
||
|
|
"learning_rate": 1.498694935482559e-05,
|
||
|
|
"loss": 0.0902,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07099224627017975,
|
||
|
|
"step": 2725,
|
||
|
|
"valid_targets_mean": 812.6,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.368,
|
||
|
|
"grad_norm": 0.9405061164636517,
|
||
|
|
"learning_rate": 1.4909740185268056e-05,
|
||
|
|
"loss": 0.1134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09878502786159515,
|
||
|
|
"step": 2730,
|
||
|
|
"valid_targets_mean": 839.1,
|
||
|
|
"valid_targets_min": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.376,
|
||
|
|
"grad_norm": 0.9052795228667068,
|
||
|
|
"learning_rate": 1.4832612046169408e-05,
|
||
|
|
"loss": 0.0889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11052730679512024,
|
||
|
|
"step": 2735,
|
||
|
|
"valid_targets_mean": 847.9,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.384,
|
||
|
|
"grad_norm": 0.7849642630781102,
|
||
|
|
"learning_rate": 1.4755566165311455e-05,
|
||
|
|
"loss": 0.0998,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08098804950714111,
|
||
|
|
"step": 2740,
|
||
|
|
"valid_targets_mean": 779.3,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.392,
|
||
|
|
"grad_norm": 0.9370098909939654,
|
||
|
|
"learning_rate": 1.4678603769166591e-05,
|
||
|
|
"loss": 0.0965,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10258492827415466,
|
||
|
|
"step": 2745,
|
||
|
|
"valid_targets_mean": 838.2,
|
||
|
|
"valid_targets_min": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4,
|
||
|
|
"grad_norm": 0.6916768442414201,
|
||
|
|
"learning_rate": 1.4601726082878226e-05,
|
||
|
|
"loss": 0.099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.062106356024742126,
|
||
|
|
"step": 2750,
|
||
|
|
"valid_targets_mean": 851.3,
|
||
|
|
"valid_targets_min": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.408,
|
||
|
|
"grad_norm": 0.6729245043658673,
|
||
|
|
"learning_rate": 1.4524934330241292e-05,
|
||
|
|
"loss": 0.0953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06780697405338287,
|
||
|
|
"step": 2755,
|
||
|
|
"valid_targets_mean": 753.4,
|
||
|
|
"valid_targets_min": 427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.416,
|
||
|
|
"grad_norm": 1.0448003759897928,
|
||
|
|
"learning_rate": 1.4448229733682784e-05,
|
||
|
|
"loss": 0.0796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07199327647686005,
|
||
|
|
"step": 2760,
|
||
|
|
"valid_targets_mean": 857.9,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.424,
|
||
|
|
"grad_norm": 0.7783769295559022,
|
||
|
|
"learning_rate": 1.4371613514242264e-05,
|
||
|
|
"loss": 0.1168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1220938190817833,
|
||
|
|
"step": 2765,
|
||
|
|
"valid_targets_mean": 1227.6,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.432,
|
||
|
|
"grad_norm": 0.8959322564560872,
|
||
|
|
"learning_rate": 1.4295086891552457e-05,
|
||
|
|
"loss": 0.0813,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08298659324645996,
|
||
|
|
"step": 2770,
|
||
|
|
"valid_targets_mean": 764.9,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.44,
|
||
|
|
"grad_norm": 0.9298947907055948,
|
||
|
|
"learning_rate": 1.4218651083819811e-05,
|
||
|
|
"loss": 0.0957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07381783425807953,
|
||
|
|
"step": 2775,
|
||
|
|
"valid_targets_mean": 780.7,
|
||
|
|
"valid_targets_min": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.448,
|
||
|
|
"grad_norm": 0.7448045989894762,
|
||
|
|
"learning_rate": 1.4142307307805125e-05,
|
||
|
|
"loss": 0.0864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06524106860160828,
|
||
|
|
"step": 2780,
|
||
|
|
"valid_targets_mean": 857.9,
|
||
|
|
"valid_targets_min": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.456,
|
||
|
|
"grad_norm": 0.8049920591319956,
|
||
|
|
"learning_rate": 1.406605677880416e-05,
|
||
|
|
"loss": 0.0862,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09014227241277695,
|
||
|
|
"step": 2785,
|
||
|
|
"valid_targets_mean": 1201.6,
|
||
|
|
"valid_targets_min": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.464,
|
||
|
|
"grad_norm": 0.8404309290675868,
|
||
|
|
"learning_rate": 1.3989900710628313e-05,
|
||
|
|
"loss": 0.0951,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09047380089759827,
|
||
|
|
"step": 2790,
|
||
|
|
"valid_targets_mean": 800.4,
|
||
|
|
"valid_targets_min": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4719999999999995,
|
||
|
|
"grad_norm": 0.9066055301702886,
|
||
|
|
"learning_rate": 1.3913840315585279e-05,
|
||
|
|
"loss": 0.0816,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09277326613664627,
|
||
|
|
"step": 2795,
|
||
|
|
"valid_targets_mean": 829.0,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.48,
|
||
|
|
"grad_norm": 0.8598771752202647,
|
||
|
|
"learning_rate": 1.3837876804459765e-05,
|
||
|
|
"loss": 0.1227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08901967108249664,
|
||
|
|
"step": 2800,
|
||
|
|
"valid_targets_mean": 868.4,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4879999999999995,
|
||
|
|
"grad_norm": 0.783672572024054,
|
||
|
|
"learning_rate": 1.3762011386494191e-05,
|
||
|
|
"loss": 0.0828,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07599220424890518,
|
||
|
|
"step": 2805,
|
||
|
|
"valid_targets_mean": 744.5,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.496,
|
||
|
|
"grad_norm": 0.7141310435933745,
|
||
|
|
"learning_rate": 1.3686245269369485e-05,
|
||
|
|
"loss": 0.0915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06869702786207199,
|
||
|
|
"step": 2810,
|
||
|
|
"valid_targets_mean": 940.9,
|
||
|
|
"valid_targets_min": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.504,
|
||
|
|
"grad_norm": 0.9025153880900855,
|
||
|
|
"learning_rate": 1.3610579659185809e-05,
|
||
|
|
"loss": 0.0945,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11102897673845291,
|
||
|
|
"step": 2815,
|
||
|
|
"valid_targets_mean": 844.6,
|
||
|
|
"valid_targets_min": 423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5120000000000005,
|
||
|
|
"grad_norm": 0.800663468084433,
|
||
|
|
"learning_rate": 1.35350157604434e-05,
|
||
|
|
"loss": 0.085,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07541121542453766,
|
||
|
|
"step": 2820,
|
||
|
|
"valid_targets_mean": 790.8,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.52,
|
||
|
|
"grad_norm": 0.8081250786086973,
|
||
|
|
"learning_rate": 1.345955477602337e-05,
|
||
|
|
"loss": 0.0937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08529967069625854,
|
||
|
|
"step": 2825,
|
||
|
|
"valid_targets_mean": 859.4,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5280000000000005,
|
||
|
|
"grad_norm": 0.8960539536732155,
|
||
|
|
"learning_rate": 1.3384197907168561e-05,
|
||
|
|
"loss": 0.1003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09535820782184601,
|
||
|
|
"step": 2830,
|
||
|
|
"valid_targets_mean": 790.9,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.536,
|
||
|
|
"grad_norm": 0.9356285004464241,
|
||
|
|
"learning_rate": 1.3308946353464438e-05,
|
||
|
|
"loss": 0.1165,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10715999454259872,
|
||
|
|
"step": 2835,
|
||
|
|
"valid_targets_mean": 1112.6,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5440000000000005,
|
||
|
|
"grad_norm": 0.7125180425061677,
|
||
|
|
"learning_rate": 1.3233801312819979e-05,
|
||
|
|
"loss": 0.0882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06889170408248901,
|
||
|
|
"step": 2840,
|
||
|
|
"valid_targets_mean": 868.6,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.552,
|
||
|
|
"grad_norm": 0.9666072232685547,
|
||
|
|
"learning_rate": 1.3158763981448606e-05,
|
||
|
|
"loss": 0.0868,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11292714625597,
|
||
|
|
"step": 2845,
|
||
|
|
"valid_targets_mean": 986.9,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5600000000000005,
|
||
|
|
"grad_norm": 0.9204304591677285,
|
||
|
|
"learning_rate": 1.3083835553849148e-05,
|
||
|
|
"loss": 0.0797,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08996661007404327,
|
||
|
|
"step": 2850,
|
||
|
|
"valid_targets_mean": 919.4,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.568,
|
||
|
|
"grad_norm": 0.7098940896911101,
|
||
|
|
"learning_rate": 1.3009017222786828e-05,
|
||
|
|
"loss": 0.0943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07091812789440155,
|
||
|
|
"step": 2855,
|
||
|
|
"valid_targets_mean": 887.1,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.576,
|
||
|
|
"grad_norm": 0.9075241215569958,
|
||
|
|
"learning_rate": 1.2934310179274269e-05,
|
||
|
|
"loss": 0.088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10161441564559937,
|
||
|
|
"step": 2860,
|
||
|
|
"valid_targets_mean": 974.2,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.584,
|
||
|
|
"grad_norm": 0.9322718683943222,
|
||
|
|
"learning_rate": 1.2859715612552541e-05,
|
||
|
|
"loss": 0.105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0803801566362381,
|
||
|
|
"step": 2865,
|
||
|
|
"valid_targets_mean": 951.9,
|
||
|
|
"valid_targets_min": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.592,
|
||
|
|
"grad_norm": 0.8313467086832508,
|
||
|
|
"learning_rate": 1.278523471007223e-05,
|
||
|
|
"loss": 0.091,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09433354437351227,
|
||
|
|
"step": 2870,
|
||
|
|
"valid_targets_mean": 1132.4,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6,
|
||
|
|
"grad_norm": 0.8079314142812989,
|
||
|
|
"learning_rate": 1.271086865747451e-05,
|
||
|
|
"loss": 0.1164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08496357500553131,
|
||
|
|
"step": 2875,
|
||
|
|
"valid_targets_mean": 833.5,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.608,
|
||
|
|
"grad_norm": 1.0236303127288213,
|
||
|
|
"learning_rate": 1.2636618638572316e-05,
|
||
|
|
"loss": 0.0905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12658067047595978,
|
||
|
|
"step": 2880,
|
||
|
|
"valid_targets_mean": 881.5,
|
||
|
|
"valid_targets_min": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.616,
|
||
|
|
"grad_norm": 0.8333427076318738,
|
||
|
|
"learning_rate": 1.2562485835331466e-05,
|
||
|
|
"loss": 0.0808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08028241991996765,
|
||
|
|
"step": 2885,
|
||
|
|
"valid_targets_mean": 748.9,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.624,
|
||
|
|
"grad_norm": 0.9768936929223744,
|
||
|
|
"learning_rate": 1.2488471427851852e-05,
|
||
|
|
"loss": 0.1008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12225892394781113,
|
||
|
|
"step": 2890,
|
||
|
|
"valid_targets_mean": 919.8,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.632,
|
||
|
|
"grad_norm": 0.6897936650252694,
|
||
|
|
"learning_rate": 1.241457659434866e-05,
|
||
|
|
"loss": 0.0978,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0659201443195343,
|
||
|
|
"step": 2895,
|
||
|
|
"valid_targets_mean": 821.9,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.64,
|
||
|
|
"grad_norm": 0.8876098353391305,
|
||
|
|
"learning_rate": 1.2340802511133605e-05,
|
||
|
|
"loss": 0.0817,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09190486371517181,
|
||
|
|
"step": 2900,
|
||
|
|
"valid_targets_mean": 868.3,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.648,
|
||
|
|
"grad_norm": 1.0077771221444112,
|
||
|
|
"learning_rate": 1.2267150352596216e-05,
|
||
|
|
"loss": 0.1091,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1861191987991333,
|
||
|
|
"step": 2905,
|
||
|
|
"valid_targets_mean": 1313.7,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.656,
|
||
|
|
"grad_norm": 0.9368442988921992,
|
||
|
|
"learning_rate": 1.2193621291185132e-05,
|
||
|
|
"loss": 0.1175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20001402497291565,
|
||
|
|
"step": 2910,
|
||
|
|
"valid_targets_mean": 1509.2,
|
||
|
|
"valid_targets_min": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.664,
|
||
|
|
"grad_norm": 0.8626802211102149,
|
||
|
|
"learning_rate": 1.2120216497389446e-05,
|
||
|
|
"loss": 0.1291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13889957964420319,
|
||
|
|
"step": 2915,
|
||
|
|
"valid_targets_mean": 1055.9,
|
||
|
|
"valid_targets_min": 602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.672,
|
||
|
|
"grad_norm": 0.8381078363134903,
|
||
|
|
"learning_rate": 1.2046937139720068e-05,
|
||
|
|
"loss": 0.0911,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11396999657154083,
|
||
|
|
"step": 2920,
|
||
|
|
"valid_targets_mean": 952.6,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.68,
|
||
|
|
"grad_norm": 0.8337297954472803,
|
||
|
|
"learning_rate": 1.1973784384691121e-05,
|
||
|
|
"loss": 0.0975,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09346058964729309,
|
||
|
|
"step": 2925,
|
||
|
|
"valid_targets_mean": 856.2,
|
||
|
|
"valid_targets_min": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.688,
|
||
|
|
"grad_norm": 0.7029624221310162,
|
||
|
|
"learning_rate": 1.1900759396801382e-05,
|
||
|
|
"loss": 0.0907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06849217414855957,
|
||
|
|
"step": 2930,
|
||
|
|
"valid_targets_mean": 898.8,
|
||
|
|
"valid_targets_min": 602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.696,
|
||
|
|
"grad_norm": 0.7818350801821872,
|
||
|
|
"learning_rate": 1.1827863338515741e-05,
|
||
|
|
"loss": 0.092,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07988164573907852,
|
||
|
|
"step": 2935,
|
||
|
|
"valid_targets_mean": 707.1,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.704,
|
||
|
|
"grad_norm": 0.7756061520131283,
|
||
|
|
"learning_rate": 1.1755097370246669e-05,
|
||
|
|
"loss": 0.0877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07403562217950821,
|
||
|
|
"step": 2940,
|
||
|
|
"valid_targets_mean": 949.6,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.712,
|
||
|
|
"grad_norm": 0.7173002311933365,
|
||
|
|
"learning_rate": 1.1682462650335791e-05,
|
||
|
|
"loss": 0.098,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07594731450080872,
|
||
|
|
"step": 2945,
|
||
|
|
"valid_targets_mean": 799.7,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.72,
|
||
|
|
"grad_norm": 0.7896332184742297,
|
||
|
|
"learning_rate": 1.1609960335035423e-05,
|
||
|
|
"loss": 0.0818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08279333263635635,
|
||
|
|
"step": 2950,
|
||
|
|
"valid_targets_mean": 984.9,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.728,
|
||
|
|
"grad_norm": 0.9776144738454718,
|
||
|
|
"learning_rate": 1.1537591578490165e-05,
|
||
|
|
"loss": 0.0998,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1456027776002884,
|
||
|
|
"step": 2955,
|
||
|
|
"valid_targets_mean": 1332.9,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.736,
|
||
|
|
"grad_norm": 0.9359507220580169,
|
||
|
|
"learning_rate": 1.146535753271853e-05,
|
||
|
|
"loss": 0.1202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15099552273750305,
|
||
|
|
"step": 2960,
|
||
|
|
"valid_targets_mean": 1355.7,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.744,
|
||
|
|
"grad_norm": 0.7220819850208092,
|
||
|
|
"learning_rate": 1.139325934759461e-05,
|
||
|
|
"loss": 0.0822,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09187281131744385,
|
||
|
|
"step": 2965,
|
||
|
|
"valid_targets_mean": 1074.4,
|
||
|
|
"valid_targets_min": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.752,
|
||
|
|
"grad_norm": 0.5905807746665216,
|
||
|
|
"learning_rate": 1.1321298170829768e-05,
|
||
|
|
"loss": 0.0922,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10941527783870697,
|
||
|
|
"step": 2970,
|
||
|
|
"valid_targets_mean": 2202.9,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.76,
|
||
|
|
"grad_norm": 1.112799196030323,
|
||
|
|
"learning_rate": 1.1249475147954363e-05,
|
||
|
|
"loss": 0.0824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10790747404098511,
|
||
|
|
"step": 2975,
|
||
|
|
"valid_targets_mean": 882.8,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.768,
|
||
|
|
"grad_norm": 0.6566387025187009,
|
||
|
|
"learning_rate": 1.1177791422299528e-05,
|
||
|
|
"loss": 0.0847,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06682457029819489,
|
||
|
|
"step": 2980,
|
||
|
|
"valid_targets_mean": 1061.5,
|
||
|
|
"valid_targets_min": 526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.776,
|
||
|
|
"grad_norm": 0.9252893663043001,
|
||
|
|
"learning_rate": 1.1106248134978959e-05,
|
||
|
|
"loss": 0.0821,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10607043653726578,
|
||
|
|
"step": 2985,
|
||
|
|
"valid_targets_mean": 961.8,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.784,
|
||
|
|
"grad_norm": 0.7026497377797631,
|
||
|
|
"learning_rate": 1.1034846424870744e-05,
|
||
|
|
"loss": 0.0901,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06845103204250336,
|
||
|
|
"step": 2990,
|
||
|
|
"valid_targets_mean": 963.6,
|
||
|
|
"valid_targets_min": 622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.792,
|
||
|
|
"grad_norm": 0.8778334419174398,
|
||
|
|
"learning_rate": 1.0963587428599256e-05,
|
||
|
|
"loss": 0.1168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10423515737056732,
|
||
|
|
"step": 2995,
|
||
|
|
"valid_targets_mean": 910.9,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8,
|
||
|
|
"grad_norm": 0.7950092737522141,
|
||
|
|
"learning_rate": 1.089247228051704e-05,
|
||
|
|
"loss": 0.0814,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08087144792079926,
|
||
|
|
"step": 3000,
|
||
|
|
"valid_targets_mean": 820.4,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.808,
|
||
|
|
"grad_norm": 0.8721682415243375,
|
||
|
|
"learning_rate": 1.0821502112686753e-05,
|
||
|
|
"loss": 0.0887,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07225702702999115,
|
||
|
|
"step": 3005,
|
||
|
|
"valid_targets_mean": 717.1,
|
||
|
|
"valid_targets_min": 383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.816,
|
||
|
|
"grad_norm": 0.9423894585093093,
|
||
|
|
"learning_rate": 1.0750678054863158e-05,
|
||
|
|
"loss": 0.0958,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09208646416664124,
|
||
|
|
"step": 3010,
|
||
|
|
"valid_targets_mean": 855.6,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.824,
|
||
|
|
"grad_norm": 0.7766246439543594,
|
||
|
|
"learning_rate": 1.0680001234475127e-05,
|
||
|
|
"loss": 0.1298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08203871548175812,
|
||
|
|
"step": 3015,
|
||
|
|
"valid_targets_mean": 1001.3,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.832,
|
||
|
|
"grad_norm": 0.9878649492586721,
|
||
|
|
"learning_rate": 1.0609472776607715e-05,
|
||
|
|
"loss": 0.0943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14963841438293457,
|
||
|
|
"step": 3020,
|
||
|
|
"valid_targets_mean": 881.9,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.84,
|
||
|
|
"grad_norm": 0.9599009488603156,
|
||
|
|
"learning_rate": 1.0539093803984217e-05,
|
||
|
|
"loss": 0.0789,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08646674454212189,
|
||
|
|
"step": 3025,
|
||
|
|
"valid_targets_mean": 1013.7,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.848,
|
||
|
|
"grad_norm": 0.7138781421940567,
|
||
|
|
"learning_rate": 1.046886543694832e-05,
|
||
|
|
"loss": 0.0859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06895513832569122,
|
||
|
|
"step": 3030,
|
||
|
|
"valid_targets_mean": 867.2,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.856,
|
||
|
|
"grad_norm": 0.8515202976827628,
|
||
|
|
"learning_rate": 1.0398788793446263e-05,
|
||
|
|
"loss": 0.1115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09606703370809555,
|
||
|
|
"step": 3035,
|
||
|
|
"valid_targets_mean": 1090.6,
|
||
|
|
"valid_targets_min": 546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.864,
|
||
|
|
"grad_norm": 1.4988346042882428,
|
||
|
|
"learning_rate": 1.0328864989009037e-05,
|
||
|
|
"loss": 0.1022,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07324632257223129,
|
||
|
|
"step": 3040,
|
||
|
|
"valid_targets_mean": 869.6,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.872,
|
||
|
|
"grad_norm": 0.8325983712860322,
|
||
|
|
"learning_rate": 1.0259095136734634e-05,
|
||
|
|
"loss": 0.1326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1295246034860611,
|
||
|
|
"step": 3045,
|
||
|
|
"valid_targets_mean": 1129.4,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.88,
|
||
|
|
"grad_norm": 0.7862743933629226,
|
||
|
|
"learning_rate": 1.0189480347270311e-05,
|
||
|
|
"loss": 0.1005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10198500752449036,
|
||
|
|
"step": 3050,
|
||
|
|
"valid_targets_mean": 923.4,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.888,
|
||
|
|
"grad_norm": 0.787405323498443,
|
||
|
|
"learning_rate": 1.0120021728794938e-05,
|
||
|
|
"loss": 0.0799,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08082190155982971,
|
||
|
|
"step": 3055,
|
||
|
|
"valid_targets_mean": 683.2,
|
||
|
|
"valid_targets_min": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.896,
|
||
|
|
"grad_norm": 0.9576234772943834,
|
||
|
|
"learning_rate": 1.0050720387001334e-05,
|
||
|
|
"loss": 0.1027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09182494133710861,
|
||
|
|
"step": 3060,
|
||
|
|
"valid_targets_mean": 887.6,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.904,
|
||
|
|
"grad_norm": 0.9752361907473542,
|
||
|
|
"learning_rate": 9.981577425078672e-06,
|
||
|
|
"loss": 0.0971,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15568508207798004,
|
||
|
|
"step": 3065,
|
||
|
|
"valid_targets_mean": 1023.6,
|
||
|
|
"valid_targets_min": 463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.912,
|
||
|
|
"grad_norm": 0.8911558097766225,
|
||
|
|
"learning_rate": 9.912593943694924e-06,
|
||
|
|
"loss": 0.1129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1109350174665451,
|
||
|
|
"step": 3070,
|
||
|
|
"valid_targets_mean": 1180.3,
|
||
|
|
"valid_targets_min": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.92,
|
||
|
|
"grad_norm": 0.9212926624311334,
|
||
|
|
"learning_rate": 9.843771040979328e-06,
|
||
|
|
"loss": 0.0871,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09667422622442245,
|
||
|
|
"step": 3075,
|
||
|
|
"valid_targets_mean": 950.1,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.928,
|
||
|
|
"grad_norm": 0.8573739269127677,
|
||
|
|
"learning_rate": 9.775109812504922e-06,
|
||
|
|
"loss": 0.0957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1107478216290474,
|
||
|
|
"step": 3080,
|
||
|
|
"valid_targets_mean": 971.2,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.936,
|
||
|
|
"grad_norm": 0.8793036060018228,
|
||
|
|
"learning_rate": 9.706611351271088e-06,
|
||
|
|
"loss": 0.0772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09450344741344452,
|
||
|
|
"step": 3085,
|
||
|
|
"valid_targets_mean": 806.3,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.944,
|
||
|
|
"grad_norm": 0.9522517797294872,
|
||
|
|
"learning_rate": 9.638276747686169e-06,
|
||
|
|
"loss": 0.105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1285589337348938,
|
||
|
|
"step": 3090,
|
||
|
|
"valid_targets_mean": 1150.9,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.952,
|
||
|
|
"grad_norm": 0.9313589976377755,
|
||
|
|
"learning_rate": 9.570107089550091e-06,
|
||
|
|
"loss": 0.1143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12102701514959335,
|
||
|
|
"step": 3095,
|
||
|
|
"valid_targets_mean": 858.7,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.96,
|
||
|
|
"grad_norm": 0.9163513791269962,
|
||
|
|
"learning_rate": 9.502103462037074e-06,
|
||
|
|
"loss": 0.1004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1062905564904213,
|
||
|
|
"step": 3100,
|
||
|
|
"valid_targets_mean": 964.9,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.968,
|
||
|
|
"grad_norm": 1.0687818787426926,
|
||
|
|
"learning_rate": 9.434266947678326e-06,
|
||
|
|
"loss": 0.1087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10951408743858337,
|
||
|
|
"step": 3105,
|
||
|
|
"valid_targets_mean": 981.2,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.976,
|
||
|
|
"grad_norm": 0.7184448345349286,
|
||
|
|
"learning_rate": 9.366598626344836e-06,
|
||
|
|
"loss": 0.082,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06675899028778076,
|
||
|
|
"step": 3110,
|
||
|
|
"valid_targets_mean": 834.8,
|
||
|
|
"valid_targets_min": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.984,
|
||
|
|
"grad_norm": 0.8654614629854598,
|
||
|
|
"learning_rate": 9.299099575230172e-06,
|
||
|
|
"loss": 0.1179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08472222089767456,
|
||
|
|
"step": 3115,
|
||
|
|
"valid_targets_mean": 834.8,
|
||
|
|
"valid_targets_min": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.992,
|
||
|
|
"grad_norm": 0.5860181708153348,
|
||
|
|
"learning_rate": 9.231770868833334e-06,
|
||
|
|
"loss": 0.0864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10731495916843414,
|
||
|
|
"step": 3120,
|
||
|
|
"valid_targets_mean": 2171.6,
|
||
|
|
"valid_targets_min": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.8324406309143618,
|
||
|
|
"learning_rate": 9.164613578941652e-06,
|
||
|
|
"loss": 0.0861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08765757828950882,
|
||
|
|
"step": 3125,
|
||
|
|
"valid_targets_mean": 918.4,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.008,
|
||
|
|
"grad_norm": 1.0510573946733208,
|
||
|
|
"learning_rate": 9.097628774613732e-06,
|
||
|
|
"loss": 0.0718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06804905831813812,
|
||
|
|
"step": 3130,
|
||
|
|
"valid_targets_mean": 935.7,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.016,
|
||
|
|
"grad_norm": 0.7707638109753482,
|
||
|
|
"learning_rate": 9.030817522162403e-06,
|
||
|
|
"loss": 0.0702,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07694844156503677,
|
||
|
|
"step": 3135,
|
||
|
|
"valid_targets_mean": 979.8,
|
||
|
|
"valid_targets_min": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.024,
|
||
|
|
"grad_norm": 0.8982765492579255,
|
||
|
|
"learning_rate": 8.964180885137797e-06,
|
||
|
|
"loss": 0.0986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09304551035165787,
|
||
|
|
"step": 3140,
|
||
|
|
"valid_targets_mean": 924.1,
|
||
|
|
"valid_targets_min": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.032,
|
||
|
|
"grad_norm": 0.8324731032677587,
|
||
|
|
"learning_rate": 8.897719924310375e-06,
|
||
|
|
"loss": 0.0694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06775309890508652,
|
||
|
|
"step": 3145,
|
||
|
|
"valid_targets_mean": 908.8,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.04,
|
||
|
|
"grad_norm": 0.9396107459729739,
|
||
|
|
"learning_rate": 8.831435697654068e-06,
|
||
|
|
"loss": 0.0752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08411605656147003,
|
||
|
|
"step": 3150,
|
||
|
|
"valid_targets_mean": 924.2,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.048,
|
||
|
|
"grad_norm": 0.6785395368878784,
|
||
|
|
"learning_rate": 8.765329260329413e-06,
|
||
|
|
"loss": 0.0698,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05708220601081848,
|
||
|
|
"step": 3155,
|
||
|
|
"valid_targets_mean": 869.2,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.056,
|
||
|
|
"grad_norm": 0.8586369742809761,
|
||
|
|
"learning_rate": 8.699401664666774e-06,
|
||
|
|
"loss": 0.0655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06502902507781982,
|
||
|
|
"step": 3160,
|
||
|
|
"valid_targets_mean": 793.8,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.064,
|
||
|
|
"grad_norm": 0.8368101486858089,
|
||
|
|
"learning_rate": 8.633653960149579e-06,
|
||
|
|
"loss": 0.0758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06350349634885788,
|
||
|
|
"step": 3165,
|
||
|
|
"valid_targets_mean": 855.6,
|
||
|
|
"valid_targets_min": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.072,
|
||
|
|
"grad_norm": 0.8412868112255628,
|
||
|
|
"learning_rate": 8.56808719339762e-06,
|
||
|
|
"loss": 0.0698,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057309672236442566,
|
||
|
|
"step": 3170,
|
||
|
|
"valid_targets_mean": 939.9,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.08,
|
||
|
|
"grad_norm": 0.9294442133648975,
|
||
|
|
"learning_rate": 8.502702408150391e-06,
|
||
|
|
"loss": 0.0855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10744750499725342,
|
||
|
|
"step": 3175,
|
||
|
|
"valid_targets_mean": 1084.8,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.088,
|
||
|
|
"grad_norm": 0.8337209904777393,
|
||
|
|
"learning_rate": 8.43750064525047e-06,
|
||
|
|
"loss": 0.0809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06538328528404236,
|
||
|
|
"step": 3180,
|
||
|
|
"valid_targets_mean": 731.6,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.096,
|
||
|
|
"grad_norm": 0.8657468914758869,
|
||
|
|
"learning_rate": 8.372482942626952e-06,
|
||
|
|
"loss": 0.064,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07184585928916931,
|
||
|
|
"step": 3185,
|
||
|
|
"valid_targets_mean": 859.4,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.104,
|
||
|
|
"grad_norm": 0.7209090350527402,
|
||
|
|
"learning_rate": 8.307650335278927e-06,
|
||
|
|
"loss": 0.0721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05849084258079529,
|
||
|
|
"step": 3190,
|
||
|
|
"valid_targets_mean": 785.9,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.112,
|
||
|
|
"grad_norm": 0.7567765538916866,
|
||
|
|
"learning_rate": 8.243003855259015e-06,
|
||
|
|
"loss": 0.0852,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06685122102499008,
|
||
|
|
"step": 3195,
|
||
|
|
"valid_targets_mean": 824.2,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.12,
|
||
|
|
"grad_norm": 0.80639522105524,
|
||
|
|
"learning_rate": 8.178544531656897e-06,
|
||
|
|
"loss": 0.0803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06725746393203735,
|
||
|
|
"step": 3200,
|
||
|
|
"valid_targets_mean": 877.0,
|
||
|
|
"valid_targets_min": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.128,
|
||
|
|
"grad_norm": 0.8589322565030704,
|
||
|
|
"learning_rate": 8.11427339058299e-06,
|
||
|
|
"loss": 0.0883,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.067558653652668,
|
||
|
|
"step": 3205,
|
||
|
|
"valid_targets_mean": 834.4,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.136,
|
||
|
|
"grad_norm": 0.6812113565653257,
|
||
|
|
"learning_rate": 8.050191455152072e-06,
|
||
|
|
"loss": 0.0685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058748096227645874,
|
||
|
|
"step": 3210,
|
||
|
|
"valid_targets_mean": 787.1,
|
||
|
|
"valid_targets_min": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.144,
|
||
|
|
"grad_norm": 0.7113832171090609,
|
||
|
|
"learning_rate": 7.986299745467013e-06,
|
||
|
|
"loss": 0.0798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07876145839691162,
|
||
|
|
"step": 3215,
|
||
|
|
"valid_targets_mean": 1159.2,
|
||
|
|
"valid_targets_min": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.152,
|
||
|
|
"grad_norm": 0.8160806415654152,
|
||
|
|
"learning_rate": 7.922599278602524e-06,
|
||
|
|
"loss": 0.0671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06620678305625916,
|
||
|
|
"step": 3220,
|
||
|
|
"valid_targets_mean": 763.9,
|
||
|
|
"valid_targets_min": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.16,
|
||
|
|
"grad_norm": 0.7484997198691409,
|
||
|
|
"learning_rate": 7.859091068588987e-06,
|
||
|
|
"loss": 0.0707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05849097669124603,
|
||
|
|
"step": 3225,
|
||
|
|
"valid_targets_mean": 705.9,
|
||
|
|
"valid_targets_min": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.168,
|
||
|
|
"grad_norm": 0.7589106191581299,
|
||
|
|
"learning_rate": 7.795776126396284e-06,
|
||
|
|
"loss": 0.098,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06466265767812729,
|
||
|
|
"step": 3230,
|
||
|
|
"valid_targets_mean": 914.7,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.176,
|
||
|
|
"grad_norm": 0.9207333350083065,
|
||
|
|
"learning_rate": 7.732655459917726e-06,
|
||
|
|
"loss": 0.0864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07856682687997818,
|
||
|
|
"step": 3235,
|
||
|
|
"valid_targets_mean": 1036.2,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.184,
|
||
|
|
"grad_norm": 0.7936348748602636,
|
||
|
|
"learning_rate": 7.669730073954005e-06,
|
||
|
|
"loss": 0.0711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06282620131969452,
|
||
|
|
"step": 3240,
|
||
|
|
"valid_targets_mean": 904.2,
|
||
|
|
"valid_targets_min": 553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.192,
|
||
|
|
"grad_norm": 0.7253442301155842,
|
||
|
|
"learning_rate": 7.607000970197194e-06,
|
||
|
|
"loss": 0.094,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06407792121171951,
|
||
|
|
"step": 3245,
|
||
|
|
"valid_targets_mean": 831.5,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2,
|
||
|
|
"grad_norm": 1.1209412847091478,
|
||
|
|
"learning_rate": 7.544469147214797e-06,
|
||
|
|
"loss": 0.0871,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13570475578308105,
|
||
|
|
"step": 3250,
|
||
|
|
"valid_targets_mean": 1172.7,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.208,
|
||
|
|
"grad_norm": 1.3916853073197575,
|
||
|
|
"learning_rate": 7.482135600433868e-06,
|
||
|
|
"loss": 0.0912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09390473365783691,
|
||
|
|
"step": 3255,
|
||
|
|
"valid_targets_mean": 915.3,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.216,
|
||
|
|
"grad_norm": 0.824701167491673,
|
||
|
|
"learning_rate": 7.420001322125156e-06,
|
||
|
|
"loss": 0.0684,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07643882185220718,
|
||
|
|
"step": 3260,
|
||
|
|
"valid_targets_mean": 956.8,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.224,
|
||
|
|
"grad_norm": 0.7266209275554882,
|
||
|
|
"learning_rate": 7.3580673013872946e-06,
|
||
|
|
"loss": 0.0682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058551251888275146,
|
||
|
|
"step": 3265,
|
||
|
|
"valid_targets_mean": 883.6,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.232,
|
||
|
|
"grad_norm": 0.894891668332706,
|
||
|
|
"learning_rate": 7.2963345241310904e-06,
|
||
|
|
"loss": 0.0803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06584808230400085,
|
||
|
|
"step": 3270,
|
||
|
|
"valid_targets_mean": 772.3,
|
||
|
|
"valid_targets_min": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.24,
|
||
|
|
"grad_norm": 0.7138319729657315,
|
||
|
|
"learning_rate": 7.234803973063797e-06,
|
||
|
|
"loss": 0.0728,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.060329943895339966,
|
||
|
|
"step": 3275,
|
||
|
|
"valid_targets_mean": 762.9,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.248,
|
||
|
|
"grad_norm": 0.9752596494461916,
|
||
|
|
"learning_rate": 7.173476627673492e-06,
|
||
|
|
"loss": 0.0693,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09661838412284851,
|
||
|
|
"step": 3280,
|
||
|
|
"valid_targets_mean": 1086.2,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.256,
|
||
|
|
"grad_norm": 0.9852883612298059,
|
||
|
|
"learning_rate": 7.112353464213477e-06,
|
||
|
|
"loss": 0.0953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08339089155197144,
|
||
|
|
"step": 3285,
|
||
|
|
"valid_targets_mean": 1042.8,
|
||
|
|
"valid_targets_min": 576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.264,
|
||
|
|
"grad_norm": 0.7852849622597555,
|
||
|
|
"learning_rate": 7.051435455686735e-06,
|
||
|
|
"loss": 0.0651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.059544969350099564,
|
||
|
|
"step": 3290,
|
||
|
|
"valid_targets_mean": 856.1,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.272,
|
||
|
|
"grad_norm": 0.8604114481180384,
|
||
|
|
"learning_rate": 6.990723571830438e-06,
|
||
|
|
"loss": 0.0665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0747007355093956,
|
||
|
|
"step": 3295,
|
||
|
|
"valid_targets_mean": 871.5,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.28,
|
||
|
|
"grad_norm": 0.9221542381832124,
|
||
|
|
"learning_rate": 6.93021877910052e-06,
|
||
|
|
"loss": 0.0965,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1211557537317276,
|
||
|
|
"step": 3300,
|
||
|
|
"valid_targets_mean": 1022.2,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.288,
|
||
|
|
"grad_norm": 0.7799908275852595,
|
||
|
|
"learning_rate": 6.8699220406562985e-06,
|
||
|
|
"loss": 0.0745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06038517504930496,
|
||
|
|
"step": 3305,
|
||
|
|
"valid_targets_mean": 782.4,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.296,
|
||
|
|
"grad_norm": 0.7720999089439553,
|
||
|
|
"learning_rate": 6.809834316345117e-06,
|
||
|
|
"loss": 0.0666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06718848645687103,
|
||
|
|
"step": 3310,
|
||
|
|
"valid_targets_mean": 683.2,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.304,
|
||
|
|
"grad_norm": 1.003311124904277,
|
||
|
|
"learning_rate": 6.749956562687083e-06,
|
||
|
|
"loss": 0.0664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06412294507026672,
|
||
|
|
"step": 3315,
|
||
|
|
"valid_targets_mean": 765.4,
|
||
|
|
"valid_targets_min": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.312,
|
||
|
|
"grad_norm": 0.9176716702823916,
|
||
|
|
"learning_rate": 6.690289732859841e-06,
|
||
|
|
"loss": 0.0756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06900150328874588,
|
||
|
|
"step": 3320,
|
||
|
|
"valid_targets_mean": 726.8,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.32,
|
||
|
|
"grad_norm": 0.8801976297445904,
|
||
|
|
"learning_rate": 6.630834776683403e-06,
|
||
|
|
"loss": 0.0743,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08224289119243622,
|
||
|
|
"step": 3325,
|
||
|
|
"valid_targets_mean": 870.4,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.328,
|
||
|
|
"grad_norm": 0.750674011177151,
|
||
|
|
"learning_rate": 6.571592640605e-06,
|
||
|
|
"loss": 0.0615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.056616879999637604,
|
||
|
|
"step": 3330,
|
||
|
|
"valid_targets_mean": 793.8,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.336,
|
||
|
|
"grad_norm": 0.8610292635057717,
|
||
|
|
"learning_rate": 6.512564267684061e-06,
|
||
|
|
"loss": 0.0929,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0650147795677185,
|
||
|
|
"step": 3335,
|
||
|
|
"valid_targets_mean": 1461.8,
|
||
|
|
"valid_targets_min": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.344,
|
||
|
|
"grad_norm": 1.0077653627076866,
|
||
|
|
"learning_rate": 6.453750597577167e-06,
|
||
|
|
"loss": 0.0665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08667433261871338,
|
||
|
|
"step": 3340,
|
||
|
|
"valid_targets_mean": 968.2,
|
||
|
|
"valid_targets_min": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.352,
|
||
|
|
"grad_norm": 0.8467418168834477,
|
||
|
|
"learning_rate": 6.395152566523106e-06,
|
||
|
|
"loss": 0.0753,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09939449280500412,
|
||
|
|
"step": 3345,
|
||
|
|
"valid_targets_mean": 1185.5,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.36,
|
||
|
|
"grad_norm": 1.1761939041437515,
|
||
|
|
"learning_rate": 6.336771107327966e-06,
|
||
|
|
"loss": 0.0899,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12340512871742249,
|
||
|
|
"step": 3350,
|
||
|
|
"valid_targets_mean": 917.4,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.368,
|
||
|
|
"grad_norm": 1.137723815719334,
|
||
|
|
"learning_rate": 6.278607149350289e-06,
|
||
|
|
"loss": 0.072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07936599850654602,
|
||
|
|
"step": 3355,
|
||
|
|
"valid_targets_mean": 831.8,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.376,
|
||
|
|
"grad_norm": 0.7671106717546039,
|
||
|
|
"learning_rate": 6.220661618486268e-06,
|
||
|
|
"loss": 0.0801,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06496668606996536,
|
||
|
|
"step": 3360,
|
||
|
|
"valid_targets_mean": 763.8,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.384,
|
||
|
|
"grad_norm": 0.7407932030120938,
|
||
|
|
"learning_rate": 6.162935437155024e-06,
|
||
|
|
"loss": 0.0922,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.069102942943573,
|
||
|
|
"step": 3365,
|
||
|
|
"valid_targets_mean": 1027.3,
|
||
|
|
"valid_targets_min": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.392,
|
||
|
|
"grad_norm": 0.9869221609265355,
|
||
|
|
"learning_rate": 6.105429524283901e-06,
|
||
|
|
"loss": 0.0993,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1857997626066208,
|
||
|
|
"step": 3370,
|
||
|
|
"valid_targets_mean": 1458.3,
|
||
|
|
"valid_targets_min": 402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4,
|
||
|
|
"grad_norm": 0.8059030956251092,
|
||
|
|
"learning_rate": 6.04814479529386e-06,
|
||
|
|
"loss": 0.0827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09002557396888733,
|
||
|
|
"step": 3375,
|
||
|
|
"valid_targets_mean": 1050.7,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.408,
|
||
|
|
"grad_norm": 0.955351532425662,
|
||
|
|
"learning_rate": 5.991082162084889e-06,
|
||
|
|
"loss": 0.0731,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06369668245315552,
|
||
|
|
"step": 3380,
|
||
|
|
"valid_targets_mean": 1038.1,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.416,
|
||
|
|
"grad_norm": 0.834418232150413,
|
||
|
|
"learning_rate": 5.934242533021499e-06,
|
||
|
|
"loss": 0.1086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05775479972362518,
|
||
|
|
"step": 3385,
|
||
|
|
"valid_targets_mean": 731.1,
|
||
|
|
"valid_targets_min": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.424,
|
||
|
|
"grad_norm": 0.987964590502229,
|
||
|
|
"learning_rate": 5.877626812918258e-06,
|
||
|
|
"loss": 0.0766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13848869502544403,
|
||
|
|
"step": 3390,
|
||
|
|
"valid_targets_mean": 1440.2,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.432,
|
||
|
|
"grad_norm": 0.8334021563707787,
|
||
|
|
"learning_rate": 5.821235903025378e-06,
|
||
|
|
"loss": 0.0978,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0656728744506836,
|
||
|
|
"step": 3395,
|
||
|
|
"valid_targets_mean": 819.8,
|
||
|
|
"valid_targets_min": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.44,
|
||
|
|
"grad_norm": 0.7199157847990661,
|
||
|
|
"learning_rate": 5.765070701014391e-06,
|
||
|
|
"loss": 0.0704,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06393887847661972,
|
||
|
|
"step": 3400,
|
||
|
|
"valid_targets_mean": 926.3,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.448,
|
||
|
|
"grad_norm": 0.9044046663502227,
|
||
|
|
"learning_rate": 5.709132100963841e-06,
|
||
|
|
"loss": 0.0975,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08857779204845428,
|
||
|
|
"step": 3405,
|
||
|
|
"valid_targets_mean": 1049.2,
|
||
|
|
"valid_targets_min": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.456,
|
||
|
|
"grad_norm": 0.8155293751834836,
|
||
|
|
"learning_rate": 5.653420993345062e-06,
|
||
|
|
"loss": 0.0635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07042507827281952,
|
||
|
|
"step": 3410,
|
||
|
|
"valid_targets_mean": 721.9,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.464,
|
||
|
|
"grad_norm": 0.7081990917119335,
|
||
|
|
"learning_rate": 5.597938265007994e-06,
|
||
|
|
"loss": 0.0628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057499922811985016,
|
||
|
|
"step": 3415,
|
||
|
|
"valid_targets_mean": 916.9,
|
||
|
|
"valid_targets_min": 602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4719999999999995,
|
||
|
|
"grad_norm": 0.9661217691084136,
|
||
|
|
"learning_rate": 5.542684799167069e-06,
|
||
|
|
"loss": 0.0796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08886539936065674,
|
||
|
|
"step": 3420,
|
||
|
|
"valid_targets_mean": 1082.2,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.48,
|
||
|
|
"grad_norm": 0.8055935580691043,
|
||
|
|
"learning_rate": 5.487661475387152e-06,
|
||
|
|
"loss": 0.0837,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06585259735584259,
|
||
|
|
"step": 3425,
|
||
|
|
"valid_targets_mean": 797.6,
|
||
|
|
"valid_targets_min": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4879999999999995,
|
||
|
|
"grad_norm": 0.8384437110022962,
|
||
|
|
"learning_rate": 5.432869169569541e-06,
|
||
|
|
"loss": 0.0667,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0917351022362709,
|
||
|
|
"step": 3430,
|
||
|
|
"valid_targets_mean": 1237.9,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.496,
|
||
|
|
"grad_norm": 0.7055350608763379,
|
||
|
|
"learning_rate": 5.378308753938024e-06,
|
||
|
|
"loss": 0.0672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0545569509267807,
|
||
|
|
"step": 3435,
|
||
|
|
"valid_targets_mean": 821.5,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.504,
|
||
|
|
"grad_norm": 0.7938678596509943,
|
||
|
|
"learning_rate": 5.323981097024986e-06,
|
||
|
|
"loss": 0.0619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05954422429203987,
|
||
|
|
"step": 3440,
|
||
|
|
"valid_targets_mean": 926.8,
|
||
|
|
"valid_targets_min": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5120000000000005,
|
||
|
|
"grad_norm": 0.710376263121058,
|
||
|
|
"learning_rate": 5.269887063657595e-06,
|
||
|
|
"loss": 0.078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0633171796798706,
|
||
|
|
"step": 3445,
|
||
|
|
"valid_targets_mean": 887.9,
|
||
|
|
"valid_targets_min": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.52,
|
||
|
|
"grad_norm": 0.9352971664606785,
|
||
|
|
"learning_rate": 5.216027514944027e-06,
|
||
|
|
"loss": 0.0789,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06568583101034164,
|
||
|
|
"step": 3450,
|
||
|
|
"valid_targets_mean": 782.4,
|
||
|
|
"valid_targets_min": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5280000000000005,
|
||
|
|
"grad_norm": 1.0299067570372629,
|
||
|
|
"learning_rate": 5.162403308259767e-06,
|
||
|
|
"loss": 0.078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06486299633979797,
|
||
|
|
"step": 3455,
|
||
|
|
"valid_targets_mean": 873.9,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.536,
|
||
|
|
"grad_norm": 0.9662261943295932,
|
||
|
|
"learning_rate": 5.109015297233935e-06,
|
||
|
|
"loss": 0.0773,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08379887789487839,
|
||
|
|
"step": 3460,
|
||
|
|
"valid_targets_mean": 1013.6,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5440000000000005,
|
||
|
|
"grad_norm": 0.7998534820934543,
|
||
|
|
"learning_rate": 5.055864331735736e-06,
|
||
|
|
"loss": 0.0624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06942769885063171,
|
||
|
|
"step": 3465,
|
||
|
|
"valid_targets_mean": 784.6,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.552,
|
||
|
|
"grad_norm": 0.9072820507949081,
|
||
|
|
"learning_rate": 5.002951257860909e-06,
|
||
|
|
"loss": 0.098,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09650293737649918,
|
||
|
|
"step": 3470,
|
||
|
|
"valid_targets_mean": 872.7,
|
||
|
|
"valid_targets_min": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5600000000000005,
|
||
|
|
"grad_norm": 0.9556215264384249,
|
||
|
|
"learning_rate": 4.950276917918256e-06,
|
||
|
|
"loss": 0.0885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08272415399551392,
|
||
|
|
"step": 3475,
|
||
|
|
"valid_targets_mean": 919.9,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.568,
|
||
|
|
"grad_norm": 0.7873277603382469,
|
||
|
|
"learning_rate": 4.8978421504162385e-06,
|
||
|
|
"loss": 0.0726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06439163535833359,
|
||
|
|
"step": 3480,
|
||
|
|
"valid_targets_mean": 821.4,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.576,
|
||
|
|
"grad_norm": 0.9309358376083752,
|
||
|
|
"learning_rate": 4.845647790049634e-06,
|
||
|
|
"loss": 0.0777,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0956721156835556,
|
||
|
|
"step": 3485,
|
||
|
|
"valid_targets_mean": 1139.1,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.584,
|
||
|
|
"grad_norm": 1.32752398877127,
|
||
|
|
"learning_rate": 4.793694667686244e-06,
|
||
|
|
"loss": 0.0801,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11851969361305237,
|
||
|
|
"step": 3490,
|
||
|
|
"valid_targets_mean": 969.3,
|
||
|
|
"valid_targets_min": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.592,
|
||
|
|
"grad_norm": 0.7663546673030264,
|
||
|
|
"learning_rate": 4.741983610353664e-06,
|
||
|
|
"loss": 0.0832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06600794941186905,
|
||
|
|
"step": 3495,
|
||
|
|
"valid_targets_mean": 809.7,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6,
|
||
|
|
"grad_norm": 0.7934732627314547,
|
||
|
|
"learning_rate": 4.690515441226122e-06,
|
||
|
|
"loss": 0.07,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06453859806060791,
|
||
|
|
"step": 3500,
|
||
|
|
"valid_targets_mean": 764.8,
|
||
|
|
"valid_targets_min": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.608,
|
||
|
|
"grad_norm": 1.0745817460030198,
|
||
|
|
"learning_rate": 4.639290979611379e-06,
|
||
|
|
"loss": 0.074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09399828314781189,
|
||
|
|
"step": 3505,
|
||
|
|
"valid_targets_mean": 1305.3,
|
||
|
|
"valid_targets_min": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.616,
|
||
|
|
"grad_norm": 0.8247645705611755,
|
||
|
|
"learning_rate": 4.588311040937683e-06,
|
||
|
|
"loss": 0.072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06673099100589752,
|
||
|
|
"step": 3510,
|
||
|
|
"valid_targets_mean": 943.1,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.624,
|
||
|
|
"grad_norm": 0.9109017424927973,
|
||
|
|
"learning_rate": 4.537576436740783e-06,
|
||
|
|
"loss": 0.0815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09812849014997482,
|
||
|
|
"step": 3515,
|
||
|
|
"valid_targets_mean": 932.1,
|
||
|
|
"valid_targets_min": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.632,
|
||
|
|
"grad_norm": 1.0092470799951956,
|
||
|
|
"learning_rate": 4.487087974651016e-06,
|
||
|
|
"loss": 0.0761,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08479701727628708,
|
||
|
|
"step": 3520,
|
||
|
|
"valid_targets_mean": 1049.4,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.64,
|
||
|
|
"grad_norm": 1.1229829135516711,
|
||
|
|
"learning_rate": 4.436846458380455e-06,
|
||
|
|
"loss": 0.074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09786868095397949,
|
||
|
|
"step": 3525,
|
||
|
|
"valid_targets_mean": 964.7,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.648,
|
||
|
|
"grad_norm": 0.6833678630320003,
|
||
|
|
"learning_rate": 4.386852687710104e-06,
|
||
|
|
"loss": 0.062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05484826862812042,
|
||
|
|
"step": 3530,
|
||
|
|
"valid_targets_mean": 882.2,
|
||
|
|
"valid_targets_min": 538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.656,
|
||
|
|
"grad_norm": 0.752457884182261,
|
||
|
|
"learning_rate": 4.337107458477177e-06,
|
||
|
|
"loss": 0.0777,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06667198240756989,
|
||
|
|
"step": 3535,
|
||
|
|
"valid_targets_mean": 829.9,
|
||
|
|
"valid_targets_min": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.664,
|
||
|
|
"grad_norm": 0.8183447240718775,
|
||
|
|
"learning_rate": 4.287611562562422e-06,
|
||
|
|
"loss": 0.0894,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06430254876613617,
|
||
|
|
"step": 3540,
|
||
|
|
"valid_targets_mean": 768.8,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.672,
|
||
|
|
"grad_norm": 0.7778594591440714,
|
||
|
|
"learning_rate": 4.238365787877516e-06,
|
||
|
|
"loss": 0.0879,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06610207259654999,
|
||
|
|
"step": 3545,
|
||
|
|
"valid_targets_mean": 774.2,
|
||
|
|
"valid_targets_min": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.68,
|
||
|
|
"grad_norm": 0.7408596941120574,
|
||
|
|
"learning_rate": 4.189370918352531e-06,
|
||
|
|
"loss": 0.0655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06291748583316803,
|
||
|
|
"step": 3550,
|
||
|
|
"valid_targets_mean": 825.8,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.688,
|
||
|
|
"grad_norm": 0.9669008112490017,
|
||
|
|
"learning_rate": 4.140627733923439e-06,
|
||
|
|
"loss": 0.0792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1275126039981842,
|
||
|
|
"step": 3555,
|
||
|
|
"valid_targets_mean": 1193.2,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.696,
|
||
|
|
"grad_norm": 0.8347158923249489,
|
||
|
|
"learning_rate": 4.092137010519712e-06,
|
||
|
|
"loss": 0.081,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08259904384613037,
|
||
|
|
"step": 3560,
|
||
|
|
"valid_targets_mean": 1022.1,
|
||
|
|
"valid_targets_min": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.704,
|
||
|
|
"grad_norm": 0.7263524739984435,
|
||
|
|
"learning_rate": 4.043899520051964e-06,
|
||
|
|
"loss": 0.0656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.062250278890132904,
|
||
|
|
"step": 3565,
|
||
|
|
"valid_targets_mean": 819.1,
|
||
|
|
"valid_targets_min": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.712,
|
||
|
|
"grad_norm": 0.8065410169235507,
|
||
|
|
"learning_rate": 3.995916030399658e-06,
|
||
|
|
"loss": 0.0773,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06370916962623596,
|
||
|
|
"step": 3570,
|
||
|
|
"valid_targets_mean": 722.7,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.72,
|
||
|
|
"grad_norm": 0.907021075972577,
|
||
|
|
"learning_rate": 3.948187305398892e-06,
|
||
|
|
"loss": 0.0853,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07696904242038727,
|
||
|
|
"step": 3575,
|
||
|
|
"valid_targets_mean": 868.0,
|
||
|
|
"valid_targets_min": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.728,
|
||
|
|
"grad_norm": 0.6402111741227499,
|
||
|
|
"learning_rate": 3.90071410483023e-06,
|
||
|
|
"loss": 0.0692,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05004892498254776,
|
||
|
|
"step": 3580,
|
||
|
|
"valid_targets_mean": 926.3,
|
||
|
|
"valid_targets_min": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.736,
|
||
|
|
"grad_norm": 1.113414574317508,
|
||
|
|
"learning_rate": 3.853497184406623e-06,
|
||
|
|
"loss": 0.0928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07702088356018066,
|
||
|
|
"step": 3585,
|
||
|
|
"valid_targets_mean": 949.3,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.744,
|
||
|
|
"grad_norm": 0.6131949560769171,
|
||
|
|
"learning_rate": 3.80653729576135e-06,
|
||
|
|
"loss": 0.0656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07584548741579056,
|
||
|
|
"step": 3590,
|
||
|
|
"valid_targets_mean": 1754.1,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.752,
|
||
|
|
"grad_norm": 15.857350207682247,
|
||
|
|
"learning_rate": 3.7598351864360872e-06,
|
||
|
|
"loss": 0.087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05998915061354637,
|
||
|
|
"step": 3595,
|
||
|
|
"valid_targets_mean": 756.8,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.76,
|
||
|
|
"grad_norm": 0.9328208235959797,
|
||
|
|
"learning_rate": 3.713391599868985e-06,
|
||
|
|
"loss": 0.0693,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06560102850198746,
|
||
|
|
"step": 3600,
|
||
|
|
"valid_targets_mean": 1117.0,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.768,
|
||
|
|
"grad_norm": 0.7083644653133527,
|
||
|
|
"learning_rate": 3.6672072753828424e-06,
|
||
|
|
"loss": 0.0627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05883736163377762,
|
||
|
|
"step": 3605,
|
||
|
|
"valid_targets_mean": 871.1,
|
||
|
|
"valid_targets_min": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.776,
|
||
|
|
"grad_norm": 0.7376928312531449,
|
||
|
|
"learning_rate": 3.6212829481733368e-06,
|
||
|
|
"loss": 0.1016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05465541034936905,
|
||
|
|
"step": 3610,
|
||
|
|
"valid_targets_mean": 764.8,
|
||
|
|
"valid_targets_min": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.784,
|
||
|
|
"grad_norm": 0.7297951264279697,
|
||
|
|
"learning_rate": 3.575619349297317e-06,
|
||
|
|
"loss": 0.0709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05742385983467102,
|
||
|
|
"step": 3615,
|
||
|
|
"valid_targets_mean": 872.8,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.792,
|
||
|
|
"grad_norm": 1.1192953705793156,
|
||
|
|
"learning_rate": 3.5302172056611682e-06,
|
||
|
|
"loss": 0.0811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07923439145088196,
|
||
|
|
"step": 3620,
|
||
|
|
"valid_targets_mean": 951.4,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.8,
|
||
|
|
"grad_norm": 1.3256193080215943,
|
||
|
|
"learning_rate": 3.485077240009247e-06,
|
||
|
|
"loss": 0.0711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09199804812669754,
|
||
|
|
"step": 3625,
|
||
|
|
"valid_targets_mean": 917.0,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.808,
|
||
|
|
"grad_norm": 0.8608702612415465,
|
||
|
|
"learning_rate": 3.4402001709123643e-06,
|
||
|
|
"loss": 0.0969,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07680759578943253,
|
||
|
|
"step": 3630,
|
||
|
|
"valid_targets_mean": 1030.2,
|
||
|
|
"valid_targets_min": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.816,
|
||
|
|
"grad_norm": 0.8272921235726557,
|
||
|
|
"learning_rate": 3.3955867127563515e-06,
|
||
|
|
"loss": 0.0696,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07520703971385956,
|
||
|
|
"step": 3635,
|
||
|
|
"valid_targets_mean": 956.5,
|
||
|
|
"valid_targets_min": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.824,
|
||
|
|
"grad_norm": 0.9595544410014674,
|
||
|
|
"learning_rate": 3.351237575730695e-06,
|
||
|
|
"loss": 0.0761,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057514142245054245,
|
||
|
|
"step": 3640,
|
||
|
|
"valid_targets_mean": 808.6,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.832,
|
||
|
|
"grad_norm": 1.0200301521244446,
|
||
|
|
"learning_rate": 3.307153465817219e-06,
|
||
|
|
"loss": 0.0652,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0669136494398117,
|
||
|
|
"step": 3645,
|
||
|
|
"valid_targets_mean": 921.6,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.84,
|
||
|
|
"grad_norm": 0.9250628512871966,
|
||
|
|
"learning_rate": 3.263335084778856e-06,
|
||
|
|
"loss": 0.0633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06535211950540543,
|
||
|
|
"step": 3650,
|
||
|
|
"valid_targets_mean": 899.9,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.848,
|
||
|
|
"grad_norm": 0.7333030773657969,
|
||
|
|
"learning_rate": 3.2197831301484816e-06,
|
||
|
|
"loss": 0.0769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058711059391498566,
|
||
|
|
"step": 3655,
|
||
|
|
"valid_targets_mean": 856.2,
|
||
|
|
"valid_targets_min": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.856,
|
||
|
|
"grad_norm": 0.9157327185000818,
|
||
|
|
"learning_rate": 3.1764982952177805e-06,
|
||
|
|
"loss": 0.071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06972061097621918,
|
||
|
|
"step": 3660,
|
||
|
|
"valid_targets_mean": 853.6,
|
||
|
|
"valid_targets_min": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.864,
|
||
|
|
"grad_norm": 0.9031285032639718,
|
||
|
|
"learning_rate": 3.1334812690262507e-06,
|
||
|
|
"loss": 0.0713,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07708781957626343,
|
||
|
|
"step": 3665,
|
||
|
|
"valid_targets_mean": 831.5,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.872,
|
||
|
|
"grad_norm": 0.7166417969880111,
|
||
|
|
"learning_rate": 3.0907327363502084e-06,
|
||
|
|
"loss": 0.0687,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06776954233646393,
|
||
|
|
"step": 3670,
|
||
|
|
"valid_targets_mean": 950.5,
|
||
|
|
"valid_targets_min": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.88,
|
||
|
|
"grad_norm": 0.8507080895949477,
|
||
|
|
"learning_rate": 3.0482533776918987e-06,
|
||
|
|
"loss": 0.0683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06763128936290741,
|
||
|
|
"step": 3675,
|
||
|
|
"valid_targets_mean": 866.5,
|
||
|
|
"valid_targets_min": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.888,
|
||
|
|
"grad_norm": 0.6964626437196108,
|
||
|
|
"learning_rate": 3.0060438692686533e-06,
|
||
|
|
"loss": 0.0664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05540402978658676,
|
||
|
|
"step": 3680,
|
||
|
|
"valid_targets_mean": 845.1,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.896,
|
||
|
|
"grad_norm": 0.911090063477602,
|
||
|
|
"learning_rate": 2.964104883002139e-06,
|
||
|
|
"loss": 0.0664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07893071323633194,
|
||
|
|
"step": 3685,
|
||
|
|
"valid_targets_mean": 862.6,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.904,
|
||
|
|
"grad_norm": 0.7225504639275782,
|
||
|
|
"learning_rate": 2.9224370865076457e-06,
|
||
|
|
"loss": 0.1052,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05786134675145149,
|
||
|
|
"step": 3690,
|
||
|
|
"valid_targets_mean": 898.9,
|
||
|
|
"valid_targets_min": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.912,
|
||
|
|
"grad_norm": 0.8157459567342921,
|
||
|
|
"learning_rate": 2.8810411430834716e-06,
|
||
|
|
"loss": 0.09,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10838191211223602,
|
||
|
|
"step": 3695,
|
||
|
|
"valid_targets_mean": 1518.1,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.92,
|
||
|
|
"grad_norm": 2.0440615035308776,
|
||
|
|
"learning_rate": 2.8399177117003595e-06,
|
||
|
|
"loss": 0.0741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10617715120315552,
|
||
|
|
"step": 3700,
|
||
|
|
"valid_targets_mean": 907.9,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.928,
|
||
|
|
"grad_norm": 0.7016965628788885,
|
||
|
|
"learning_rate": 2.7990674469910085e-06,
|
||
|
|
"loss": 0.0627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06436631828546524,
|
||
|
|
"step": 3705,
|
||
|
|
"valid_targets_mean": 888.1,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.936,
|
||
|
|
"grad_norm": 0.8356053222880082,
|
||
|
|
"learning_rate": 2.7584909992396515e-06,
|
||
|
|
"loss": 0.0683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06576769798994064,
|
||
|
|
"step": 3710,
|
||
|
|
"valid_targets_mean": 802.6,
|
||
|
|
"valid_targets_min": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.944,
|
||
|
|
"grad_norm": 0.8700320179379732,
|
||
|
|
"learning_rate": 2.7181890143716995e-06,
|
||
|
|
"loss": 0.0831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1040990948677063,
|
||
|
|
"step": 3715,
|
||
|
|
"valid_targets_mean": 1067.1,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.952,
|
||
|
|
"grad_norm": 0.6909915063769616,
|
||
|
|
"learning_rate": 2.6781621339434717e-06,
|
||
|
|
"loss": 0.0763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0548010990023613,
|
||
|
|
"step": 3720,
|
||
|
|
"valid_targets_mean": 853.2,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.96,
|
||
|
|
"grad_norm": 0.8709969244384487,
|
||
|
|
"learning_rate": 2.638410995131966e-06,
|
||
|
|
"loss": 0.0887,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06444862484931946,
|
||
|
|
"step": 3725,
|
||
|
|
"valid_targets_mean": 900.3,
|
||
|
|
"valid_targets_min": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.968,
|
||
|
|
"grad_norm": 0.7819427220253011,
|
||
|
|
"learning_rate": 2.5989362307247313e-06,
|
||
|
|
"loss": 0.0632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.059864625334739685,
|
||
|
|
"step": 3730,
|
||
|
|
"valid_targets_mean": 990.4,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.976,
|
||
|
|
"grad_norm": 0.8859049708419192,
|
||
|
|
"learning_rate": 2.5597384691097847e-06,
|
||
|
|
"loss": 0.0778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07407764345407486,
|
||
|
|
"step": 3735,
|
||
|
|
"valid_targets_mean": 701.7,
|
||
|
|
"valid_targets_min": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.984,
|
||
|
|
"grad_norm": 0.7938025501995064,
|
||
|
|
"learning_rate": 2.520818334265611e-06,
|
||
|
|
"loss": 0.0707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06618218123912811,
|
||
|
|
"step": 3740,
|
||
|
|
"valid_targets_mean": 992.5,
|
||
|
|
"valid_targets_min": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.992,
|
||
|
|
"grad_norm": 0.8889342642276553,
|
||
|
|
"learning_rate": 2.482176445751232e-06,
|
||
|
|
"loss": 0.0995,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18800382316112518,
|
||
|
|
"step": 3745,
|
||
|
|
"valid_targets_mean": 1662.3,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0,
|
||
|
|
"grad_norm": 0.7423988202026727,
|
||
|
|
"learning_rate": 2.4438134186963415e-06,
|
||
|
|
"loss": 0.0683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07153650373220444,
|
||
|
|
"step": 3750,
|
||
|
|
"valid_targets_mean": 1792.0,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.008,
|
||
|
|
"grad_norm": 0.6846669028729305,
|
||
|
|
"learning_rate": 2.4057298637915105e-06,
|
||
|
|
"loss": 0.0745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06927240639925003,
|
||
|
|
"step": 3755,
|
||
|
|
"valid_targets_mean": 938.9,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.016,
|
||
|
|
"grad_norm": 0.8174122246512885,
|
||
|
|
"learning_rate": 2.3679263872784717e-06,
|
||
|
|
"loss": 0.0715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.094500333070755,
|
||
|
|
"step": 3760,
|
||
|
|
"valid_targets_mean": 937.7,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.024,
|
||
|
|
"grad_norm": 0.731296665772432,
|
||
|
|
"learning_rate": 2.330403590940471e-06,
|
||
|
|
"loss": 0.0603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057936131954193115,
|
||
|
|
"step": 3765,
|
||
|
|
"valid_targets_mean": 927.5,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.032,
|
||
|
|
"grad_norm": 0.7527270039190072,
|
||
|
|
"learning_rate": 2.2931620720926717e-06,
|
||
|
|
"loss": 0.08,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05649135261774063,
|
||
|
|
"step": 3770,
|
||
|
|
"valid_targets_mean": 735.4,
|
||
|
|
"valid_targets_min": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.04,
|
||
|
|
"grad_norm": 0.6607880693972132,
|
||
|
|
"learning_rate": 2.256202423572669e-06,
|
||
|
|
"loss": 0.0833,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052145324647426605,
|
||
|
|
"step": 3775,
|
||
|
|
"valid_targets_mean": 830.6,
|
||
|
|
"valid_targets_min": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.048,
|
||
|
|
"grad_norm": 0.7674629362054604,
|
||
|
|
"learning_rate": 2.219525233731035e-06,
|
||
|
|
"loss": 0.0829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06597025692462921,
|
||
|
|
"step": 3780,
|
||
|
|
"valid_targets_mean": 860.8,
|
||
|
|
"valid_targets_min": 423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.056,
|
||
|
|
"grad_norm": 0.8067654335852509,
|
||
|
|
"learning_rate": 2.183131086421961e-06,
|
||
|
|
"loss": 0.0612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06674222648143768,
|
||
|
|
"step": 3785,
|
||
|
|
"valid_targets_mean": 833.1,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.064,
|
||
|
|
"grad_norm": 0.798692828688171,
|
||
|
|
"learning_rate": 2.1470205609939533e-06,
|
||
|
|
"loss": 0.0585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0634201243519783,
|
||
|
|
"step": 3790,
|
||
|
|
"valid_targets_mean": 828.0,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.072,
|
||
|
|
"grad_norm": 0.9309599530401287,
|
||
|
|
"learning_rate": 2.1111942322806335e-06,
|
||
|
|
"loss": 0.0636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11342776566743851,
|
||
|
|
"step": 3795,
|
||
|
|
"valid_targets_mean": 1038.8,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.08,
|
||
|
|
"grad_norm": 0.6895626317148708,
|
||
|
|
"learning_rate": 2.0756526705915635e-06,
|
||
|
|
"loss": 0.0586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05497216805815697,
|
||
|
|
"step": 3800,
|
||
|
|
"valid_targets_mean": 826.1,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.088,
|
||
|
|
"grad_norm": 0.8885058561936798,
|
||
|
|
"learning_rate": 2.0403964417031764e-06,
|
||
|
|
"loss": 0.0745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10158699750900269,
|
||
|
|
"step": 3805,
|
||
|
|
"valid_targets_mean": 920.6,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.096,
|
||
|
|
"grad_norm": 0.6850905802220816,
|
||
|
|
"learning_rate": 2.0054261068497773e-06,
|
||
|
|
"loss": 0.0589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05741908773779869,
|
||
|
|
"step": 3810,
|
||
|
|
"valid_targets_mean": 806.9,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.104,
|
||
|
|
"grad_norm": 0.8697899534749736,
|
||
|
|
"learning_rate": 1.9707422227145922e-06,
|
||
|
|
"loss": 0.066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06361544132232666,
|
||
|
|
"step": 3815,
|
||
|
|
"valid_targets_mean": 1002.5,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.112,
|
||
|
|
"grad_norm": 0.6731926316527731,
|
||
|
|
"learning_rate": 1.936345341420924e-06,
|
||
|
|
"loss": 0.0644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05815357714891434,
|
||
|
|
"step": 3820,
|
||
|
|
"valid_targets_mean": 903.8,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.12,
|
||
|
|
"grad_norm": 0.6944702540923555,
|
||
|
|
"learning_rate": 1.9022360105233507e-06,
|
||
|
|
"loss": 0.0553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052839022129774094,
|
||
|
|
"step": 3825,
|
||
|
|
"valid_targets_mean": 839.6,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.128,
|
||
|
|
"grad_norm": 0.6697596876704403,
|
||
|
|
"learning_rate": 1.8684147729990188e-06,
|
||
|
|
"loss": 0.0628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06421222537755966,
|
||
|
|
"step": 3830,
|
||
|
|
"valid_targets_mean": 1138.3,
|
||
|
|
"valid_targets_min": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.136,
|
||
|
|
"grad_norm": 0.70930236617704,
|
||
|
|
"learning_rate": 1.8348821672389893e-06,
|
||
|
|
"loss": 0.0744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06564471125602722,
|
||
|
|
"step": 3835,
|
||
|
|
"valid_targets_mean": 1003.9,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.144,
|
||
|
|
"grad_norm": 0.7133363267380656,
|
||
|
|
"learning_rate": 1.8016387270396784e-06,
|
||
|
|
"loss": 0.0549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05746018514037132,
|
||
|
|
"step": 3840,
|
||
|
|
"valid_targets_mean": 836.8,
|
||
|
|
"valid_targets_min": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.152,
|
||
|
|
"grad_norm": 1.393721527480361,
|
||
|
|
"learning_rate": 1.7686849815943486e-06,
|
||
|
|
"loss": 0.0612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.061105988919734955,
|
||
|
|
"step": 3845,
|
||
|
|
"valid_targets_mean": 855.7,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.16,
|
||
|
|
"grad_norm": 0.8997638717351509,
|
||
|
|
"learning_rate": 1.7360214554847e-06,
|
||
|
|
"loss": 0.0801,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06404763460159302,
|
||
|
|
"step": 3850,
|
||
|
|
"valid_targets_mean": 742.3,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.168,
|
||
|
|
"grad_norm": 0.7478734761337703,
|
||
|
|
"learning_rate": 1.703648668672495e-06,
|
||
|
|
"loss": 0.055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05328584462404251,
|
||
|
|
"step": 3855,
|
||
|
|
"valid_targets_mean": 827.1,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.176,
|
||
|
|
"grad_norm": 0.5629299888001849,
|
||
|
|
"learning_rate": 1.6715671364913077e-06,
|
||
|
|
"loss": 0.0599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04512747377157211,
|
||
|
|
"step": 3860,
|
||
|
|
"valid_targets_mean": 956.1,
|
||
|
|
"valid_targets_min": 582
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.184,
|
||
|
|
"grad_norm": 0.7162074927852252,
|
||
|
|
"learning_rate": 1.6397773696383091e-06,
|
||
|
|
"loss": 0.0677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.055690545588731766,
|
||
|
|
"step": 3865,
|
||
|
|
"valid_targets_mean": 848.6,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.192,
|
||
|
|
"grad_norm": 0.8245228567205685,
|
||
|
|
"learning_rate": 1.6082798741661321e-06,
|
||
|
|
"loss": 0.0653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05366797372698784,
|
||
|
|
"step": 3870,
|
||
|
|
"valid_targets_mean": 988.2,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2,
|
||
|
|
"grad_norm": 0.7698977466565333,
|
||
|
|
"learning_rate": 1.5770751514748273e-06,
|
||
|
|
"loss": 0.0645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06601998955011368,
|
||
|
|
"step": 3875,
|
||
|
|
"valid_targets_mean": 886.9,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.208,
|
||
|
|
"grad_norm": 0.861707757423707,
|
||
|
|
"learning_rate": 1.5461636983038686e-06,
|
||
|
|
"loss": 0.0571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06016688793897629,
|
||
|
|
"step": 3880,
|
||
|
|
"valid_targets_mean": 869.3,
|
||
|
|
"valid_targets_min": 573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.216,
|
||
|
|
"grad_norm": 0.7843893872148268,
|
||
|
|
"learning_rate": 1.5155460067242578e-06,
|
||
|
|
"loss": 0.0579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06555959582328796,
|
||
|
|
"step": 3885,
|
||
|
|
"valid_targets_mean": 822.2,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.224,
|
||
|
|
"grad_norm": 0.7226354746736298,
|
||
|
|
"learning_rate": 1.4852225641306816e-06,
|
||
|
|
"loss": 0.0658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053480908274650574,
|
||
|
|
"step": 3890,
|
||
|
|
"valid_targets_mean": 833.5,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.232,
|
||
|
|
"grad_norm": 0.5802698962131153,
|
||
|
|
"learning_rate": 1.4551938532337607e-06,
|
||
|
|
"loss": 0.0559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05033823102712631,
|
||
|
|
"step": 3895,
|
||
|
|
"valid_targets_mean": 961.6,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.24,
|
||
|
|
"grad_norm": 0.6730808501547918,
|
||
|
|
"learning_rate": 1.4254603520523614e-06,
|
||
|
|
"loss": 0.0643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052310969680547714,
|
||
|
|
"step": 3900,
|
||
|
|
"valid_targets_mean": 1042.4,
|
||
|
|
"valid_targets_min": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.248,
|
||
|
|
"grad_norm": 0.8730341878293636,
|
||
|
|
"learning_rate": 1.3960225339059875e-06,
|
||
|
|
"loss": 0.0747,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11478105187416077,
|
||
|
|
"step": 3905,
|
||
|
|
"valid_targets_mean": 1050.5,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.256,
|
||
|
|
"grad_norm": 0.6535203685996509,
|
||
|
|
"learning_rate": 1.3668808674072409e-06,
|
||
|
|
"loss": 0.0657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05904577672481537,
|
||
|
|
"step": 3910,
|
||
|
|
"valid_targets_mean": 892.3,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.264,
|
||
|
|
"grad_norm": 0.7846320544982055,
|
||
|
|
"learning_rate": 1.338035816454375e-06,
|
||
|
|
"loss": 0.0654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05399632081389427,
|
||
|
|
"step": 3915,
|
||
|
|
"valid_targets_mean": 881.1,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.272,
|
||
|
|
"grad_norm": 0.7168968270055656,
|
||
|
|
"learning_rate": 1.3094878402238887e-06,
|
||
|
|
"loss": 0.0768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05075981467962265,
|
||
|
|
"step": 3920,
|
||
|
|
"valid_targets_mean": 851.4,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.28,
|
||
|
|
"grad_norm": 0.8869465898236252,
|
||
|
|
"learning_rate": 1.2812373931632371e-06,
|
||
|
|
"loss": 0.0711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0765652060508728,
|
||
|
|
"step": 3925,
|
||
|
|
"valid_targets_mean": 895.0,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.288,
|
||
|
|
"grad_norm": 0.8358586687918181,
|
||
|
|
"learning_rate": 1.2532849249835932e-06,
|
||
|
|
"loss": 0.0714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06396108120679855,
|
||
|
|
"step": 3930,
|
||
|
|
"valid_targets_mean": 831.7,
|
||
|
|
"valid_targets_min": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.296,
|
||
|
|
"grad_norm": 0.8210165464346263,
|
||
|
|
"learning_rate": 1.2256308806526774e-06,
|
||
|
|
"loss": 0.0599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0579555407166481,
|
||
|
|
"step": 3935,
|
||
|
|
"valid_targets_mean": 654.8,
|
||
|
|
"valid_targets_min": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.304,
|
||
|
|
"grad_norm": 0.6681203570944947,
|
||
|
|
"learning_rate": 1.1982757003876855e-06,
|
||
|
|
"loss": 0.0635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048654213547706604,
|
||
|
|
"step": 3940,
|
||
|
|
"valid_targets_mean": 810.0,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.312,
|
||
|
|
"grad_norm": 0.6799088682049679,
|
||
|
|
"learning_rate": 1.1712198196482793e-06,
|
||
|
|
"loss": 0.0598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05715302377939224,
|
||
|
|
"step": 3945,
|
||
|
|
"valid_targets_mean": 828.6,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.32,
|
||
|
|
"grad_norm": 0.6657347250611597,
|
||
|
|
"learning_rate": 1.1444636691296518e-06,
|
||
|
|
"loss": 0.0774,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.050616826862096786,
|
||
|
|
"step": 3950,
|
||
|
|
"valid_targets_mean": 813.9,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.328,
|
||
|
|
"grad_norm": 0.7410752665142455,
|
||
|
|
"learning_rate": 1.11800767475567e-06,
|
||
|
|
"loss": 0.087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06187132000923157,
|
||
|
|
"step": 3955,
|
||
|
|
"valid_targets_mean": 1100.0,
|
||
|
|
"valid_targets_min": 431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.336,
|
||
|
|
"grad_norm": 0.7669300808343251,
|
||
|
|
"learning_rate": 1.0918522576721014e-06,
|
||
|
|
"loss": 0.0587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.055733539164066315,
|
||
|
|
"step": 3960,
|
||
|
|
"valid_targets_mean": 838.1,
|
||
|
|
"valid_targets_min": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.344,
|
||
|
|
"grad_norm": 0.8961934706850629,
|
||
|
|
"learning_rate": 1.0659978342399003e-06,
|
||
|
|
"loss": 0.065,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08290403336286545,
|
||
|
|
"step": 3965,
|
||
|
|
"valid_targets_mean": 1151.6,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.352,
|
||
|
|
"grad_norm": 0.6391167316161919,
|
||
|
|
"learning_rate": 1.0404448160285897e-06,
|
||
|
|
"loss": 0.0622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.051166288554668427,
|
||
|
|
"step": 3970,
|
||
|
|
"valid_targets_mean": 880.2,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.36,
|
||
|
|
"grad_norm": 1.1900372923848188,
|
||
|
|
"learning_rate": 1.0151936098097015e-06,
|
||
|
|
"loss": 0.0932,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19820678234100342,
|
||
|
|
"step": 3975,
|
||
|
|
"valid_targets_mean": 1518.1,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.368,
|
||
|
|
"grad_norm": 0.8375789509534456,
|
||
|
|
"learning_rate": 9.902446175503089e-07,
|
||
|
|
"loss": 0.0649,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053763311356306076,
|
||
|
|
"step": 3980,
|
||
|
|
"valid_targets_mean": 869.1,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.376,
|
||
|
|
"grad_norm": 0.8871755308243114,
|
||
|
|
"learning_rate": 9.655982364066197e-07,
|
||
|
|
"loss": 0.083,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1606995314359665,
|
||
|
|
"step": 3985,
|
||
|
|
"valid_targets_mean": 1649.8,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.384,
|
||
|
|
"grad_norm": 0.8699835456903998,
|
||
|
|
"learning_rate": 9.412548587176595e-07,
|
||
|
|
"loss": 0.0873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1351429671049118,
|
||
|
|
"step": 3990,
|
||
|
|
"valid_targets_mean": 1388.2,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.392,
|
||
|
|
"grad_norm": 0.719732729210347,
|
||
|
|
"learning_rate": 9.172148719990237e-07,
|
||
|
|
"loss": 0.0701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06241889297962189,
|
||
|
|
"step": 3995,
|
||
|
|
"valid_targets_mean": 781.2,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4,
|
||
|
|
"grad_norm": 0.6986415261986041,
|
||
|
|
"learning_rate": 8.934786589367106e-07,
|
||
|
|
"loss": 0.053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04738050699234009,
|
||
|
|
"step": 4000,
|
||
|
|
"valid_targets_mean": 1008.9,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.408,
|
||
|
|
"grad_norm": 0.722758104299292,
|
||
|
|
"learning_rate": 8.700465973810246e-07,
|
||
|
|
"loss": 0.0595,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0537380613386631,
|
||
|
|
"step": 4005,
|
||
|
|
"valid_targets_mean": 900.3,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.416,
|
||
|
|
"grad_norm": 0.8558749091680005,
|
||
|
|
"learning_rate": 8.469190603405719e-07,
|
||
|
|
"loss": 0.0721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0537833496928215,
|
||
|
|
"step": 4010,
|
||
|
|
"valid_targets_mean": 773.1,
|
||
|
|
"valid_targets_min": 483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.424,
|
||
|
|
"grad_norm": 0.7021990518704619,
|
||
|
|
"learning_rate": 8.240964159763121e-07,
|
||
|
|
"loss": 0.0738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054406896233558655,
|
||
|
|
"step": 4015,
|
||
|
|
"valid_targets_mean": 892.9,
|
||
|
|
"valid_targets_min": 419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.432,
|
||
|
|
"grad_norm": 0.7542283622774374,
|
||
|
|
"learning_rate": 8.015790275957003e-07,
|
||
|
|
"loss": 0.061,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053682684898376465,
|
||
|
|
"step": 4020,
|
||
|
|
"valid_targets_mean": 818.3,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.44,
|
||
|
|
"grad_norm": 0.850281996059853,
|
||
|
|
"learning_rate": 7.793672536469077e-07,
|
||
|
|
"loss": 0.0603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06802020967006683,
|
||
|
|
"step": 4025,
|
||
|
|
"valid_targets_mean": 839.2,
|
||
|
|
"valid_targets_min": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.448,
|
||
|
|
"grad_norm": 0.8076325556915775,
|
||
|
|
"learning_rate": 7.574614477131081e-07,
|
||
|
|
"loss": 0.0766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06391239166259766,
|
||
|
|
"step": 4030,
|
||
|
|
"valid_targets_mean": 813.6,
|
||
|
|
"valid_targets_min": 493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.456,
|
||
|
|
"grad_norm": 0.9413722677898027,
|
||
|
|
"learning_rate": 7.358619585068583e-07,
|
||
|
|
"loss": 0.0864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11960811913013458,
|
||
|
|
"step": 4035,
|
||
|
|
"valid_targets_mean": 1009.1,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.464,
|
||
|
|
"grad_norm": 0.8084183126035619,
|
||
|
|
"learning_rate": 7.145691298645419e-07,
|
||
|
|
"loss": 0.0586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0709858387708664,
|
||
|
|
"step": 4040,
|
||
|
|
"valid_targets_mean": 964.0,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4719999999999995,
|
||
|
|
"grad_norm": 0.6245381040239134,
|
||
|
|
"learning_rate": 6.935833007408965e-07,
|
||
|
|
"loss": 0.0537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0447721928358078,
|
||
|
|
"step": 4045,
|
||
|
|
"valid_targets_mean": 1129.9,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.48,
|
||
|
|
"grad_norm": 0.7766133969307153,
|
||
|
|
"learning_rate": 6.729048052036136e-07,
|
||
|
|
"loss": 0.0614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05909734219312668,
|
||
|
|
"step": 4050,
|
||
|
|
"valid_targets_mean": 746.9,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4879999999999995,
|
||
|
|
"grad_norm": 0.7035951525278532,
|
||
|
|
"learning_rate": 6.52533972428031e-07,
|
||
|
|
"loss": 0.0591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05800524726510048,
|
||
|
|
"step": 4055,
|
||
|
|
"valid_targets_mean": 1011.1,
|
||
|
|
"valid_targets_min": 569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.496,
|
||
|
|
"grad_norm": 0.8497993216044847,
|
||
|
|
"learning_rate": 6.324711266918826e-07,
|
||
|
|
"loss": 0.0667,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06494573503732681,
|
||
|
|
"step": 4060,
|
||
|
|
"valid_targets_mean": 794.0,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.504,
|
||
|
|
"grad_norm": 0.8177891523548564,
|
||
|
|
"learning_rate": 6.127165873701457e-07,
|
||
|
|
"loss": 0.0624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06821263581514359,
|
||
|
|
"step": 4065,
|
||
|
|
"valid_targets_mean": 1191.3,
|
||
|
|
"valid_targets_min": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5120000000000005,
|
||
|
|
"grad_norm": 0.8433139564163347,
|
||
|
|
"learning_rate": 5.932706689299461e-07,
|
||
|
|
"loss": 0.065,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06629593670368195,
|
||
|
|
"step": 4070,
|
||
|
|
"valid_targets_mean": 766.3,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.52,
|
||
|
|
"grad_norm": 0.696116550154397,
|
||
|
|
"learning_rate": 5.741336809255615e-07,
|
||
|
|
"loss": 0.0587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058657385408878326,
|
||
|
|
"step": 4075,
|
||
|
|
"valid_targets_mean": 1665.0,
|
||
|
|
"valid_targets_min": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5280000000000005,
|
||
|
|
"grad_norm": 0.9008199904333769,
|
||
|
|
"learning_rate": 5.553059279934902e-07,
|
||
|
|
"loss": 0.0643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09079106897115707,
|
||
|
|
"step": 4080,
|
||
|
|
"valid_targets_mean": 1069.2,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.536,
|
||
|
|
"grad_norm": 0.8115733452505096,
|
||
|
|
"learning_rate": 5.36787709847597e-07,
|
||
|
|
"loss": 0.0673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08065254241228104,
|
||
|
|
"step": 4085,
|
||
|
|
"valid_targets_mean": 1330.5,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5440000000000005,
|
||
|
|
"grad_norm": 0.8006341156338428,
|
||
|
|
"learning_rate": 5.185793212743529e-07,
|
||
|
|
"loss": 0.0716,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08026818931102753,
|
||
|
|
"step": 4090,
|
||
|
|
"valid_targets_mean": 861.7,
|
||
|
|
"valid_targets_min": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.552,
|
||
|
|
"grad_norm": 0.6837803065916436,
|
||
|
|
"learning_rate": 5.006810521281335e-07,
|
||
|
|
"loss": 0.0611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04462552070617676,
|
||
|
|
"step": 4095,
|
||
|
|
"valid_targets_mean": 843.3,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5600000000000005,
|
||
|
|
"grad_norm": 0.9019291713010887,
|
||
|
|
"learning_rate": 4.830931873266065e-07,
|
||
|
|
"loss": 0.1052,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11825674772262573,
|
||
|
|
"step": 4100,
|
||
|
|
"valid_targets_mean": 1240.2,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.568,
|
||
|
|
"grad_norm": 0.827696103249896,
|
||
|
|
"learning_rate": 4.658160068462025e-07,
|
||
|
|
"loss": 0.0734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06389281153678894,
|
||
|
|
"step": 4105,
|
||
|
|
"valid_targets_mean": 695.1,
|
||
|
|
"valid_targets_min": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.576,
|
||
|
|
"grad_norm": 0.6539205464048427,
|
||
|
|
"learning_rate": 4.488497857176466e-07,
|
||
|
|
"loss": 0.061,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05515346676111221,
|
||
|
|
"step": 4110,
|
||
|
|
"valid_targets_mean": 827.1,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.584,
|
||
|
|
"grad_norm": 0.695414684968317,
|
||
|
|
"learning_rate": 4.321947940215898e-07,
|
||
|
|
"loss": 0.073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05857130512595177,
|
||
|
|
"step": 4115,
|
||
|
|
"valid_targets_mean": 823.6,
|
||
|
|
"valid_targets_min": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.592,
|
||
|
|
"grad_norm": 0.9637059592515878,
|
||
|
|
"learning_rate": 4.1585129688430425e-07,
|
||
|
|
"loss": 0.0554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06732626259326935,
|
||
|
|
"step": 4120,
|
||
|
|
"valid_targets_mean": 868.4,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.6,
|
||
|
|
"grad_norm": 0.7770312997823519,
|
||
|
|
"learning_rate": 3.998195544734706e-07,
|
||
|
|
"loss": 0.0581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.050231486558914185,
|
||
|
|
"step": 4125,
|
||
|
|
"valid_targets_mean": 777.0,
|
||
|
|
"valid_targets_min": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.608,
|
||
|
|
"grad_norm": 0.8563576811599827,
|
||
|
|
"learning_rate": 3.840998219940284e-07,
|
||
|
|
"loss": 0.0596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0819033831357956,
|
||
|
|
"step": 4130,
|
||
|
|
"valid_targets_mean": 1120.9,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.616,
|
||
|
|
"grad_norm": 0.7824387896671777,
|
||
|
|
"learning_rate": 3.6869234968411214e-07,
|
||
|
|
"loss": 0.0748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0632677674293518,
|
||
|
|
"step": 4135,
|
||
|
|
"valid_targets_mean": 854.2,
|
||
|
|
"valid_targets_min": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.624,
|
||
|
|
"grad_norm": 0.821997875998823,
|
||
|
|
"learning_rate": 3.5359738281107504e-07,
|
||
|
|
"loss": 0.0573,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05922164022922516,
|
||
|
|
"step": 4140,
|
||
|
|
"valid_targets_mean": 923.0,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.632,
|
||
|
|
"grad_norm": 0.8827562626339426,
|
||
|
|
"learning_rate": 3.38815161667585e-07,
|
||
|
|
"loss": 0.0605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.062201905995607376,
|
||
|
|
"step": 4145,
|
||
|
|
"valid_targets_mean": 782.9,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.64,
|
||
|
|
"grad_norm": 0.7921463346038955,
|
||
|
|
"learning_rate": 3.24345921567788e-07,
|
||
|
|
"loss": 0.0719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05580519512295723,
|
||
|
|
"step": 4150,
|
||
|
|
"valid_targets_mean": 1073.8,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.648,
|
||
|
|
"grad_norm": 0.7165813690063456,
|
||
|
|
"learning_rate": 3.101898928435754e-07,
|
||
|
|
"loss": 0.0769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08175031840801239,
|
||
|
|
"step": 4155,
|
||
|
|
"valid_targets_mean": 1565.8,
|
||
|
|
"valid_targets_min": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.656,
|
||
|
|
"grad_norm": 0.6656321494894558,
|
||
|
|
"learning_rate": 2.9634730084091343e-07,
|
||
|
|
"loss": 0.0745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05006745457649231,
|
||
|
|
"step": 4160,
|
||
|
|
"valid_targets_mean": 786.4,
|
||
|
|
"valid_targets_min": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.664,
|
||
|
|
"grad_norm": 0.939829256612867,
|
||
|
|
"learning_rate": 2.8281836591624865e-07,
|
||
|
|
"loss": 0.0641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07113055884838104,
|
||
|
|
"step": 4165,
|
||
|
|
"valid_targets_mean": 894.0,
|
||
|
|
"valid_targets_min": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.672,
|
||
|
|
"grad_norm": 0.7696682925719788,
|
||
|
|
"learning_rate": 2.6960330343301033e-07,
|
||
|
|
"loss": 0.063,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0579502247273922,
|
||
|
|
"step": 4170,
|
||
|
|
"valid_targets_mean": 786.8,
|
||
|
|
"valid_targets_min": 426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.68,
|
||
|
|
"grad_norm": 0.9182508899662383,
|
||
|
|
"learning_rate": 2.5670232375817784e-07,
|
||
|
|
"loss": 0.0763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09177736937999725,
|
||
|
|
"step": 4175,
|
||
|
|
"valid_targets_mean": 1085.4,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.688,
|
||
|
|
"grad_norm": 0.8160880687662895,
|
||
|
|
"learning_rate": 2.441156322589322e-07,
|
||
|
|
"loss": 0.1075,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11923444271087646,
|
||
|
|
"step": 4180,
|
||
|
|
"valid_targets_mean": 1245.9,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.696,
|
||
|
|
"grad_norm": 0.6590045004958579,
|
||
|
|
"learning_rate": 2.318434292993832e-07,
|
||
|
|
"loss": 0.057,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04918127506971359,
|
||
|
|
"step": 4185,
|
||
|
|
"valid_targets_mean": 838.5,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.704,
|
||
|
|
"grad_norm": 0.7931935096125178,
|
||
|
|
"learning_rate": 2.1988591023738514e-07,
|
||
|
|
"loss": 0.0737,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05984684079885483,
|
||
|
|
"step": 4190,
|
||
|
|
"valid_targets_mean": 636.6,
|
||
|
|
"valid_targets_min": 526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.712,
|
||
|
|
"grad_norm": 0.8409861634776767,
|
||
|
|
"learning_rate": 2.0824326542142835e-07,
|
||
|
|
"loss": 0.0752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0892910361289978,
|
||
|
|
"step": 4195,
|
||
|
|
"valid_targets_mean": 1090.1,
|
||
|
|
"valid_targets_min": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.72,
|
||
|
|
"grad_norm": 0.7840113248676499,
|
||
|
|
"learning_rate": 1.9691568018759931e-07,
|
||
|
|
"loss": 0.0794,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05649395287036896,
|
||
|
|
"step": 4200,
|
||
|
|
"valid_targets_mean": 911.5,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.728,
|
||
|
|
"grad_norm": 0.8684225809470628,
|
||
|
|
"learning_rate": 1.8590333485664525e-07,
|
||
|
|
"loss": 0.0589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06985966861248016,
|
||
|
|
"step": 4205,
|
||
|
|
"valid_targets_mean": 1026.4,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.736,
|
||
|
|
"grad_norm": 1.273545703399396,
|
||
|
|
"learning_rate": 1.752064047310853e-07,
|
||
|
|
"loss": 0.074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12074144184589386,
|
||
|
|
"step": 4210,
|
||
|
|
"valid_targets_mean": 1021.5,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.744,
|
||
|
|
"grad_norm": 0.843992482033791,
|
||
|
|
"learning_rate": 1.6482506009243949e-07,
|
||
|
|
"loss": 0.0661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06381138414144516,
|
||
|
|
"step": 4215,
|
||
|
|
"valid_targets_mean": 786.6,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.752,
|
||
|
|
"grad_norm": 0.8415630384133468,
|
||
|
|
"learning_rate": 1.5475946619850192e-07,
|
||
|
|
"loss": 0.0668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07749566435813904,
|
||
|
|
"step": 4220,
|
||
|
|
"valid_targets_mean": 989.6,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.76,
|
||
|
|
"grad_norm": 0.7940286483649288,
|
||
|
|
"learning_rate": 1.4500978328071845e-07,
|
||
|
|
"loss": 0.0555,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06216109171509743,
|
||
|
|
"step": 4225,
|
||
|
|
"valid_targets_mean": 753.4,
|
||
|
|
"valid_targets_min": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.768,
|
||
|
|
"grad_norm": 0.9055893995439169,
|
||
|
|
"learning_rate": 1.3557616654163775e-07,
|
||
|
|
"loss": 0.0567,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06529085338115692,
|
||
|
|
"step": 4230,
|
||
|
|
"valid_targets_mean": 949.4,
|
||
|
|
"valid_targets_min": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.776,
|
||
|
|
"grad_norm": 0.8453635654072907,
|
||
|
|
"learning_rate": 1.264587661524308e-07,
|
||
|
|
"loss": 0.0642,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0687166303396225,
|
||
|
|
"step": 4235,
|
||
|
|
"valid_targets_mean": 933.4,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.784,
|
||
|
|
"grad_norm": 1.0395756007158212,
|
||
|
|
"learning_rate": 1.1765772725051084e-07,
|
||
|
|
"loss": 0.0729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08578158169984818,
|
||
|
|
"step": 4240,
|
||
|
|
"valid_targets_mean": 962.8,
|
||
|
|
"valid_targets_min": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.792,
|
||
|
|
"grad_norm": 0.6995744786603656,
|
||
|
|
"learning_rate": 1.0917318993721726e-07,
|
||
|
|
"loss": 0.0724,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06010109558701515,
|
||
|
|
"step": 4245,
|
||
|
|
"valid_targets_mean": 1372.9,
|
||
|
|
"valid_targets_min": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8,
|
||
|
|
"grad_norm": 0.9882137918825006,
|
||
|
|
"learning_rate": 1.0100528927558861e-07,
|
||
|
|
"loss": 0.0947,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22277545928955078,
|
||
|
|
"step": 4250,
|
||
|
|
"valid_targets_mean": 1339.2,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.808,
|
||
|
|
"grad_norm": 0.9456999313109349,
|
||
|
|
"learning_rate": 9.31541552882087e-08,
|
||
|
|
"loss": 0.077,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11338469386100769,
|
||
|
|
"step": 4255,
|
||
|
|
"valid_targets_mean": 1125.1,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.816,
|
||
|
|
"grad_norm": 0.781828119425798,
|
||
|
|
"learning_rate": 8.561991295514161e-08,
|
||
|
|
"loss": 0.0605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06928466260433197,
|
||
|
|
"step": 4260,
|
||
|
|
"valid_targets_mean": 916.7,
|
||
|
|
"valid_targets_min": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.824,
|
||
|
|
"grad_norm": 0.6906982313965497,
|
||
|
|
"learning_rate": 7.840268221193548e-08,
|
||
|
|
"loss": 0.0662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04883572459220886,
|
||
|
|
"step": 4265,
|
||
|
|
"valid_targets_mean": 926.1,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.832,
|
||
|
|
"grad_norm": 0.6959903444394521,
|
||
|
|
"learning_rate": 7.150257794772186e-08,
|
||
|
|
"loss": 0.1008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048169322311878204,
|
||
|
|
"step": 4270,
|
||
|
|
"valid_targets_mean": 747.8,
|
||
|
|
"valid_targets_min": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.84,
|
||
|
|
"grad_norm": 1.0189241815550472,
|
||
|
|
"learning_rate": 6.491971000337938e-08,
|
||
|
|
"loss": 0.0682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07251983880996704,
|
||
|
|
"step": 4275,
|
||
|
|
"valid_targets_mean": 1026.7,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.848,
|
||
|
|
"grad_norm": 0.7930936377631168,
|
||
|
|
"learning_rate": 5.8654183169788435e-08,
|
||
|
|
"loss": 0.0562,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05123412609100342,
|
||
|
|
"step": 4280,
|
||
|
|
"valid_targets_mean": 734.1,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.856,
|
||
|
|
"grad_norm": 0.845901888009316,
|
||
|
|
"learning_rate": 5.270609718616593e-08,
|
||
|
|
"loss": 0.0769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05024448037147522,
|
||
|
|
"step": 4285,
|
||
|
|
"valid_targets_mean": 1004.8,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.864,
|
||
|
|
"grad_norm": 0.7677871662641823,
|
||
|
|
"learning_rate": 4.70755467384687e-08,
|
||
|
|
"loss": 0.0744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058658793568611145,
|
||
|
|
"step": 4290,
|
||
|
|
"valid_targets_mean": 911.6,
|
||
|
|
"valid_targets_min": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.872,
|
||
|
|
"grad_norm": 0.9839547388014913,
|
||
|
|
"learning_rate": 4.176262145789478e-08,
|
||
|
|
"loss": 0.0849,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15095025300979614,
|
||
|
|
"step": 4295,
|
||
|
|
"valid_targets_mean": 1179.4,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.88,
|
||
|
|
"grad_norm": 0.6814461361019861,
|
||
|
|
"learning_rate": 3.676740591945782e-08,
|
||
|
|
"loss": 0.0667,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05614306405186653,
|
||
|
|
"step": 4300,
|
||
|
|
"valid_targets_mean": 849.9,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.888,
|
||
|
|
"grad_norm": 0.7725328157167015,
|
||
|
|
"learning_rate": 3.208997964062821e-08,
|
||
|
|
"loss": 0.0735,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.055312514305114746,
|
||
|
|
"step": 4305,
|
||
|
|
"valid_targets_mean": 731.9,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.896,
|
||
|
|
"grad_norm": 0.9230695486583556,
|
||
|
|
"learning_rate": 2.773041708008295e-08,
|
||
|
|
"loss": 0.0654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09205324947834015,
|
||
|
|
"step": 4310,
|
||
|
|
"valid_targets_mean": 1044.9,
|
||
|
|
"valid_targets_min": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.904,
|
||
|
|
"grad_norm": 0.9005169578053986,
|
||
|
|
"learning_rate": 2.3688787636511057e-08,
|
||
|
|
"loss": 0.058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08456471562385559,
|
||
|
|
"step": 4315,
|
||
|
|
"valid_targets_mean": 1237.9,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.912,
|
||
|
|
"grad_norm": 1.7069733059573364,
|
||
|
|
"learning_rate": 1.9965155647507782e-08,
|
||
|
|
"loss": 0.0698,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05954921245574951,
|
||
|
|
"step": 4320,
|
||
|
|
"valid_targets_mean": 886.6,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.92,
|
||
|
|
"grad_norm": 0.9240349041779561,
|
||
|
|
"learning_rate": 1.655958038855765e-08,
|
||
|
|
"loss": 0.0886,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07848560810089111,
|
||
|
|
"step": 4325,
|
||
|
|
"valid_targets_mean": 1234.1,
|
||
|
|
"valid_targets_min": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.928,
|
||
|
|
"grad_norm": 0.7716220389789027,
|
||
|
|
"learning_rate": 1.3472116072084096e-08,
|
||
|
|
"loss": 0.0569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05979716777801514,
|
||
|
|
"step": 4330,
|
||
|
|
"valid_targets_mean": 837.9,
|
||
|
|
"valid_targets_min": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.936,
|
||
|
|
"grad_norm": 0.7776719180845648,
|
||
|
|
"learning_rate": 1.0702811846590167e-08,
|
||
|
|
"loss": 0.0629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0593021959066391,
|
||
|
|
"step": 4335,
|
||
|
|
"valid_targets_mean": 819.6,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.944,
|
||
|
|
"grad_norm": 0.797242975709299,
|
||
|
|
"learning_rate": 8.251711795876916e-09,
|
||
|
|
"loss": 0.0627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.060170456767082214,
|
||
|
|
"step": 4340,
|
||
|
|
"valid_targets_mean": 807.1,
|
||
|
|
"valid_targets_min": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.952,
|
||
|
|
"grad_norm": 0.7339526517022201,
|
||
|
|
"learning_rate": 6.1188549383373044e-09,
|
||
|
|
"loss": 0.0589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06010982766747475,
|
||
|
|
"step": 4345,
|
||
|
|
"valid_targets_mean": 789.3,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.96,
|
||
|
|
"grad_norm": 0.8389381067646968,
|
||
|
|
"learning_rate": 4.304275226338916e-09,
|
||
|
|
"loss": 0.0679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06441819667816162,
|
||
|
|
"step": 4350,
|
||
|
|
"valid_targets_mean": 1073.7,
|
||
|
|
"valid_targets_min": 383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.968,
|
||
|
|
"grad_norm": 0.7659262300618347,
|
||
|
|
"learning_rate": 2.8080015456799503e-09,
|
||
|
|
"loss": 0.0542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05551183596253395,
|
||
|
|
"step": 4355,
|
||
|
|
"valid_targets_mean": 826.9,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.976,
|
||
|
|
"grad_norm": 0.7634346959399079,
|
||
|
|
"learning_rate": 1.6300577151340257e-09,
|
||
|
|
"loss": 0.0598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05888133496046066,
|
||
|
|
"step": 4360,
|
||
|
|
"valid_targets_mean": 951.8,
|
||
|
|
"valid_targets_min": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.984,
|
||
|
|
"grad_norm": 0.8460671107111588,
|
||
|
|
"learning_rate": 7.70462486070489e-10,
|
||
|
|
"loss": 0.0554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04984348267316818,
|
||
|
|
"step": 4365,
|
||
|
|
"valid_targets_mean": 901.7,
|
||
|
|
"valid_targets_min": 678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.992,
|
||
|
|
"grad_norm": 0.7850119216540071,
|
||
|
|
"learning_rate": 2.2922954214799065e-10,
|
||
|
|
"loss": 0.0632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06389277428388596,
|
||
|
|
"step": 4370,
|
||
|
|
"valid_targets_mean": 891.9,
|
||
|
|
"valid_targets_min": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"grad_norm": 0.9340733771796962,
|
||
|
|
"learning_rate": 6.367499107984288e-12,
|
||
|
|
"loss": 0.0571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05963999778032303,
|
||
|
|
"step": 4375,
|
||
|
|
"valid_targets_mean": 783.6,
|
||
|
|
"valid_targets_min": 551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05963999778032303,
|
||
|
|
"step": 4375,
|
||
|
|
"total_flos": 262473517694976.0,
|
||
|
|
"train_loss": 0.13307374988964626,
|
||
|
|
"train_runtime": 6395.8234,
|
||
|
|
"train_samples_per_second": 10.941,
|
||
|
|
"train_steps_per_second": 0.684,
|
||
|
|
"valid_targets_mean": 783.6,
|
||
|
|
"valid_targets_min": 551
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 4375,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 1500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 262473517694976.0,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|