Model: laion/openthoughts-4-code-qwen3-32b-annotated-32k_qwen3-1.7B_32k Source: Original Platform
16526 lines
459 KiB
JSON
16526 lines
459 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 2.0,
|
|
"eval_steps": 500,
|
|
"global_step": 7494,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0013344008540165466,
|
|
"grad_norm": 7.722733096580246,
|
|
"learning_rate": 7.999999999999999e-07,
|
|
"loss": 0.8094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8109167218208313,
|
|
"step": 5,
|
|
"valid_targets_mean": 17002.7,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.0026688017080330933,
|
|
"grad_norm": 7.174760064448044,
|
|
"learning_rate": 1.8e-06,
|
|
"loss": 0.7891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8015649318695068,
|
|
"step": 10,
|
|
"valid_targets_mean": 15526.4,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.0040032025620496394,
|
|
"grad_norm": 4.8505743485010075,
|
|
"learning_rate": 2.8e-06,
|
|
"loss": 0.7545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7334458827972412,
|
|
"step": 15,
|
|
"valid_targets_mean": 16465.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.0053376034160661865,
|
|
"grad_norm": 2.68270427913022,
|
|
"learning_rate": 3.7999999999999996e-06,
|
|
"loss": 0.751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7277793288230896,
|
|
"step": 20,
|
|
"valid_targets_mean": 15549.5,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.006672004270082733,
|
|
"grad_norm": 1.120496904003529,
|
|
"learning_rate": 4.8e-06,
|
|
"loss": 0.6974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7064324617385864,
|
|
"step": 25,
|
|
"valid_targets_mean": 14978.5,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 0.008006405124099279,
|
|
"grad_norm": 0.9291840952692353,
|
|
"learning_rate": 5.7999999999999995e-06,
|
|
"loss": 0.7026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7051266431808472,
|
|
"step": 30,
|
|
"valid_targets_mean": 16092.5,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 0.009340805978115827,
|
|
"grad_norm": 0.5938965072427588,
|
|
"learning_rate": 6.8e-06,
|
|
"loss": 0.671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6741308569908142,
|
|
"step": 35,
|
|
"valid_targets_mean": 15905.5,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.010675206832132373,
|
|
"grad_norm": 0.39881741842407664,
|
|
"learning_rate": 7.799999999999998e-06,
|
|
"loss": 0.686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6620453000068665,
|
|
"step": 40,
|
|
"valid_targets_mean": 17101.4,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 0.01200960768614892,
|
|
"grad_norm": 0.30052510531289645,
|
|
"learning_rate": 8.799999999999999e-06,
|
|
"loss": 0.6603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6401622295379639,
|
|
"step": 45,
|
|
"valid_targets_mean": 16533.7,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.013344008540165465,
|
|
"grad_norm": 0.2422091402873916,
|
|
"learning_rate": 9.799999999999998e-06,
|
|
"loss": 0.6859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6876528859138489,
|
|
"step": 50,
|
|
"valid_targets_mean": 15775.3,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 0.014678409394182012,
|
|
"grad_norm": 0.18435404196588112,
|
|
"learning_rate": 1.0799999999999998e-05,
|
|
"loss": 0.6601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6202096343040466,
|
|
"step": 55,
|
|
"valid_targets_mean": 16902.1,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 0.016012810248198558,
|
|
"grad_norm": 0.2015230015509419,
|
|
"learning_rate": 1.1799999999999999e-05,
|
|
"loss": 0.6605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6784886121749878,
|
|
"step": 60,
|
|
"valid_targets_mean": 15094.7,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.017347211102215106,
|
|
"grad_norm": 0.18391518955306396,
|
|
"learning_rate": 1.2799999999999998e-05,
|
|
"loss": 0.6674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6657936573028564,
|
|
"step": 65,
|
|
"valid_targets_mean": 16728.3,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 0.018681611956231654,
|
|
"grad_norm": 0.18728851278657252,
|
|
"learning_rate": 1.3799999999999998e-05,
|
|
"loss": 0.6615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6592050790786743,
|
|
"step": 70,
|
|
"valid_targets_mean": 15899.9,
|
|
"valid_targets_min": 89
|
|
},
|
|
{
|
|
"epoch": 0.020016012810248198,
|
|
"grad_norm": 0.18276533786350968,
|
|
"learning_rate": 1.4799999999999999e-05,
|
|
"loss": 0.6482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6309381723403931,
|
|
"step": 75,
|
|
"valid_targets_mean": 16547.5,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 0.021350413664264746,
|
|
"grad_norm": 0.15304572919199264,
|
|
"learning_rate": 1.5799999999999998e-05,
|
|
"loss": 0.6546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6724115014076233,
|
|
"step": 80,
|
|
"valid_targets_mean": 16472.0,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 0.02268481451828129,
|
|
"grad_norm": 0.17202347874272747,
|
|
"learning_rate": 1.68e-05,
|
|
"loss": 0.6514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6392582654953003,
|
|
"step": 85,
|
|
"valid_targets_mean": 15872.7,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.02401921537229784,
|
|
"grad_norm": 0.1605220259750001,
|
|
"learning_rate": 1.78e-05,
|
|
"loss": 0.6534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6394810676574707,
|
|
"step": 90,
|
|
"valid_targets_mean": 16103.1,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 0.025353616226314386,
|
|
"grad_norm": 0.1856380489569822,
|
|
"learning_rate": 1.8799999999999996e-05,
|
|
"loss": 0.6642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6631790399551392,
|
|
"step": 95,
|
|
"valid_targets_mean": 16072.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 0.02668801708033093,
|
|
"grad_norm": 0.17012529774883547,
|
|
"learning_rate": 1.98e-05,
|
|
"loss": 0.6403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6355913877487183,
|
|
"step": 100,
|
|
"valid_targets_mean": 15947.1,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 0.02802241793434748,
|
|
"grad_norm": 0.18944071384180122,
|
|
"learning_rate": 2.0799999999999997e-05,
|
|
"loss": 0.6474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.630897045135498,
|
|
"step": 105,
|
|
"valid_targets_mean": 16317.5,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 0.029356818788364023,
|
|
"grad_norm": 0.28359974957392275,
|
|
"learning_rate": 2.1799999999999998e-05,
|
|
"loss": 0.6414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6439160704612732,
|
|
"step": 110,
|
|
"valid_targets_mean": 17678.0,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 0.03069121964238057,
|
|
"grad_norm": 0.20867511279432566,
|
|
"learning_rate": 2.28e-05,
|
|
"loss": 0.653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6740957498550415,
|
|
"step": 115,
|
|
"valid_targets_mean": 14610.8,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 0.032025620496397116,
|
|
"grad_norm": 0.22034953051672115,
|
|
"learning_rate": 2.38e-05,
|
|
"loss": 0.6551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6769538521766663,
|
|
"step": 120,
|
|
"valid_targets_mean": 15110.7,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 0.03336002135041367,
|
|
"grad_norm": 0.19858036390829426,
|
|
"learning_rate": 2.4799999999999996e-05,
|
|
"loss": 0.6566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6704879999160767,
|
|
"step": 125,
|
|
"valid_targets_mean": 16683.8,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 0.03469442220443021,
|
|
"grad_norm": 0.17343296109636808,
|
|
"learning_rate": 2.5799999999999997e-05,
|
|
"loss": 0.641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6155713796615601,
|
|
"step": 130,
|
|
"valid_targets_mean": 16543.7,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.036028823058446756,
|
|
"grad_norm": 0.21961900685930796,
|
|
"learning_rate": 2.6799999999999998e-05,
|
|
"loss": 0.6375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6220330595970154,
|
|
"step": 135,
|
|
"valid_targets_mean": 16411.1,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.03736322391246331,
|
|
"grad_norm": 0.22084342992200506,
|
|
"learning_rate": 2.7799999999999995e-05,
|
|
"loss": 0.6648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6999180912971497,
|
|
"step": 140,
|
|
"valid_targets_mean": 14523.8,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 0.03869762476647985,
|
|
"grad_norm": 0.2310359062397061,
|
|
"learning_rate": 2.88e-05,
|
|
"loss": 0.6534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6643216013908386,
|
|
"step": 145,
|
|
"valid_targets_mean": 16719.3,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 0.040032025620496396,
|
|
"grad_norm": 0.186516441432285,
|
|
"learning_rate": 2.9799999999999996e-05,
|
|
"loss": 0.6485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6516410112380981,
|
|
"step": 150,
|
|
"valid_targets_mean": 17415.4,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 0.04136642647451294,
|
|
"grad_norm": 0.2534333458932033,
|
|
"learning_rate": 3.0799999999999996e-05,
|
|
"loss": 0.654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6449211239814758,
|
|
"step": 155,
|
|
"valid_targets_mean": 15443.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.04270082732852949,
|
|
"grad_norm": 0.29545907304950525,
|
|
"learning_rate": 3.1799999999999994e-05,
|
|
"loss": 0.6442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6491722464561462,
|
|
"step": 160,
|
|
"valid_targets_mean": 16999.6,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 0.044035228182546036,
|
|
"grad_norm": 0.26013562588749856,
|
|
"learning_rate": 3.28e-05,
|
|
"loss": 0.6282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6321196556091309,
|
|
"step": 165,
|
|
"valid_targets_mean": 16109.6,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 0.04536962903656258,
|
|
"grad_norm": 0.20357197994388182,
|
|
"learning_rate": 3.3799999999999995e-05,
|
|
"loss": 0.6459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6354120969772339,
|
|
"step": 170,
|
|
"valid_targets_mean": 16442.6,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.04670402989057913,
|
|
"grad_norm": 0.2528215866301578,
|
|
"learning_rate": 3.48e-05,
|
|
"loss": 0.6257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6035679578781128,
|
|
"step": 175,
|
|
"valid_targets_mean": 16467.2,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 0.04803843074459568,
|
|
"grad_norm": 0.29491368956485453,
|
|
"learning_rate": 3.5799999999999996e-05,
|
|
"loss": 0.6617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6350039839744568,
|
|
"step": 180,
|
|
"valid_targets_mean": 16867.5,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.04937283159861222,
|
|
"grad_norm": 0.2644554424332122,
|
|
"learning_rate": 3.679999999999999e-05,
|
|
"loss": 0.6367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6491011381149292,
|
|
"step": 185,
|
|
"valid_targets_mean": 16007.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.05070723245262877,
|
|
"grad_norm": 0.26708080017347674,
|
|
"learning_rate": 3.78e-05,
|
|
"loss": 0.6586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6851446628570557,
|
|
"step": 190,
|
|
"valid_targets_mean": 14931.2,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 0.05204163330664532,
|
|
"grad_norm": 0.32544168158249764,
|
|
"learning_rate": 3.8799999999999994e-05,
|
|
"loss": 0.6426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6339905858039856,
|
|
"step": 195,
|
|
"valid_targets_mean": 16106.9,
|
|
"valid_targets_min": 101
|
|
},
|
|
{
|
|
"epoch": 0.05337603416066186,
|
|
"grad_norm": 0.27173833321054686,
|
|
"learning_rate": 3.979999999999999e-05,
|
|
"loss": 0.6433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6287566423416138,
|
|
"step": 200,
|
|
"valid_targets_mean": 16933.6,
|
|
"valid_targets_min": 126
|
|
},
|
|
{
|
|
"epoch": 0.054710435014678406,
|
|
"grad_norm": 0.22284495398618304,
|
|
"learning_rate": 4.08e-05,
|
|
"loss": 0.6543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6004332900047302,
|
|
"step": 205,
|
|
"valid_targets_mean": 16562.4,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.05604483586869496,
|
|
"grad_norm": 0.34011968889770705,
|
|
"learning_rate": 4.18e-05,
|
|
"loss": 0.6531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6229228973388672,
|
|
"step": 210,
|
|
"valid_targets_mean": 15680.9,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 0.0573792367227115,
|
|
"grad_norm": 0.22699273620693833,
|
|
"learning_rate": 4.28e-05,
|
|
"loss": 0.6408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.588473916053772,
|
|
"step": 215,
|
|
"valid_targets_mean": 16374.6,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 0.058713637576728046,
|
|
"grad_norm": 0.34995893426720126,
|
|
"learning_rate": 4.3799999999999994e-05,
|
|
"loss": 0.6479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6611473560333252,
|
|
"step": 220,
|
|
"valid_targets_mean": 16212.5,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 0.0600480384307446,
|
|
"grad_norm": 0.28994318186176626,
|
|
"learning_rate": 4.48e-05,
|
|
"loss": 0.6398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.627586841583252,
|
|
"step": 225,
|
|
"valid_targets_mean": 16141.5,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.06138243928476114,
|
|
"grad_norm": 0.29147818946690207,
|
|
"learning_rate": 4.5799999999999995e-05,
|
|
"loss": 0.6594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6853932738304138,
|
|
"step": 230,
|
|
"valid_targets_mean": 14766.9,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.0627168401387777,
|
|
"grad_norm": 0.22883342494533743,
|
|
"learning_rate": 4.68e-05,
|
|
"loss": 0.6482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6339342594146729,
|
|
"step": 235,
|
|
"valid_targets_mean": 15721.2,
|
|
"valid_targets_min": 183
|
|
},
|
|
{
|
|
"epoch": 0.06405124099279423,
|
|
"grad_norm": 0.2461647251551573,
|
|
"learning_rate": 4.7799999999999996e-05,
|
|
"loss": 0.6426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6382437944412231,
|
|
"step": 240,
|
|
"valid_targets_mean": 15432.7,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.06538564184681078,
|
|
"grad_norm": 0.26152022660942825,
|
|
"learning_rate": 4.8799999999999994e-05,
|
|
"loss": 0.637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6458475589752197,
|
|
"step": 245,
|
|
"valid_targets_mean": 16287.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.06672004270082733,
|
|
"grad_norm": 0.2515010561393975,
|
|
"learning_rate": 4.98e-05,
|
|
"loss": 0.6589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6445261240005493,
|
|
"step": 250,
|
|
"valid_targets_mean": 16890.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.06805444355484387,
|
|
"grad_norm": 0.29505128400263414,
|
|
"learning_rate": 5.0799999999999995e-05,
|
|
"loss": 0.6496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6435770988464355,
|
|
"step": 255,
|
|
"valid_targets_mean": 15024.3,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 0.06938884440886042,
|
|
"grad_norm": 0.3188315335754373,
|
|
"learning_rate": 5.179999999999999e-05,
|
|
"loss": 0.6354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6181894540786743,
|
|
"step": 260,
|
|
"valid_targets_mean": 16111.8,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.07072324526287697,
|
|
"grad_norm": 0.35692218300890305,
|
|
"learning_rate": 5.279999999999999e-05,
|
|
"loss": 0.6368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6286272406578064,
|
|
"step": 265,
|
|
"valid_targets_mean": 15167.3,
|
|
"valid_targets_min": 109
|
|
},
|
|
{
|
|
"epoch": 0.07205764611689351,
|
|
"grad_norm": 0.3502722942708975,
|
|
"learning_rate": 5.38e-05,
|
|
"loss": 0.6445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.615933895111084,
|
|
"step": 270,
|
|
"valid_targets_mean": 16902.7,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 0.07339204697091006,
|
|
"grad_norm": 0.3215557581594254,
|
|
"learning_rate": 5.48e-05,
|
|
"loss": 0.6404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6237388849258423,
|
|
"step": 275,
|
|
"valid_targets_mean": 16192.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.07472644782492661,
|
|
"grad_norm": 0.39899551445003084,
|
|
"learning_rate": 5.5799999999999994e-05,
|
|
"loss": 0.63,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5924405455589294,
|
|
"step": 280,
|
|
"valid_targets_mean": 15586.0,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 0.07606084867894315,
|
|
"grad_norm": 0.36329343959447064,
|
|
"learning_rate": 5.679999999999999e-05,
|
|
"loss": 0.6413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6253823041915894,
|
|
"step": 285,
|
|
"valid_targets_mean": 15027.2,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.0773952495329597,
|
|
"grad_norm": 0.3552887259009282,
|
|
"learning_rate": 5.78e-05,
|
|
"loss": 0.633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6298519372940063,
|
|
"step": 290,
|
|
"valid_targets_mean": 15274.9,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 0.07872965038697624,
|
|
"grad_norm": 0.5427295323238193,
|
|
"learning_rate": 5.88e-05,
|
|
"loss": 0.6317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6390588283538818,
|
|
"step": 295,
|
|
"valid_targets_mean": 16540.1,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 0.08006405124099279,
|
|
"grad_norm": 0.35816189246454916,
|
|
"learning_rate": 5.98e-05,
|
|
"loss": 0.6602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6374964714050293,
|
|
"step": 300,
|
|
"valid_targets_mean": 15592.1,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.08139845209500934,
|
|
"grad_norm": 0.27987335172366956,
|
|
"learning_rate": 6.0799999999999994e-05,
|
|
"loss": 0.6367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6291804313659668,
|
|
"step": 305,
|
|
"valid_targets_mean": 15667.8,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.08273285294902588,
|
|
"grad_norm": 0.39583641768435684,
|
|
"learning_rate": 6.18e-05,
|
|
"loss": 0.6459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.627192497253418,
|
|
"step": 310,
|
|
"valid_targets_mean": 16965.0,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 0.08406725380304243,
|
|
"grad_norm": 0.34497427719209073,
|
|
"learning_rate": 6.28e-05,
|
|
"loss": 0.6357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6164098381996155,
|
|
"step": 315,
|
|
"valid_targets_mean": 16704.1,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.08540165465705898,
|
|
"grad_norm": 0.5235857704611651,
|
|
"learning_rate": 6.379999999999999e-05,
|
|
"loss": 0.6412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6287165880203247,
|
|
"step": 320,
|
|
"valid_targets_mean": 15693.0,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.08673605551107552,
|
|
"grad_norm": 0.5312837364567617,
|
|
"learning_rate": 6.479999999999999e-05,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6154929995536804,
|
|
"step": 325,
|
|
"valid_targets_mean": 16596.4,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 0.08807045636509207,
|
|
"grad_norm": 0.5265871873124938,
|
|
"learning_rate": 6.579999999999999e-05,
|
|
"loss": 0.6242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6224241852760315,
|
|
"step": 330,
|
|
"valid_targets_mean": 15641.3,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 0.08940485721910862,
|
|
"grad_norm": 0.45762649729585103,
|
|
"learning_rate": 6.68e-05,
|
|
"loss": 0.6308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6392424702644348,
|
|
"step": 335,
|
|
"valid_targets_mean": 17120.2,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 0.09073925807312516,
|
|
"grad_norm": 0.4694075757636562,
|
|
"learning_rate": 6.78e-05,
|
|
"loss": 0.6419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6159706115722656,
|
|
"step": 340,
|
|
"valid_targets_mean": 16286.1,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.09207365892714171,
|
|
"grad_norm": 0.5016929581867708,
|
|
"learning_rate": 6.879999999999999e-05,
|
|
"loss": 0.6334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6220609545707703,
|
|
"step": 345,
|
|
"valid_targets_mean": 17040.8,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.09340805978115826,
|
|
"grad_norm": 0.6373560197992398,
|
|
"learning_rate": 6.979999999999999e-05,
|
|
"loss": 0.6437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6372533440589905,
|
|
"step": 350,
|
|
"valid_targets_mean": 15782.5,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.0947424606351748,
|
|
"grad_norm": 0.5016876713993831,
|
|
"learning_rate": 7.079999999999999e-05,
|
|
"loss": 0.6437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6407184600830078,
|
|
"step": 355,
|
|
"valid_targets_mean": 16137.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.09607686148919135,
|
|
"grad_norm": 0.6364967752399274,
|
|
"learning_rate": 7.18e-05,
|
|
"loss": 0.6294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185452342033386,
|
|
"step": 360,
|
|
"valid_targets_mean": 17217.6,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 0.0974112623432079,
|
|
"grad_norm": 0.5233293275358429,
|
|
"learning_rate": 7.28e-05,
|
|
"loss": 0.6413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.634662926197052,
|
|
"step": 365,
|
|
"valid_targets_mean": 16162.9,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 0.09874566319722444,
|
|
"grad_norm": 0.5448113397417333,
|
|
"learning_rate": 7.379999999999999e-05,
|
|
"loss": 0.6352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.637201189994812,
|
|
"step": 370,
|
|
"valid_targets_mean": 15398.7,
|
|
"valid_targets_min": 6
|
|
},
|
|
{
|
|
"epoch": 0.100080064051241,
|
|
"grad_norm": 0.5456285463552911,
|
|
"learning_rate": 7.479999999999999e-05,
|
|
"loss": 0.6426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6574046611785889,
|
|
"step": 375,
|
|
"valid_targets_mean": 15670.4,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 0.10141446490525755,
|
|
"grad_norm": 0.38739916988200274,
|
|
"learning_rate": 7.579999999999999e-05,
|
|
"loss": 0.6297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5901604890823364,
|
|
"step": 380,
|
|
"valid_targets_mean": 17108.8,
|
|
"valid_targets_min": 105
|
|
},
|
|
{
|
|
"epoch": 0.10274886575927408,
|
|
"grad_norm": 0.5322073833756792,
|
|
"learning_rate": 7.68e-05,
|
|
"loss": 0.6355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6376615762710571,
|
|
"step": 385,
|
|
"valid_targets_mean": 17225.1,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 0.10408326661329063,
|
|
"grad_norm": 0.4785089017506761,
|
|
"learning_rate": 7.780000000000001e-05,
|
|
"loss": 0.6413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6684964895248413,
|
|
"step": 390,
|
|
"valid_targets_mean": 15992.3,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.10541766746730719,
|
|
"grad_norm": 0.5065768208339545,
|
|
"learning_rate": 7.879999999999999e-05,
|
|
"loss": 0.63,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6249961256980896,
|
|
"step": 395,
|
|
"valid_targets_mean": 15035.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.10675206832132372,
|
|
"grad_norm": 0.5140100470937524,
|
|
"learning_rate": 7.98e-05,
|
|
"loss": 0.6405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6756935119628906,
|
|
"step": 400,
|
|
"valid_targets_mean": 16192.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.10808646917534027,
|
|
"grad_norm": 0.4375144441663607,
|
|
"learning_rate": 8.079999999999999e-05,
|
|
"loss": 0.6473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6521981954574585,
|
|
"step": 405,
|
|
"valid_targets_mean": 15850.0,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.10942087002935681,
|
|
"grad_norm": 0.2929651892552746,
|
|
"learning_rate": 8.18e-05,
|
|
"loss": 0.6357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6364883184432983,
|
|
"step": 410,
|
|
"valid_targets_mean": 15711.7,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.11075527088337336,
|
|
"grad_norm": 0.4564153270660509,
|
|
"learning_rate": 8.28e-05,
|
|
"loss": 0.6131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6293573379516602,
|
|
"step": 415,
|
|
"valid_targets_mean": 15629.3,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.11208967173738991,
|
|
"grad_norm": 0.3507225963416143,
|
|
"learning_rate": 8.379999999999999e-05,
|
|
"loss": 0.6145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6214045286178589,
|
|
"step": 420,
|
|
"valid_targets_mean": 16229.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.11342407259140645,
|
|
"grad_norm": 0.4192240853043197,
|
|
"learning_rate": 8.48e-05,
|
|
"loss": 0.6274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.619477391242981,
|
|
"step": 425,
|
|
"valid_targets_mean": 15496.3,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 0.114758473445423,
|
|
"grad_norm": 0.45349034065464183,
|
|
"learning_rate": 8.579999999999998e-05,
|
|
"loss": 0.6338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6174418926239014,
|
|
"step": 430,
|
|
"valid_targets_mean": 17214.9,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.11609287429943956,
|
|
"grad_norm": 0.4890743720349672,
|
|
"learning_rate": 8.68e-05,
|
|
"loss": 0.6439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6422944068908691,
|
|
"step": 435,
|
|
"valid_targets_mean": 14807.0,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 0.11742727515345609,
|
|
"grad_norm": 0.3945142664933707,
|
|
"learning_rate": 8.779999999999999e-05,
|
|
"loss": 0.6323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6414684057235718,
|
|
"step": 440,
|
|
"valid_targets_mean": 16481.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 0.11876167600747264,
|
|
"grad_norm": 0.396589484423819,
|
|
"learning_rate": 8.879999999999999e-05,
|
|
"loss": 0.6283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6542065739631653,
|
|
"step": 445,
|
|
"valid_targets_mean": 17148.1,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.1200960768614892,
|
|
"grad_norm": 0.4356111519880911,
|
|
"learning_rate": 8.98e-05,
|
|
"loss": 0.637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6526371240615845,
|
|
"step": 450,
|
|
"valid_targets_mean": 15223.1,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 0.12143047771550573,
|
|
"grad_norm": 0.3812490978850452,
|
|
"learning_rate": 9.079999999999998e-05,
|
|
"loss": 0.6232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6449786424636841,
|
|
"step": 455,
|
|
"valid_targets_mean": 16120.0,
|
|
"valid_targets_min": 85
|
|
},
|
|
{
|
|
"epoch": 0.12276487856952228,
|
|
"grad_norm": 0.5466324629441199,
|
|
"learning_rate": 9.18e-05,
|
|
"loss": 0.6288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6684505939483643,
|
|
"step": 460,
|
|
"valid_targets_mean": 14551.1,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 0.12409927942353884,
|
|
"grad_norm": 0.440898309600464,
|
|
"learning_rate": 9.279999999999999e-05,
|
|
"loss": 0.6389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6249978542327881,
|
|
"step": 465,
|
|
"valid_targets_mean": 16389.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 0.1254336802775554,
|
|
"grad_norm": 0.4812074587417184,
|
|
"learning_rate": 9.379999999999999e-05,
|
|
"loss": 0.6294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6114672422409058,
|
|
"step": 470,
|
|
"valid_targets_mean": 15764.7,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 0.12676808113157192,
|
|
"grad_norm": 0.33120352620579174,
|
|
"learning_rate": 9.479999999999999e-05,
|
|
"loss": 0.6481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6427431106567383,
|
|
"step": 475,
|
|
"valid_targets_mean": 16125.9,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.12810248198558846,
|
|
"grad_norm": 0.4938707791865049,
|
|
"learning_rate": 9.58e-05,
|
|
"loss": 0.6352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6327007412910461,
|
|
"step": 480,
|
|
"valid_targets_mean": 15312.6,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.12943688283960503,
|
|
"grad_norm": 0.6100558208938732,
|
|
"learning_rate": 9.68e-05,
|
|
"loss": 0.6405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6266542673110962,
|
|
"step": 485,
|
|
"valid_targets_mean": 15428.1,
|
|
"valid_targets_min": 151
|
|
},
|
|
{
|
|
"epoch": 0.13077128369362157,
|
|
"grad_norm": 0.6172374578198562,
|
|
"learning_rate": 9.779999999999999e-05,
|
|
"loss": 0.6278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6328811049461365,
|
|
"step": 490,
|
|
"valid_targets_mean": 16432.2,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.1321056845476381,
|
|
"grad_norm": 0.6025395448579794,
|
|
"learning_rate": 9.879999999999999e-05,
|
|
"loss": 0.6328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6516920924186707,
|
|
"step": 495,
|
|
"valid_targets_mean": 17290.2,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 0.13344008540165467,
|
|
"grad_norm": 0.7137832676715946,
|
|
"learning_rate": 9.979999999999999e-05,
|
|
"loss": 0.6409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6384673118591309,
|
|
"step": 500,
|
|
"valid_targets_mean": 16150.1,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 0.1347744862556712,
|
|
"grad_norm": 0.5110710358565494,
|
|
"learning_rate": 0.0001008,
|
|
"loss": 0.637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6318944692611694,
|
|
"step": 505,
|
|
"valid_targets_mean": 16285.9,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.13610888710968774,
|
|
"grad_norm": 0.4387540839417831,
|
|
"learning_rate": 0.00010179999999999998,
|
|
"loss": 0.6318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6122161149978638,
|
|
"step": 510,
|
|
"valid_targets_mean": 17210.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.1374432879637043,
|
|
"grad_norm": 0.3564577512761047,
|
|
"learning_rate": 0.00010279999999999999,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6141184568405151,
|
|
"step": 515,
|
|
"valid_targets_mean": 15328.4,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 0.13877768881772085,
|
|
"grad_norm": 0.6811113729467204,
|
|
"learning_rate": 0.00010379999999999999,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6268406510353088,
|
|
"step": 520,
|
|
"valid_targets_mean": 17020.2,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.14011208967173738,
|
|
"grad_norm": 0.6298148606182943,
|
|
"learning_rate": 0.00010479999999999999,
|
|
"loss": 0.6396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6501023769378662,
|
|
"step": 525,
|
|
"valid_targets_mean": 17929.1,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.14144649052575395,
|
|
"grad_norm": 0.4732100884671107,
|
|
"learning_rate": 0.0001058,
|
|
"loss": 0.6326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.668642520904541,
|
|
"step": 530,
|
|
"valid_targets_mean": 15325.7,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 0.14278089137977049,
|
|
"grad_norm": 0.6101088915867378,
|
|
"learning_rate": 0.00010679999999999998,
|
|
"loss": 0.6309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6329489946365356,
|
|
"step": 535,
|
|
"valid_targets_mean": 14211.2,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 0.14411529223378702,
|
|
"grad_norm": 0.6164583822509117,
|
|
"learning_rate": 0.00010779999999999999,
|
|
"loss": 0.643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6332310438156128,
|
|
"step": 540,
|
|
"valid_targets_mean": 16027.2,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.1454496930878036,
|
|
"grad_norm": 0.46354108969743496,
|
|
"learning_rate": 0.0001088,
|
|
"loss": 0.6337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6408779621124268,
|
|
"step": 545,
|
|
"valid_targets_mean": 15994.5,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.14678409394182013,
|
|
"grad_norm": 0.6728497617605657,
|
|
"learning_rate": 0.00010979999999999999,
|
|
"loss": 0.6524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6410496830940247,
|
|
"step": 550,
|
|
"valid_targets_mean": 15271.2,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 0.14811849479583666,
|
|
"grad_norm": 0.452251096886965,
|
|
"learning_rate": 0.0001108,
|
|
"loss": 0.6142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6183743476867676,
|
|
"step": 555,
|
|
"valid_targets_mean": 16031.0,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.14945289564985323,
|
|
"grad_norm": 0.4940079278296469,
|
|
"learning_rate": 0.00011179999999999998,
|
|
"loss": 0.6373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6272560358047485,
|
|
"step": 560,
|
|
"valid_targets_mean": 15618.4,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 0.15078729650386977,
|
|
"grad_norm": 0.40199346202147485,
|
|
"learning_rate": 0.00011279999999999999,
|
|
"loss": 0.6092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6106986999511719,
|
|
"step": 565,
|
|
"valid_targets_mean": 17251.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.1521216973578863,
|
|
"grad_norm": 0.7062551724607816,
|
|
"learning_rate": 0.0001138,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6443933844566345,
|
|
"step": 570,
|
|
"valid_targets_mean": 15404.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.15345609821190287,
|
|
"grad_norm": 0.5295131206766565,
|
|
"learning_rate": 0.00011479999999999999,
|
|
"loss": 0.6334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6361149549484253,
|
|
"step": 575,
|
|
"valid_targets_mean": 16901.2,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 0.1547904990659194,
|
|
"grad_norm": 0.4347384689835901,
|
|
"learning_rate": 0.0001158,
|
|
"loss": 0.6426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6205735206604004,
|
|
"step": 580,
|
|
"valid_targets_mean": 16786.3,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 0.15612489991993594,
|
|
"grad_norm": 0.5775856333359377,
|
|
"learning_rate": 0.00011679999999999998,
|
|
"loss": 0.6461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6474686861038208,
|
|
"step": 585,
|
|
"valid_targets_mean": 14895.8,
|
|
"valid_targets_min": 108
|
|
},
|
|
{
|
|
"epoch": 0.15745930077395248,
|
|
"grad_norm": 0.7282618772499635,
|
|
"learning_rate": 0.00011779999999999999,
|
|
"loss": 0.6436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6281576752662659,
|
|
"step": 590,
|
|
"valid_targets_mean": 14939.4,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 0.15879370162796905,
|
|
"grad_norm": 0.4692524239463527,
|
|
"learning_rate": 0.0001188,
|
|
"loss": 0.6271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6387741565704346,
|
|
"step": 595,
|
|
"valid_targets_mean": 17040.3,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.16012810248198558,
|
|
"grad_norm": 0.5058728730875969,
|
|
"learning_rate": 0.00011979999999999998,
|
|
"loss": 0.6226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.609712541103363,
|
|
"step": 600,
|
|
"valid_targets_mean": 15650.8,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 0.16146250333600212,
|
|
"grad_norm": 0.46751037120907246,
|
|
"learning_rate": 0.0001208,
|
|
"loss": 0.6379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.665306806564331,
|
|
"step": 605,
|
|
"valid_targets_mean": 15767.0,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 0.1627969041900187,
|
|
"grad_norm": 0.41517754616987906,
|
|
"learning_rate": 0.00012179999999999999,
|
|
"loss": 0.6316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5787537097930908,
|
|
"step": 610,
|
|
"valid_targets_mean": 16859.7,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.16413130504403523,
|
|
"grad_norm": 0.45185294422831457,
|
|
"learning_rate": 0.00012279999999999998,
|
|
"loss": 0.6256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185984015464783,
|
|
"step": 615,
|
|
"valid_targets_mean": 15708.6,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 0.16546570589805176,
|
|
"grad_norm": 0.8923654038299266,
|
|
"learning_rate": 0.0001238,
|
|
"loss": 0.6395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6756001114845276,
|
|
"step": 620,
|
|
"valid_targets_mean": 15895.9,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 0.16680010675206833,
|
|
"grad_norm": 0.6558823229700756,
|
|
"learning_rate": 0.00012479999999999997,
|
|
"loss": 0.6414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6151660680770874,
|
|
"step": 625,
|
|
"valid_targets_mean": 17162.6,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.16813450760608487,
|
|
"grad_norm": 0.4618405285160889,
|
|
"learning_rate": 0.0001258,
|
|
"loss": 0.6317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6160898804664612,
|
|
"step": 630,
|
|
"valid_targets_mean": 15995.3,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 0.1694689084601014,
|
|
"grad_norm": 0.5370756194829018,
|
|
"learning_rate": 0.0001268,
|
|
"loss": 0.6339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6276133060455322,
|
|
"step": 635,
|
|
"valid_targets_mean": 15865.4,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.17080330931411797,
|
|
"grad_norm": 0.45147493000289246,
|
|
"learning_rate": 0.0001278,
|
|
"loss": 0.6319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6035290360450745,
|
|
"step": 640,
|
|
"valid_targets_mean": 15300.2,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 0.1721377101681345,
|
|
"grad_norm": 0.4248585296196485,
|
|
"learning_rate": 0.0001288,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6245103478431702,
|
|
"step": 645,
|
|
"valid_targets_mean": 16338.7,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 0.17347211102215104,
|
|
"grad_norm": 0.45315242194661487,
|
|
"learning_rate": 0.00012979999999999998,
|
|
"loss": 0.6429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6325228214263916,
|
|
"step": 650,
|
|
"valid_targets_mean": 16460.1,
|
|
"valid_targets_min": 91
|
|
},
|
|
{
|
|
"epoch": 0.1748065118761676,
|
|
"grad_norm": 0.39926687744237155,
|
|
"learning_rate": 0.00013079999999999998,
|
|
"loss": 0.6401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6352002620697021,
|
|
"step": 655,
|
|
"valid_targets_mean": 16758.9,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.17614091273018415,
|
|
"grad_norm": 0.5573738851094832,
|
|
"learning_rate": 0.0001318,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6075339317321777,
|
|
"step": 660,
|
|
"valid_targets_mean": 16955.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 0.17747531358420068,
|
|
"grad_norm": 0.6041112170658741,
|
|
"learning_rate": 0.00013279999999999998,
|
|
"loss": 0.6387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6145156621932983,
|
|
"step": 665,
|
|
"valid_targets_mean": 16214.3,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.17880971443821725,
|
|
"grad_norm": 0.6691609500323099,
|
|
"learning_rate": 0.0001338,
|
|
"loss": 0.6424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6259413957595825,
|
|
"step": 670,
|
|
"valid_targets_mean": 15774.5,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 0.1801441152922338,
|
|
"grad_norm": 0.5510175966492971,
|
|
"learning_rate": 0.00013479999999999997,
|
|
"loss": 0.6444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6546534299850464,
|
|
"step": 675,
|
|
"valid_targets_mean": 16692.4,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 0.18147851614625032,
|
|
"grad_norm": 0.45284578631102296,
|
|
"learning_rate": 0.0001358,
|
|
"loss": 0.6242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6178637742996216,
|
|
"step": 680,
|
|
"valid_targets_mean": 15854.1,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 0.1828129170002669,
|
|
"grad_norm": 0.5598143553534308,
|
|
"learning_rate": 0.0001368,
|
|
"loss": 0.6377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6420258283615112,
|
|
"step": 685,
|
|
"valid_targets_mean": 16098.6,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.18414731785428343,
|
|
"grad_norm": 0.4898643325722064,
|
|
"learning_rate": 0.0001378,
|
|
"loss": 0.6253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6103246212005615,
|
|
"step": 690,
|
|
"valid_targets_mean": 15935.0,
|
|
"valid_targets_min": 46
|
|
},
|
|
{
|
|
"epoch": 0.18548171870829996,
|
|
"grad_norm": 0.48853693142080784,
|
|
"learning_rate": 0.00013879999999999999,
|
|
"loss": 0.625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6008676886558533,
|
|
"step": 695,
|
|
"valid_targets_mean": 15636.2,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 0.18681611956231653,
|
|
"grad_norm": 0.6347995761312584,
|
|
"learning_rate": 0.00013979999999999998,
|
|
"loss": 0.6324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.653724730014801,
|
|
"step": 700,
|
|
"valid_targets_mean": 14665.4,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 0.18815052041633307,
|
|
"grad_norm": 0.7828402154942533,
|
|
"learning_rate": 0.00014079999999999998,
|
|
"loss": 0.6318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6152931451797485,
|
|
"step": 705,
|
|
"valid_targets_mean": 17113.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.1894849212703496,
|
|
"grad_norm": 0.5241474516392837,
|
|
"learning_rate": 0.0001418,
|
|
"loss": 0.6333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6403192281723022,
|
|
"step": 710,
|
|
"valid_targets_mean": 15791.3,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.19081932212436617,
|
|
"grad_norm": 0.4632704579471851,
|
|
"learning_rate": 0.00014279999999999997,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6158638000488281,
|
|
"step": 715,
|
|
"valid_targets_mean": 16625.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.1921537229783827,
|
|
"grad_norm": 0.46117923760781826,
|
|
"learning_rate": 0.0001438,
|
|
"loss": 0.6072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6315052509307861,
|
|
"step": 720,
|
|
"valid_targets_mean": 15794.5,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 0.19348812383239924,
|
|
"grad_norm": 0.647822630194712,
|
|
"learning_rate": 0.0001448,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6225035190582275,
|
|
"step": 725,
|
|
"valid_targets_mean": 15483.7,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 0.1948225246864158,
|
|
"grad_norm": 0.49037071545761785,
|
|
"learning_rate": 0.0001458,
|
|
"loss": 0.6339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6174116134643555,
|
|
"step": 730,
|
|
"valid_targets_mean": 17187.7,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 0.19615692554043235,
|
|
"grad_norm": 1.2177701556230174,
|
|
"learning_rate": 0.0001468,
|
|
"loss": 0.6278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6415305137634277,
|
|
"step": 735,
|
|
"valid_targets_mean": 14950.6,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 0.19749132639444889,
|
|
"grad_norm": 0.6247158077856722,
|
|
"learning_rate": 0.0001478,
|
|
"loss": 0.6382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6371544599533081,
|
|
"step": 740,
|
|
"valid_targets_mean": 14802.8,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 0.19882572724846545,
|
|
"grad_norm": 0.6322408214786107,
|
|
"learning_rate": 0.00014879999999999998,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5996054410934448,
|
|
"step": 745,
|
|
"valid_targets_mean": 15770.3,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 0.200160128102482,
|
|
"grad_norm": 0.5899230568781816,
|
|
"learning_rate": 0.00014979999999999998,
|
|
"loss": 0.6311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.648550271987915,
|
|
"step": 750,
|
|
"valid_targets_mean": 15592.2,
|
|
"valid_targets_min": 146
|
|
},
|
|
{
|
|
"epoch": 0.20149452895649853,
|
|
"grad_norm": 0.5356162186310743,
|
|
"learning_rate": 0.00014999986979857214,
|
|
"loss": 0.6291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485459804534912,
|
|
"step": 755,
|
|
"valid_targets_mean": 15480.7,
|
|
"valid_targets_min": 7
|
|
},
|
|
{
|
|
"epoch": 0.2028289298105151,
|
|
"grad_norm": 0.5996675499754749,
|
|
"learning_rate": 0.00014999934085604638,
|
|
"loss": 0.6301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6440800428390503,
|
|
"step": 760,
|
|
"valid_targets_mean": 15474.6,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 0.20416333066453163,
|
|
"grad_norm": 0.6652446495332728,
|
|
"learning_rate": 0.00014999840503770068,
|
|
"loss": 0.6316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6223655939102173,
|
|
"step": 765,
|
|
"valid_targets_mean": 15384.0,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 0.20549773151854817,
|
|
"grad_norm": 0.5183540862933816,
|
|
"learning_rate": 0.00014999706234861205,
|
|
"loss": 0.643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6405179500579834,
|
|
"step": 770,
|
|
"valid_targets_mean": 15208.1,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.20683213237256473,
|
|
"grad_norm": 0.46844848194628597,
|
|
"learning_rate": 0.00014999531279606457,
|
|
"loss": 0.6354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6337324380874634,
|
|
"step": 775,
|
|
"valid_targets_mean": 16525.7,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 0.20816653322658127,
|
|
"grad_norm": 0.671172585627433,
|
|
"learning_rate": 0.00014999315638954965,
|
|
"loss": 0.6348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6305123567581177,
|
|
"step": 780,
|
|
"valid_targets_mean": 17233.3,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 0.2095009340805978,
|
|
"grad_norm": 0.6074185020725967,
|
|
"learning_rate": 0.000149990593140766,
|
|
"loss": 0.6352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6281955242156982,
|
|
"step": 785,
|
|
"valid_targets_mean": 16683.3,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.21083533493461437,
|
|
"grad_norm": 0.7479959234194301,
|
|
"learning_rate": 0.00014998762306361933,
|
|
"loss": 0.6319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6173586249351501,
|
|
"step": 790,
|
|
"valid_targets_mean": 16722.9,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.2121697357886309,
|
|
"grad_norm": 0.6232754007313384,
|
|
"learning_rate": 0.00014998424617422253,
|
|
"loss": 0.6247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6114662289619446,
|
|
"step": 795,
|
|
"valid_targets_mean": 17318.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 0.21350413664264745,
|
|
"grad_norm": 2.474421684415322,
|
|
"learning_rate": 0.00014998046249089538,
|
|
"loss": 0.6309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.630775511264801,
|
|
"step": 800,
|
|
"valid_targets_mean": 16087.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.214838537496664,
|
|
"grad_norm": 0.5272341185487777,
|
|
"learning_rate": 0.00014997627203416458,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6349064111709595,
|
|
"step": 805,
|
|
"valid_targets_mean": 15673.0,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 0.21617293835068055,
|
|
"grad_norm": 0.5074946180799155,
|
|
"learning_rate": 0.00014997167482676366,
|
|
"loss": 0.6285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.653042197227478,
|
|
"step": 810,
|
|
"valid_targets_mean": 14152.6,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.2175073392046971,
|
|
"grad_norm": 0.6304000671143047,
|
|
"learning_rate": 0.00014996667089363272,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6242920160293579,
|
|
"step": 815,
|
|
"valid_targets_mean": 15896.4,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 0.21884174005871362,
|
|
"grad_norm": 0.4929101127177894,
|
|
"learning_rate": 0.00014996126026191832,
|
|
"loss": 0.6425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6394720077514648,
|
|
"step": 820,
|
|
"valid_targets_mean": 16951.1,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 0.2201761409127302,
|
|
"grad_norm": 0.4831309890625364,
|
|
"learning_rate": 0.00014995544296097355,
|
|
"loss": 0.6282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6423969864845276,
|
|
"step": 825,
|
|
"valid_targets_mean": 14961.2,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 0.22151054176674673,
|
|
"grad_norm": 0.5671978591902495,
|
|
"learning_rate": 0.00014994921902235757,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6383193135261536,
|
|
"step": 830,
|
|
"valid_targets_mean": 15189.4,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 0.22284494262076326,
|
|
"grad_norm": 0.5408721097421353,
|
|
"learning_rate": 0.0001499425884798356,
|
|
"loss": 0.62,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6238391399383545,
|
|
"step": 835,
|
|
"valid_targets_mean": 15876.2,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.22417934347477983,
|
|
"grad_norm": 0.5091528558852999,
|
|
"learning_rate": 0.00014993555136937872,
|
|
"loss": 0.6298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.616040825843811,
|
|
"step": 840,
|
|
"valid_targets_mean": 16239.6,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.22551374432879637,
|
|
"grad_norm": 0.6991679506883416,
|
|
"learning_rate": 0.0001499281077291637,
|
|
"loss": 0.6358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6669756174087524,
|
|
"step": 845,
|
|
"valid_targets_mean": 15986.7,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 0.2268481451828129,
|
|
"grad_norm": 2.418185394050847,
|
|
"learning_rate": 0.00014992025759957267,
|
|
"loss": 0.6569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6653257012367249,
|
|
"step": 850,
|
|
"valid_targets_mean": 15783.3,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.22818254603682947,
|
|
"grad_norm": 0.9727676116589085,
|
|
"learning_rate": 0.0001499120010231931,
|
|
"loss": 0.6297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6188017129898071,
|
|
"step": 855,
|
|
"valid_targets_mean": 16147.9,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 0.229516946890846,
|
|
"grad_norm": 0.9375704480379955,
|
|
"learning_rate": 0.00014990333804481738,
|
|
"loss": 0.6312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6338417530059814,
|
|
"step": 860,
|
|
"valid_targets_mean": 16900.2,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 0.23085134774486255,
|
|
"grad_norm": 0.6205540371276664,
|
|
"learning_rate": 0.00014989426871144266,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6344268321990967,
|
|
"step": 865,
|
|
"valid_targets_mean": 16510.0,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 0.2321857485988791,
|
|
"grad_norm": 0.3787711944460734,
|
|
"learning_rate": 0.00014988479307227062,
|
|
"loss": 0.6153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6353251338005066,
|
|
"step": 870,
|
|
"valid_targets_mean": 14899.5,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 0.23352014945289565,
|
|
"grad_norm": 0.526714631887211,
|
|
"learning_rate": 0.00014987491117870717,
|
|
"loss": 0.632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6151652336120605,
|
|
"step": 875,
|
|
"valid_targets_mean": 16548.4,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 0.23485455030691219,
|
|
"grad_norm": 0.9469141187868311,
|
|
"learning_rate": 0.00014986462308436214,
|
|
"loss": 0.619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6323580741882324,
|
|
"step": 880,
|
|
"valid_targets_mean": 14792.8,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.23618895116092875,
|
|
"grad_norm": 0.5545869906721276,
|
|
"learning_rate": 0.00014985392884504903,
|
|
"loss": 0.6327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6401712894439697,
|
|
"step": 885,
|
|
"valid_targets_mean": 15866.4,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.2375233520149453,
|
|
"grad_norm": 0.6671209209088051,
|
|
"learning_rate": 0.00014984282851878477,
|
|
"loss": 0.6277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6353170871734619,
|
|
"step": 890,
|
|
"valid_targets_mean": 16787.9,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.23885775286896183,
|
|
"grad_norm": 0.707216009088664,
|
|
"learning_rate": 0.00014983132216578923,
|
|
"loss": 0.6277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6237537860870361,
|
|
"step": 895,
|
|
"valid_targets_mean": 16321.5,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 0.2401921537229784,
|
|
"grad_norm": 0.44220574683340125,
|
|
"learning_rate": 0.00014981940984848508,
|
|
"loss": 0.6095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5769079923629761,
|
|
"step": 900,
|
|
"valid_targets_mean": 16333.7,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.24152655457699493,
|
|
"grad_norm": 0.567649333484972,
|
|
"learning_rate": 0.00014980709163149732,
|
|
"loss": 0.6352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6523852944374084,
|
|
"step": 905,
|
|
"valid_targets_mean": 15671.2,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 0.24286095543101147,
|
|
"grad_norm": 0.39550289799836713,
|
|
"learning_rate": 0.000149794367581653,
|
|
"loss": 0.6286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6379982233047485,
|
|
"step": 910,
|
|
"valid_targets_mean": 16212.5,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 0.24419535628502803,
|
|
"grad_norm": 0.5657359448312539,
|
|
"learning_rate": 0.00014978123776798082,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6022660732269287,
|
|
"step": 915,
|
|
"valid_targets_mean": 17185.0,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 0.24552975713904457,
|
|
"grad_norm": 0.46551913820916074,
|
|
"learning_rate": 0.00014976770226171084,
|
|
"loss": 0.6211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5838291049003601,
|
|
"step": 920,
|
|
"valid_targets_mean": 17635.7,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.2468641579930611,
|
|
"grad_norm": 0.628573995916106,
|
|
"learning_rate": 0.00014975376113627394,
|
|
"loss": 0.6243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6010740399360657,
|
|
"step": 925,
|
|
"valid_targets_mean": 17160.6,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 0.24819855884707767,
|
|
"grad_norm": 0.5886488234344668,
|
|
"learning_rate": 0.00014973941446730154,
|
|
"loss": 0.6417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6382604837417603,
|
|
"step": 930,
|
|
"valid_targets_mean": 16632.9,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 0.2495329597010942,
|
|
"grad_norm": 0.48658287775706377,
|
|
"learning_rate": 0.00014972466233262517,
|
|
"loss": 0.6268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6358413696289062,
|
|
"step": 935,
|
|
"valid_targets_mean": 16748.2,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 0.2508673605551108,
|
|
"grad_norm": 0.6165528347204787,
|
|
"learning_rate": 0.00014970950481227603,
|
|
"loss": 0.6271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6221519708633423,
|
|
"step": 940,
|
|
"valid_targets_mean": 15600.3,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 0.2522017614091273,
|
|
"grad_norm": 0.5163784973495187,
|
|
"learning_rate": 0.00014969394198848456,
|
|
"loss": 0.6291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6757415533065796,
|
|
"step": 945,
|
|
"valid_targets_mean": 14300.9,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 0.25353616226314385,
|
|
"grad_norm": 0.4121567337420054,
|
|
"learning_rate": 0.00014967797394567993,
|
|
"loss": 0.6102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5853270292282104,
|
|
"step": 950,
|
|
"valid_targets_mean": 16034.4,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 0.2548705631171604,
|
|
"grad_norm": 0.7577521485230333,
|
|
"learning_rate": 0.00014966160077048982,
|
|
"loss": 0.6088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6212820410728455,
|
|
"step": 955,
|
|
"valid_targets_mean": 16407.6,
|
|
"valid_targets_min": 112
|
|
},
|
|
{
|
|
"epoch": 0.2562049639711769,
|
|
"grad_norm": 0.5797059241277471,
|
|
"learning_rate": 0.00014964482255173958,
|
|
"loss": 0.6355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6175366640090942,
|
|
"step": 960,
|
|
"valid_targets_mean": 15787.3,
|
|
"valid_targets_min": 17
|
|
},
|
|
{
|
|
"epoch": 0.25753936482519346,
|
|
"grad_norm": 0.6745773228660589,
|
|
"learning_rate": 0.00014962763938045206,
|
|
"loss": 0.6343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6257587671279907,
|
|
"step": 965,
|
|
"valid_targets_mean": 14691.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.25887376567921005,
|
|
"grad_norm": 0.719661214105207,
|
|
"learning_rate": 0.00014961005134984693,
|
|
"loss": 0.6238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6064175367355347,
|
|
"step": 970,
|
|
"valid_targets_mean": 17340.3,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.2602081665332266,
|
|
"grad_norm": 0.7928522671161337,
|
|
"learning_rate": 0.00014959205855534036,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6479929685592651,
|
|
"step": 975,
|
|
"valid_targets_mean": 16809.3,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 0.26154256738724313,
|
|
"grad_norm": 0.42078558451857806,
|
|
"learning_rate": 0.00014957366109454427,
|
|
"loss": 0.6165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6084697246551514,
|
|
"step": 980,
|
|
"valid_targets_mean": 16258.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.26287696824125967,
|
|
"grad_norm": 0.5847517399099696,
|
|
"learning_rate": 0.00014955485906726596,
|
|
"loss": 0.631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6284216642379761,
|
|
"step": 985,
|
|
"valid_targets_mean": 15576.3,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 0.2642113690952762,
|
|
"grad_norm": 0.5762291412718675,
|
|
"learning_rate": 0.00014953565257550756,
|
|
"loss": 0.6403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.620425283908844,
|
|
"step": 990,
|
|
"valid_targets_mean": 16763.7,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.26554576994929274,
|
|
"grad_norm": 0.6979661040448024,
|
|
"learning_rate": 0.00014951604172346535,
|
|
"loss": 0.6321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6719156503677368,
|
|
"step": 995,
|
|
"valid_targets_mean": 14564.9,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.26688017080330934,
|
|
"grad_norm": 0.6488310360831638,
|
|
"learning_rate": 0.00014949602661752944,
|
|
"loss": 0.6361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6312586069107056,
|
|
"step": 1000,
|
|
"valid_targets_mean": 17267.3,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.2682145716573259,
|
|
"grad_norm": 0.7100211577778035,
|
|
"learning_rate": 0.0001494756073662829,
|
|
"loss": 0.6048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5785956978797913,
|
|
"step": 1005,
|
|
"valid_targets_mean": 16819.7,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 0.2695489725113424,
|
|
"grad_norm": 0.5186043844844956,
|
|
"learning_rate": 0.00014945478408050135,
|
|
"loss": 0.6241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5791152119636536,
|
|
"step": 1010,
|
|
"valid_targets_mean": 17192.7,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.27088337336535895,
|
|
"grad_norm": 0.4865216909323894,
|
|
"learning_rate": 0.00014943355687315239,
|
|
"loss": 0.6346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6338467597961426,
|
|
"step": 1015,
|
|
"valid_targets_mean": 15711.4,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 0.2722177742193755,
|
|
"grad_norm": 2.9839267781053938,
|
|
"learning_rate": 0.0001494119258593948,
|
|
"loss": 0.6198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6054257154464722,
|
|
"step": 1020,
|
|
"valid_targets_mean": 16202.0,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 0.273552175073392,
|
|
"grad_norm": 0.4682102838085987,
|
|
"learning_rate": 0.00014938989115657815,
|
|
"loss": 0.6438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6480819582939148,
|
|
"step": 1025,
|
|
"valid_targets_mean": 16784.9,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 0.2748865759274086,
|
|
"grad_norm": 0.5780484239113524,
|
|
"learning_rate": 0.00014936745288424198,
|
|
"loss": 0.6411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6359916925430298,
|
|
"step": 1030,
|
|
"valid_targets_mean": 16367.8,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 0.27622097678142515,
|
|
"grad_norm": 0.4748800236035067,
|
|
"learning_rate": 0.0001493446111641152,
|
|
"loss": 0.6388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6322864294052124,
|
|
"step": 1035,
|
|
"valid_targets_mean": 17037.6,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 0.2775553776354417,
|
|
"grad_norm": 0.5475367460557771,
|
|
"learning_rate": 0.00014932136612011554,
|
|
"loss": 0.619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5975278615951538,
|
|
"step": 1040,
|
|
"valid_targets_mean": 14834.2,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 0.27888977848945823,
|
|
"grad_norm": 0.5803392107957903,
|
|
"learning_rate": 0.00014929771787834868,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6173809170722961,
|
|
"step": 1045,
|
|
"valid_targets_mean": 14989.7,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.28022417934347477,
|
|
"grad_norm": 0.5695332004442396,
|
|
"learning_rate": 0.00014927366656710772,
|
|
"loss": 0.6361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6184433102607727,
|
|
"step": 1050,
|
|
"valid_targets_mean": 16500.4,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 0.2815585801974913,
|
|
"grad_norm": 0.5210619813808601,
|
|
"learning_rate": 0.00014924921231687245,
|
|
"loss": 0.6446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6179799437522888,
|
|
"step": 1055,
|
|
"valid_targets_mean": 16106.9,
|
|
"valid_targets_min": 119
|
|
},
|
|
{
|
|
"epoch": 0.2828929810515079,
|
|
"grad_norm": 0.6614658274322637,
|
|
"learning_rate": 0.0001492243552603086,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6067185997962952,
|
|
"step": 1060,
|
|
"valid_targets_mean": 16331.0,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 0.28422738190552443,
|
|
"grad_norm": 0.9608258567454809,
|
|
"learning_rate": 0.00014919909553226716,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961272716522217,
|
|
"step": 1065,
|
|
"valid_targets_mean": 17423.3,
|
|
"valid_targets_min": 43
|
|
},
|
|
{
|
|
"epoch": 0.28556178275954097,
|
|
"grad_norm": 0.7810276450704127,
|
|
"learning_rate": 0.00014917343326978366,
|
|
"loss": 0.626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6217774152755737,
|
|
"step": 1070,
|
|
"valid_targets_mean": 16327.2,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 0.2868961836135575,
|
|
"grad_norm": 0.8791876590438911,
|
|
"learning_rate": 0.00014914736861207733,
|
|
"loss": 0.614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6118861436843872,
|
|
"step": 1075,
|
|
"valid_targets_mean": 16543.7,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 0.28823058446757405,
|
|
"grad_norm": 1.1384137467784206,
|
|
"learning_rate": 0.0001491209017005505,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6420910358428955,
|
|
"step": 1080,
|
|
"valid_targets_mean": 15269.9,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.2895649853215906,
|
|
"grad_norm": 0.9186506276668647,
|
|
"learning_rate": 0.00014909403267878771,
|
|
"loss": 0.6226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6265588998794556,
|
|
"step": 1085,
|
|
"valid_targets_mean": 15304.5,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 0.2908993861756072,
|
|
"grad_norm": 0.6353711384570834,
|
|
"learning_rate": 0.000149066761692555,
|
|
"loss": 0.6507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6470847129821777,
|
|
"step": 1090,
|
|
"valid_targets_mean": 15632.7,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 0.2922337870296237,
|
|
"grad_norm": 0.8185383898766663,
|
|
"learning_rate": 0.00014903908888979904,
|
|
"loss": 0.6703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.647415816783905,
|
|
"step": 1095,
|
|
"valid_targets_mean": 16790.4,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 0.29356818788364025,
|
|
"grad_norm": 0.8184426394736861,
|
|
"learning_rate": 0.00014901101442064637,
|
|
"loss": 0.6394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6400127410888672,
|
|
"step": 1100,
|
|
"valid_targets_mean": 16172.6,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.2949025887376568,
|
|
"grad_norm": 0.556673335317666,
|
|
"learning_rate": 0.00014898253843740271,
|
|
"loss": 0.6191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6126173734664917,
|
|
"step": 1105,
|
|
"valid_targets_mean": 16876.9,
|
|
"valid_targets_min": 63
|
|
},
|
|
{
|
|
"epoch": 0.2962369895916733,
|
|
"grad_norm": 0.6295377941122368,
|
|
"learning_rate": 0.00014895366109455189,
|
|
"loss": 0.6227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6384031772613525,
|
|
"step": 1110,
|
|
"valid_targets_mean": 14548.6,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 0.29757139044568987,
|
|
"grad_norm": 0.5248335147680199,
|
|
"learning_rate": 0.00014892438254875522,
|
|
"loss": 0.6332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6321662068367004,
|
|
"step": 1115,
|
|
"valid_targets_mean": 15943.8,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.29890579129970646,
|
|
"grad_norm": 0.46918917861931686,
|
|
"learning_rate": 0.00014889470295885047,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6436111927032471,
|
|
"step": 1120,
|
|
"valid_targets_mean": 15795.2,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 0.300240192153723,
|
|
"grad_norm": 0.5296984184134944,
|
|
"learning_rate": 0.0001488646224858512,
|
|
"loss": 0.622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6454466581344604,
|
|
"step": 1125,
|
|
"valid_targets_mean": 16186.7,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.30157459300773953,
|
|
"grad_norm": 0.45293649555504717,
|
|
"learning_rate": 0.00014883414129294575,
|
|
"loss": 0.6249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6144802570343018,
|
|
"step": 1130,
|
|
"valid_targets_mean": 16900.4,
|
|
"valid_targets_min": 208
|
|
},
|
|
{
|
|
"epoch": 0.30290899386175607,
|
|
"grad_norm": 0.6521647010742185,
|
|
"learning_rate": 0.00014880325954549635,
|
|
"loss": 0.6111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6205244064331055,
|
|
"step": 1135,
|
|
"valid_targets_mean": 16414.0,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 0.3042433947157726,
|
|
"grad_norm": 0.47504287072364365,
|
|
"learning_rate": 0.00014877197741103827,
|
|
"loss": 0.6169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6137683391571045,
|
|
"step": 1140,
|
|
"valid_targets_mean": 16874.0,
|
|
"valid_targets_min": 20
|
|
},
|
|
{
|
|
"epoch": 0.30557779556978915,
|
|
"grad_norm": 0.48482488804825913,
|
|
"learning_rate": 0.00014874029505927897,
|
|
"loss": 0.628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.600907564163208,
|
|
"step": 1145,
|
|
"valid_targets_mean": 15816.4,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.30691219642380574,
|
|
"grad_norm": 0.590376880482206,
|
|
"learning_rate": 0.00014870821266209705,
|
|
"loss": 0.6389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6424787640571594,
|
|
"step": 1150,
|
|
"valid_targets_mean": 16383.6,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 0.3082465972778223,
|
|
"grad_norm": 0.7933903651046492,
|
|
"learning_rate": 0.00014867573039354138,
|
|
"loss": 0.636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.624193012714386,
|
|
"step": 1155,
|
|
"valid_targets_mean": 16836.2,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.3095809981318388,
|
|
"grad_norm": 0.9457148294169705,
|
|
"learning_rate": 0.00014864284842983018,
|
|
"loss": 0.6384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6270631551742554,
|
|
"step": 1160,
|
|
"valid_targets_mean": 17508.7,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 0.31091539898585535,
|
|
"grad_norm": 0.7378226203734249,
|
|
"learning_rate": 0.00014860956694935003,
|
|
"loss": 0.6175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961403846740723,
|
|
"step": 1165,
|
|
"valid_targets_mean": 15587.0,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 0.3122497998398719,
|
|
"grad_norm": 0.8823224927738156,
|
|
"learning_rate": 0.0001485758861326549,
|
|
"loss": 0.611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6161226034164429,
|
|
"step": 1170,
|
|
"valid_targets_mean": 15393.7,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 0.3135842006938884,
|
|
"grad_norm": 0.639113418443212,
|
|
"learning_rate": 0.00014854180616246523,
|
|
"loss": 0.6145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6032258868217468,
|
|
"step": 1175,
|
|
"valid_targets_mean": 14761.5,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.31491860154790496,
|
|
"grad_norm": 0.9033165887917867,
|
|
"learning_rate": 0.00014850732722366682,
|
|
"loss": 0.6354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6307148933410645,
|
|
"step": 1180,
|
|
"valid_targets_mean": 16037.6,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.31625300240192156,
|
|
"grad_norm": 0.5532447648291372,
|
|
"learning_rate": 0.00014847244950330998,
|
|
"loss": 0.627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6076141595840454,
|
|
"step": 1185,
|
|
"valid_targets_mean": 15901.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.3175874032559381,
|
|
"grad_norm": 0.5412595599268045,
|
|
"learning_rate": 0.00014843717319060833,
|
|
"loss": 0.6318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623320460319519,
|
|
"step": 1190,
|
|
"valid_targets_mean": 15982.0,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.31892180410995463,
|
|
"grad_norm": 0.712340033605185,
|
|
"learning_rate": 0.00014840149847693794,
|
|
"loss": 0.6272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6320945024490356,
|
|
"step": 1195,
|
|
"valid_targets_mean": 16020.9,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 0.32025620496397117,
|
|
"grad_norm": 0.5170839825925028,
|
|
"learning_rate": 0.00014836542555583628,
|
|
"loss": 0.6184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6051146984100342,
|
|
"step": 1200,
|
|
"valid_targets_mean": 15806.9,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.3215906058179877,
|
|
"grad_norm": 0.42832852584741143,
|
|
"learning_rate": 0.000148328954623001,
|
|
"loss": 0.6301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6123482584953308,
|
|
"step": 1205,
|
|
"valid_targets_mean": 16524.5,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.32292500667200424,
|
|
"grad_norm": 0.4873892662092641,
|
|
"learning_rate": 0.00014829208587628908,
|
|
"loss": 0.6165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6147938370704651,
|
|
"step": 1210,
|
|
"valid_targets_mean": 15223.3,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 0.32425940752602084,
|
|
"grad_norm": 0.493152065483101,
|
|
"learning_rate": 0.0001482548195157156,
|
|
"loss": 0.6172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6495429277420044,
|
|
"step": 1215,
|
|
"valid_targets_mean": 14667.0,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 0.3255938083800374,
|
|
"grad_norm": 0.5157810714523037,
|
|
"learning_rate": 0.00014821715574345277,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6138304471969604,
|
|
"step": 1220,
|
|
"valid_targets_mean": 16354.4,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 0.3269282092340539,
|
|
"grad_norm": 0.6056960255078041,
|
|
"learning_rate": 0.0001481790947638288,
|
|
"loss": 0.6176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6478211283683777,
|
|
"step": 1225,
|
|
"valid_targets_mean": 15326.2,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 0.32826261008807045,
|
|
"grad_norm": 0.45968806774958004,
|
|
"learning_rate": 0.00014814063678332667,
|
|
"loss": 0.6144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6101811528205872,
|
|
"step": 1230,
|
|
"valid_targets_mean": 16237.8,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 0.329597010942087,
|
|
"grad_norm": 0.466696207487788,
|
|
"learning_rate": 0.00014810178201058323,
|
|
"loss": 0.6105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6029187440872192,
|
|
"step": 1235,
|
|
"valid_targets_mean": 15636.9,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 0.3309314117961035,
|
|
"grad_norm": 0.4547794279301651,
|
|
"learning_rate": 0.00014806253065638786,
|
|
"loss": 0.6111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5900842547416687,
|
|
"step": 1240,
|
|
"valid_targets_mean": 17622.9,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 0.3322658126501201,
|
|
"grad_norm": 0.5035430798808832,
|
|
"learning_rate": 0.00014802288293368148,
|
|
"loss": 0.6128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6072174310684204,
|
|
"step": 1245,
|
|
"valid_targets_mean": 16158.9,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 0.33360021350413666,
|
|
"grad_norm": 0.5291824098972122,
|
|
"learning_rate": 0.0001479828390575553,
|
|
"loss": 0.6247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6279348134994507,
|
|
"step": 1250,
|
|
"valid_targets_mean": 15845.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.3349346143581532,
|
|
"grad_norm": 0.4913251882169057,
|
|
"learning_rate": 0.00014794239924524968,
|
|
"loss": 0.6225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6031299233436584,
|
|
"step": 1255,
|
|
"valid_targets_mean": 16978.3,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.33626901521216973,
|
|
"grad_norm": 0.49690910233477986,
|
|
"learning_rate": 0.00014790156371615303,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6335940361022949,
|
|
"step": 1260,
|
|
"valid_targets_mean": 16518.6,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 0.33760341606618627,
|
|
"grad_norm": 0.5468966794986143,
|
|
"learning_rate": 0.00014786033269180044,
|
|
"loss": 0.6068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6059638261795044,
|
|
"step": 1265,
|
|
"valid_targets_mean": 16844.9,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 0.3389378169202028,
|
|
"grad_norm": 0.4983062105935614,
|
|
"learning_rate": 0.00014781870639587262,
|
|
"loss": 0.6282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6372385025024414,
|
|
"step": 1270,
|
|
"valid_targets_mean": 15898.1,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 0.3402722177742194,
|
|
"grad_norm": 0.3892479665426188,
|
|
"learning_rate": 0.0001477766850541947,
|
|
"loss": 0.6341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6155767440795898,
|
|
"step": 1275,
|
|
"valid_targets_mean": 17163.2,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 0.34160661862823594,
|
|
"grad_norm": 0.652787264480223,
|
|
"learning_rate": 0.00014773426889473493,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.622187077999115,
|
|
"step": 1280,
|
|
"valid_targets_mean": 15787.9,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 0.3429410194822525,
|
|
"grad_norm": 0.5070064814221472,
|
|
"learning_rate": 0.0001476914581476034,
|
|
"loss": 0.6411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6333944797515869,
|
|
"step": 1285,
|
|
"valid_targets_mean": 16448.7,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 0.344275420336269,
|
|
"grad_norm": 0.48844856808258474,
|
|
"learning_rate": 0.000147648253045051,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5810629725456238,
|
|
"step": 1290,
|
|
"valid_targets_mean": 16389.3,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 0.34560982119028555,
|
|
"grad_norm": 0.41518816293543753,
|
|
"learning_rate": 0.0001476046538214679,
|
|
"loss": 0.6124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6386691331863403,
|
|
"step": 1295,
|
|
"valid_targets_mean": 14671.7,
|
|
"valid_targets_min": 156
|
|
},
|
|
{
|
|
"epoch": 0.3469442220443021,
|
|
"grad_norm": 0.47948509536370426,
|
|
"learning_rate": 0.00014756066071338247,
|
|
"loss": 0.6378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6576275825500488,
|
|
"step": 1300,
|
|
"valid_targets_mean": 15235.0,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 0.3482786228983187,
|
|
"grad_norm": 0.4595820910884694,
|
|
"learning_rate": 0.00014751627395945984,
|
|
"loss": 0.6217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6192502379417419,
|
|
"step": 1305,
|
|
"valid_targets_mean": 16238.1,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 0.3496130237523352,
|
|
"grad_norm": 0.4291054247965342,
|
|
"learning_rate": 0.0001474714938005008,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6437174081802368,
|
|
"step": 1310,
|
|
"valid_targets_mean": 15684.9,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 0.35094742460635175,
|
|
"grad_norm": 0.39671682397098906,
|
|
"learning_rate": 0.00014742632047944033,
|
|
"loss": 0.637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6374814510345459,
|
|
"step": 1315,
|
|
"valid_targets_mean": 15847.1,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 0.3522818254603683,
|
|
"grad_norm": 0.47859741034274256,
|
|
"learning_rate": 0.00014738075424134634,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6308225989341736,
|
|
"step": 1320,
|
|
"valid_targets_mean": 15888.9,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 0.35361622631438483,
|
|
"grad_norm": 0.6507671044429603,
|
|
"learning_rate": 0.00014733479533341837,
|
|
"loss": 0.6195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6275373697280884,
|
|
"step": 1325,
|
|
"valid_targets_mean": 15963.1,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 0.35495062716840137,
|
|
"grad_norm": 0.6399738378817154,
|
|
"learning_rate": 0.00014728844400498616,
|
|
"loss": 0.6254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6041469573974609,
|
|
"step": 1330,
|
|
"valid_targets_mean": 17098.5,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 0.35628502802241796,
|
|
"grad_norm": 0.4984880266031328,
|
|
"learning_rate": 0.00014724170050750836,
|
|
"loss": 0.6215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6045820713043213,
|
|
"step": 1335,
|
|
"valid_targets_mean": 16711.2,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.3576194288764345,
|
|
"grad_norm": 0.5423169689023574,
|
|
"learning_rate": 0.00014719456509457122,
|
|
"loss": 0.6297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6203999519348145,
|
|
"step": 1340,
|
|
"valid_targets_mean": 15233.5,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 0.35895382973045104,
|
|
"grad_norm": 0.4838132943128504,
|
|
"learning_rate": 0.00014714703802188713,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6400591135025024,
|
|
"step": 1345,
|
|
"valid_targets_mean": 16153.9,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.3602882305844676,
|
|
"grad_norm": 0.413527598858469,
|
|
"learning_rate": 0.0001470991195472932,
|
|
"loss": 0.6318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5825143456459045,
|
|
"step": 1350,
|
|
"valid_targets_mean": 17444.9,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 0.3616226314384841,
|
|
"grad_norm": 0.49583200763125385,
|
|
"learning_rate": 0.00014705080993075,
|
|
"loss": 0.6186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185584664344788,
|
|
"step": 1355,
|
|
"valid_targets_mean": 16056.9,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.36295703229250065,
|
|
"grad_norm": 0.4207625378865751,
|
|
"learning_rate": 0.00014700210943433998,
|
|
"loss": 0.6284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6178081035614014,
|
|
"step": 1360,
|
|
"valid_targets_mean": 16616.0,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.36429143314651724,
|
|
"grad_norm": 0.47838668743172985,
|
|
"learning_rate": 0.00014695301832226627,
|
|
"loss": 0.6353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6175183057785034,
|
|
"step": 1365,
|
|
"valid_targets_mean": 15540.1,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 0.3656258340005338,
|
|
"grad_norm": 0.44229725453471697,
|
|
"learning_rate": 0.00014690353686085098,
|
|
"loss": 0.6165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6205130815505981,
|
|
"step": 1370,
|
|
"valid_targets_mean": 15817.3,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 0.3669602348545503,
|
|
"grad_norm": 0.46623196564659064,
|
|
"learning_rate": 0.00014685366531853395,
|
|
"loss": 0.6232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6131946444511414,
|
|
"step": 1375,
|
|
"valid_targets_mean": 15407.4,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.36829463570856685,
|
|
"grad_norm": 0.44006049355952626,
|
|
"learning_rate": 0.00014680340396587118,
|
|
"loss": 0.6233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6441381573677063,
|
|
"step": 1380,
|
|
"valid_targets_mean": 15845.8,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 0.3696290365625834,
|
|
"grad_norm": 0.6111464695074394,
|
|
"learning_rate": 0.0001467527530755335,
|
|
"loss": 0.6084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6167534589767456,
|
|
"step": 1385,
|
|
"valid_targets_mean": 16684.4,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 0.37096343741659993,
|
|
"grad_norm": 0.8324813489326134,
|
|
"learning_rate": 0.000146701712922305,
|
|
"loss": 0.6202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.612213671207428,
|
|
"step": 1390,
|
|
"valid_targets_mean": 16478.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.37229783827061647,
|
|
"grad_norm": 0.670291546771116,
|
|
"learning_rate": 0.00014665028378308138,
|
|
"loss": 0.6251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6236619353294373,
|
|
"step": 1395,
|
|
"valid_targets_mean": 15837.5,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 0.37363223912463306,
|
|
"grad_norm": 0.8512140790165293,
|
|
"learning_rate": 0.00014659846593686885,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6356359124183655,
|
|
"step": 1400,
|
|
"valid_targets_mean": 15852.3,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 0.3749666399786496,
|
|
"grad_norm": 0.5723739765713398,
|
|
"learning_rate": 0.0001465462596647822,
|
|
"loss": 0.6298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6180834770202637,
|
|
"step": 1405,
|
|
"valid_targets_mean": 15452.5,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 0.37630104083266613,
|
|
"grad_norm": 0.6075008541552803,
|
|
"learning_rate": 0.0001464936652500435,
|
|
"loss": 0.6008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6029921770095825,
|
|
"step": 1410,
|
|
"valid_targets_mean": 16629.1,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.37763544168668267,
|
|
"grad_norm": 0.4676746690221598,
|
|
"learning_rate": 0.0001464406829779806,
|
|
"loss": 0.6157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6156923770904541,
|
|
"step": 1415,
|
|
"valid_targets_mean": 15352.8,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 0.3789698425406992,
|
|
"grad_norm": 0.5817747303529482,
|
|
"learning_rate": 0.0001463873131360254,
|
|
"loss": 0.6106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6119555234909058,
|
|
"step": 1420,
|
|
"valid_targets_mean": 16914.2,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.38030424339471575,
|
|
"grad_norm": 0.432992609011377,
|
|
"learning_rate": 0.0001463335560137124,
|
|
"loss": 0.6156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6189119815826416,
|
|
"step": 1425,
|
|
"valid_targets_mean": 16088.6,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.38163864424873234,
|
|
"grad_norm": 0.40579421448961717,
|
|
"learning_rate": 0.00014627941190267717,
|
|
"loss": 0.6152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6119743585586548,
|
|
"step": 1430,
|
|
"valid_targets_mean": 16528.0,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 0.3829730451027489,
|
|
"grad_norm": 0.6198382928720174,
|
|
"learning_rate": 0.00014622488109665468,
|
|
"loss": 0.6285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6378297805786133,
|
|
"step": 1435,
|
|
"valid_targets_mean": 16510.3,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 0.3843074459567654,
|
|
"grad_norm": 0.5220468831850059,
|
|
"learning_rate": 0.0001461699638914777,
|
|
"loss": 0.6203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6250759363174438,
|
|
"step": 1440,
|
|
"valid_targets_mean": 15598.2,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 0.38564184681078195,
|
|
"grad_norm": 0.5528435347529946,
|
|
"learning_rate": 0.00014611466058507536,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6076961755752563,
|
|
"step": 1445,
|
|
"valid_targets_mean": 15165.0,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.3869762476647985,
|
|
"grad_norm": 0.4755246533026965,
|
|
"learning_rate": 0.00014605897147747132,
|
|
"loss": 0.6251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6179888248443604,
|
|
"step": 1450,
|
|
"valid_targets_mean": 15599.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 0.388310648518815,
|
|
"grad_norm": 0.4844726542546722,
|
|
"learning_rate": 0.0001460028968707822,
|
|
"loss": 0.6302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6419791579246521,
|
|
"step": 1455,
|
|
"valid_targets_mean": 14379.4,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 0.3896450493728316,
|
|
"grad_norm": 0.48494595176664035,
|
|
"learning_rate": 0.0001459464370692161,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6191474795341492,
|
|
"step": 1460,
|
|
"valid_targets_mean": 16336.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.39097945022684816,
|
|
"grad_norm": 0.6255867368178756,
|
|
"learning_rate": 0.0001458895923790707,
|
|
"loss": 0.6212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6456395387649536,
|
|
"step": 1465,
|
|
"valid_targets_mean": 15574.0,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.3923138510808647,
|
|
"grad_norm": 0.4644640635135548,
|
|
"learning_rate": 0.0001458323631087318,
|
|
"loss": 0.6206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6031760573387146,
|
|
"step": 1470,
|
|
"valid_targets_mean": 16420.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 0.39364825193488123,
|
|
"grad_norm": 0.387187368792563,
|
|
"learning_rate": 0.00014577474956867155,
|
|
"loss": 0.6028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6073260307312012,
|
|
"step": 1475,
|
|
"valid_targets_mean": 15389.3,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 0.39498265278889777,
|
|
"grad_norm": 0.57060581290841,
|
|
"learning_rate": 0.00014571675207144676,
|
|
"loss": 0.6285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6272798776626587,
|
|
"step": 1480,
|
|
"valid_targets_mean": 14589.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.3963170536429143,
|
|
"grad_norm": 0.5222086273973913,
|
|
"learning_rate": 0.00014565837093169728,
|
|
"loss": 0.6326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.619460940361023,
|
|
"step": 1485,
|
|
"valid_targets_mean": 16074.4,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.3976514544969309,
|
|
"grad_norm": 0.43806267201083776,
|
|
"learning_rate": 0.00014559960646614418,
|
|
"loss": 0.6314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6153547763824463,
|
|
"step": 1490,
|
|
"valid_targets_mean": 15761.4,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 0.39898585535094744,
|
|
"grad_norm": 0.4311441690905246,
|
|
"learning_rate": 0.00014554045899358814,
|
|
"loss": 0.6203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6338887214660645,
|
|
"step": 1495,
|
|
"valid_targets_mean": 16588.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.400320256204964,
|
|
"grad_norm": 0.39354710766071277,
|
|
"learning_rate": 0.00014548092883490772,
|
|
"loss": 0.6219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6525273323059082,
|
|
"step": 1500,
|
|
"valid_targets_mean": 16091.5,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 0.4016546570589805,
|
|
"grad_norm": 0.3264072177146715,
|
|
"learning_rate": 0.00014542101631305745,
|
|
"loss": 0.6341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6408512592315674,
|
|
"step": 1505,
|
|
"valid_targets_mean": 15003.0,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.40298905791299705,
|
|
"grad_norm": 0.42613217106930396,
|
|
"learning_rate": 0.00014536072175306628,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6036968231201172,
|
|
"step": 1510,
|
|
"valid_targets_mean": 15678.4,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 0.4043234587670136,
|
|
"grad_norm": 0.5010042319742402,
|
|
"learning_rate": 0.00014530004548203573,
|
|
"loss": 0.6096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6010334491729736,
|
|
"step": 1515,
|
|
"valid_targets_mean": 15991.1,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 0.4056578596210302,
|
|
"grad_norm": 0.46145582386046646,
|
|
"learning_rate": 0.00014523898782913818,
|
|
"loss": 0.6363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5833079218864441,
|
|
"step": 1520,
|
|
"valid_targets_mean": 17355.7,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 0.4069922604750467,
|
|
"grad_norm": 0.4769221033091848,
|
|
"learning_rate": 0.00014517754912561496,
|
|
"loss": 0.6225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6384181976318359,
|
|
"step": 1525,
|
|
"valid_targets_mean": 16321.8,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 0.40832666132906326,
|
|
"grad_norm": 0.40610205983033637,
|
|
"learning_rate": 0.00014511572970477457,
|
|
"loss": 0.6092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5959339737892151,
|
|
"step": 1530,
|
|
"valid_targets_mean": 16648.0,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.4096610621830798,
|
|
"grad_norm": 0.6294123329894471,
|
|
"learning_rate": 0.00014505352990199107,
|
|
"loss": 0.6212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.617108941078186,
|
|
"step": 1535,
|
|
"valid_targets_mean": 16604.0,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.41099546303709633,
|
|
"grad_norm": 0.6048603260456543,
|
|
"learning_rate": 0.000144990950054702,
|
|
"loss": 0.6203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6036360263824463,
|
|
"step": 1540,
|
|
"valid_targets_mean": 15739.4,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.41232986389111287,
|
|
"grad_norm": 0.5753985039391059,
|
|
"learning_rate": 0.0001449279905024067,
|
|
"loss": 0.6229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.624218225479126,
|
|
"step": 1545,
|
|
"valid_targets_mean": 17004.7,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.41366426474512946,
|
|
"grad_norm": 0.4074871541259797,
|
|
"learning_rate": 0.00014486465158666443,
|
|
"loss": 0.622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6364642977714539,
|
|
"step": 1550,
|
|
"valid_targets_mean": 15261.5,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 0.414998665599146,
|
|
"grad_norm": 0.7777037216192905,
|
|
"learning_rate": 0.00014480093365109252,
|
|
"loss": 0.6375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.625815212726593,
|
|
"step": 1555,
|
|
"valid_targets_mean": 16165.8,
|
|
"valid_targets_min": 86
|
|
},
|
|
{
|
|
"epoch": 0.41633306645316254,
|
|
"grad_norm": 0.813927001051695,
|
|
"learning_rate": 0.00014473683704136454,
|
|
"loss": 0.6183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.613219141960144,
|
|
"step": 1560,
|
|
"valid_targets_mean": 15473.5,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.4176674673071791,
|
|
"grad_norm": 0.624851558395919,
|
|
"learning_rate": 0.0001446723621052083,
|
|
"loss": 0.6182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6244171857833862,
|
|
"step": 1565,
|
|
"valid_targets_mean": 15542.5,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 0.4190018681611956,
|
|
"grad_norm": 0.7299434520023005,
|
|
"learning_rate": 0.00014460750919240416,
|
|
"loss": 0.6069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.597712516784668,
|
|
"step": 1570,
|
|
"valid_targets_mean": 16187.9,
|
|
"valid_targets_min": 126
|
|
},
|
|
{
|
|
"epoch": 0.42033626901521215,
|
|
"grad_norm": 0.7996751395350323,
|
|
"learning_rate": 0.00014454227865478292,
|
|
"loss": 0.6221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6417670249938965,
|
|
"step": 1575,
|
|
"valid_targets_mean": 15753.5,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.42167066986922874,
|
|
"grad_norm": 0.7643157528008785,
|
|
"learning_rate": 0.0001444766708462241,
|
|
"loss": 0.636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.638425350189209,
|
|
"step": 1580,
|
|
"valid_targets_mean": 15052.7,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 0.4230050707232453,
|
|
"grad_norm": 1.6220602331806422,
|
|
"learning_rate": 0.0001444106861226539,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6398119926452637,
|
|
"step": 1585,
|
|
"valid_targets_mean": 16265.2,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 0.4243394715772618,
|
|
"grad_norm": 0.8975330770047389,
|
|
"learning_rate": 0.00014434432484204335,
|
|
"loss": 0.6263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6489125490188599,
|
|
"step": 1590,
|
|
"valid_targets_mean": 15977.6,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.42567387243127836,
|
|
"grad_norm": 0.6599629728623814,
|
|
"learning_rate": 0.0001442775873644062,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6266319155693054,
|
|
"step": 1595,
|
|
"valid_targets_mean": 16949.8,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 0.4270082732852949,
|
|
"grad_norm": 1.0255720761078193,
|
|
"learning_rate": 0.00014421047405179725,
|
|
"loss": 0.6314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.655253529548645,
|
|
"step": 1600,
|
|
"valid_targets_mean": 16475.8,
|
|
"valid_targets_min": 122
|
|
},
|
|
{
|
|
"epoch": 0.42834267413931143,
|
|
"grad_norm": 0.5940148209535145,
|
|
"learning_rate": 0.00014414298526831012,
|
|
"loss": 0.6274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.604961097240448,
|
|
"step": 1605,
|
|
"valid_targets_mean": 16166.2,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 0.429677074993328,
|
|
"grad_norm": 0.6891749029440575,
|
|
"learning_rate": 0.00014407512138007547,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6220706701278687,
|
|
"step": 1610,
|
|
"valid_targets_mean": 15656.4,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.43101147584734456,
|
|
"grad_norm": 0.4005516569017509,
|
|
"learning_rate": 0.0001440068827552588,
|
|
"loss": 0.6082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.625106692314148,
|
|
"step": 1615,
|
|
"valid_targets_mean": 15519.3,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.4323458767013611,
|
|
"grad_norm": 0.5272590178673885,
|
|
"learning_rate": 0.0001439382697640587,
|
|
"loss": 0.6416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6334394216537476,
|
|
"step": 1620,
|
|
"valid_targets_mean": 15268.3,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.43368027755537764,
|
|
"grad_norm": 0.3943863363540411,
|
|
"learning_rate": 0.00014386928277870461,
|
|
"loss": 0.6022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6156237721443176,
|
|
"step": 1625,
|
|
"valid_targets_mean": 16021.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.4350146784093942,
|
|
"grad_norm": 0.44002913415369016,
|
|
"learning_rate": 0.00014379992217345504,
|
|
"loss": 0.6372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6078697443008423,
|
|
"step": 1630,
|
|
"valid_targets_mean": 15966.3,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.4363490792634107,
|
|
"grad_norm": 0.5474567909345269,
|
|
"learning_rate": 0.0001437301883245953,
|
|
"loss": 0.6217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5955013036727905,
|
|
"step": 1635,
|
|
"valid_targets_mean": 16371.5,
|
|
"valid_targets_min": 74
|
|
},
|
|
{
|
|
"epoch": 0.43768348011742725,
|
|
"grad_norm": 0.37821018625798675,
|
|
"learning_rate": 0.0001436600816104356,
|
|
"loss": 0.6169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6408905982971191,
|
|
"step": 1640,
|
|
"valid_targets_mean": 16821.0,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 0.43901788097144384,
|
|
"grad_norm": 0.633186477597186,
|
|
"learning_rate": 0.000143589602411309,
|
|
"loss": 0.6192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234664916992188,
|
|
"step": 1645,
|
|
"valid_targets_mean": 15221.4,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 0.4403522818254604,
|
|
"grad_norm": 0.6339409024098717,
|
|
"learning_rate": 0.00014351875110956927,
|
|
"loss": 0.6159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6487512588500977,
|
|
"step": 1650,
|
|
"valid_targets_mean": 15942.3,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 0.4416866826794769,
|
|
"grad_norm": 0.3375878572865069,
|
|
"learning_rate": 0.00014344752808958887,
|
|
"loss": 0.6345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6520563960075378,
|
|
"step": 1655,
|
|
"valid_targets_mean": 16000.6,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 0.44302108353349345,
|
|
"grad_norm": 0.38773310648770776,
|
|
"learning_rate": 0.0001433759337377569,
|
|
"loss": 0.6159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6261415481567383,
|
|
"step": 1660,
|
|
"valid_targets_mean": 15071.0,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 0.44435548438751,
|
|
"grad_norm": 0.38867253963185605,
|
|
"learning_rate": 0.00014330396844247685,
|
|
"loss": 0.6133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5830848217010498,
|
|
"step": 1665,
|
|
"valid_targets_mean": 17879.8,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.44568988524152653,
|
|
"grad_norm": 1.5177148462768428,
|
|
"learning_rate": 0.0001432316325941647,
|
|
"loss": 0.6224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6349464058876038,
|
|
"step": 1670,
|
|
"valid_targets_mean": 15402.8,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 0.4470242860955431,
|
|
"grad_norm": 0.4944692858526399,
|
|
"learning_rate": 0.00014315892658524665,
|
|
"loss": 0.6286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6335951685905457,
|
|
"step": 1675,
|
|
"valid_targets_mean": 15292.9,
|
|
"valid_targets_min": 143
|
|
},
|
|
{
|
|
"epoch": 0.44835868694955966,
|
|
"grad_norm": 0.49903902666348443,
|
|
"learning_rate": 0.0001430858508101571,
|
|
"loss": 0.6024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6011691689491272,
|
|
"step": 1680,
|
|
"valid_targets_mean": 16015.8,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.4496930878035762,
|
|
"grad_norm": 0.5951332149681916,
|
|
"learning_rate": 0.00014301240566533637,
|
|
"loss": 0.6395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6320478916168213,
|
|
"step": 1685,
|
|
"valid_targets_mean": 16208.7,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 0.45102748865759273,
|
|
"grad_norm": 0.4581806243474604,
|
|
"learning_rate": 0.00014293859154922866,
|
|
"loss": 0.6121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6136428117752075,
|
|
"step": 1690,
|
|
"valid_targets_mean": 15048.1,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 0.45236188951160927,
|
|
"grad_norm": 0.5610547921187975,
|
|
"learning_rate": 0.00014286440886227995,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6516221761703491,
|
|
"step": 1695,
|
|
"valid_targets_mean": 15288.6,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 0.4536962903656258,
|
|
"grad_norm": 0.4888275216939204,
|
|
"learning_rate": 0.00014278985800693562,
|
|
"loss": 0.6394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6383839845657349,
|
|
"step": 1700,
|
|
"valid_targets_mean": 16269.8,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.4550306912196424,
|
|
"grad_norm": 0.3750982099561412,
|
|
"learning_rate": 0.0001427149393876384,
|
|
"loss": 0.6132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5994255542755127,
|
|
"step": 1705,
|
|
"valid_targets_mean": 15503.7,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.45636509207365894,
|
|
"grad_norm": 0.5220259390241854,
|
|
"learning_rate": 0.00014263965341082622,
|
|
"loss": 0.6216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6135061383247375,
|
|
"step": 1710,
|
|
"valid_targets_mean": 16014.8,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 0.4576994929276755,
|
|
"grad_norm": 0.44611862398704427,
|
|
"learning_rate": 0.00014256400048492994,
|
|
"loss": 0.6389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6459407806396484,
|
|
"step": 1715,
|
|
"valid_targets_mean": 16696.7,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.459033893781692,
|
|
"grad_norm": 0.7215196076760709,
|
|
"learning_rate": 0.00014248798102037106,
|
|
"loss": 0.6025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5831271409988403,
|
|
"step": 1720,
|
|
"valid_targets_mean": 16603.4,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.46036829463570855,
|
|
"grad_norm": 0.68883263340678,
|
|
"learning_rate": 0.0001424115954295597,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6232255101203918,
|
|
"step": 1725,
|
|
"valid_targets_mean": 15403.0,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 0.4617026954897251,
|
|
"grad_norm": 0.6747839578442204,
|
|
"learning_rate": 0.00014233484412689212,
|
|
"loss": 0.6234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6511488556861877,
|
|
"step": 1730,
|
|
"valid_targets_mean": 14797.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.4630370963437417,
|
|
"grad_norm": 0.6377290200940263,
|
|
"learning_rate": 0.00014225772752874866,
|
|
"loss": 0.6114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5981515645980835,
|
|
"step": 1735,
|
|
"valid_targets_mean": 16110.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.4643714971977582,
|
|
"grad_norm": 0.5027780947825067,
|
|
"learning_rate": 0.00014218024605349133,
|
|
"loss": 0.6043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6172059774398804,
|
|
"step": 1740,
|
|
"valid_targets_mean": 16348.3,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 0.46570589805177476,
|
|
"grad_norm": 0.40475216068056086,
|
|
"learning_rate": 0.00014210240012146178,
|
|
"loss": 0.6141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6198212504386902,
|
|
"step": 1745,
|
|
"valid_targets_mean": 16360.2,
|
|
"valid_targets_min": 11
|
|
},
|
|
{
|
|
"epoch": 0.4670402989057913,
|
|
"grad_norm": 0.36279653577965554,
|
|
"learning_rate": 0.00014202419015497863,
|
|
"loss": 0.6251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5892041921615601,
|
|
"step": 1750,
|
|
"valid_targets_mean": 14806.2,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 0.46837469975980783,
|
|
"grad_norm": 0.5529952048740383,
|
|
"learning_rate": 0.00014194561657833563,
|
|
"loss": 0.6238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6133667826652527,
|
|
"step": 1755,
|
|
"valid_targets_mean": 15809.3,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.46970910061382437,
|
|
"grad_norm": 0.6537360232818689,
|
|
"learning_rate": 0.000141866679817799,
|
|
"loss": 0.6126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6035851240158081,
|
|
"step": 1760,
|
|
"valid_targets_mean": 16271.0,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 0.47104350146784096,
|
|
"grad_norm": 0.5789436462540626,
|
|
"learning_rate": 0.00014178738030160527,
|
|
"loss": 0.623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485693454742432,
|
|
"step": 1765,
|
|
"valid_targets_mean": 15430.4,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.4723779023218575,
|
|
"grad_norm": 0.8720659405356321,
|
|
"learning_rate": 0.000141707718459959,
|
|
"loss": 0.6191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6286308765411377,
|
|
"step": 1770,
|
|
"valid_targets_mean": 16587.6,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 0.47371230317587404,
|
|
"grad_norm": 0.7511852245287411,
|
|
"learning_rate": 0.00014162769472503032,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5902373790740967,
|
|
"step": 1775,
|
|
"valid_targets_mean": 16709.7,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 0.4750467040298906,
|
|
"grad_norm": 0.8184258903731754,
|
|
"learning_rate": 0.00014154730953095274,
|
|
"loss": 0.6015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5919774174690247,
|
|
"step": 1780,
|
|
"valid_targets_mean": 16877.2,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 0.4763811048839071,
|
|
"grad_norm": 0.773629076068772,
|
|
"learning_rate": 0.0001414665633138206,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6243022680282593,
|
|
"step": 1785,
|
|
"valid_targets_mean": 16179.8,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 0.47771550573792365,
|
|
"grad_norm": 0.8716843697927165,
|
|
"learning_rate": 0.00014138545651168683,
|
|
"loss": 0.6299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6102423667907715,
|
|
"step": 1790,
|
|
"valid_targets_mean": 16395.6,
|
|
"valid_targets_min": 20
|
|
},
|
|
{
|
|
"epoch": 0.47904990659194024,
|
|
"grad_norm": 0.6836681436980766,
|
|
"learning_rate": 0.00014130398956456062,
|
|
"loss": 0.6121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6184238195419312,
|
|
"step": 1795,
|
|
"valid_targets_mean": 16681.8,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.4803843074459568,
|
|
"grad_norm": 0.7379214609664572,
|
|
"learning_rate": 0.00014122216291440496,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6008614301681519,
|
|
"step": 1800,
|
|
"valid_targets_mean": 16840.3,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.4817187082999733,
|
|
"grad_norm": 0.6448470422300319,
|
|
"learning_rate": 0.00014113997700513422,
|
|
"loss": 0.6247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6125494241714478,
|
|
"step": 1805,
|
|
"valid_targets_mean": 16632.3,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 0.48305310915398986,
|
|
"grad_norm": 0.7507255061402137,
|
|
"learning_rate": 0.00014105743228261174,
|
|
"loss": 0.5971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5912197232246399,
|
|
"step": 1810,
|
|
"valid_targets_mean": 15968.4,
|
|
"valid_targets_min": 14
|
|
},
|
|
{
|
|
"epoch": 0.4843875100080064,
|
|
"grad_norm": 0.6923025633359248,
|
|
"learning_rate": 0.00014097452919464748,
|
|
"loss": 0.6275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6405831575393677,
|
|
"step": 1815,
|
|
"valid_targets_mean": 15824.7,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 0.48572191086202293,
|
|
"grad_norm": 0.7189502878051086,
|
|
"learning_rate": 0.00014089126819099557,
|
|
"loss": 0.6076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5731747150421143,
|
|
"step": 1820,
|
|
"valid_targets_mean": 15942.4,
|
|
"valid_targets_min": 76
|
|
},
|
|
{
|
|
"epoch": 0.4870563117160395,
|
|
"grad_norm": 0.6224947383076709,
|
|
"learning_rate": 0.00014080764972335184,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6464651823043823,
|
|
"step": 1825,
|
|
"valid_targets_mean": 16088.4,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 0.48839071257005606,
|
|
"grad_norm": 0.7083902377315511,
|
|
"learning_rate": 0.00014072367424535135,
|
|
"loss": 0.6162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6100757122039795,
|
|
"step": 1830,
|
|
"valid_targets_mean": 14989.7,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 0.4897251134240726,
|
|
"grad_norm": 0.56069038755494,
|
|
"learning_rate": 0.000140639342212566,
|
|
"loss": 0.6172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5954641699790955,
|
|
"step": 1835,
|
|
"valid_targets_mean": 17395.5,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 0.49105951427808914,
|
|
"grad_norm": 0.7780348082684239,
|
|
"learning_rate": 0.000140554654082502,
|
|
"loss": 0.6136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6116977334022522,
|
|
"step": 1840,
|
|
"valid_targets_mean": 16951.9,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 0.4923939151321057,
|
|
"grad_norm": 0.5613734141188126,
|
|
"learning_rate": 0.0001404696103145975,
|
|
"loss": 0.6327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6341204047203064,
|
|
"step": 1845,
|
|
"valid_targets_mean": 15912.0,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.4937283159861222,
|
|
"grad_norm": 0.5646169834365188,
|
|
"learning_rate": 0.0001403842113702198,
|
|
"loss": 0.6096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6076234579086304,
|
|
"step": 1850,
|
|
"valid_targets_mean": 16483.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.49506271684013875,
|
|
"grad_norm": 0.42454431593189007,
|
|
"learning_rate": 0.00014029845771266325,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5989246368408203,
|
|
"step": 1855,
|
|
"valid_targets_mean": 15265.2,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.49639711769415534,
|
|
"grad_norm": 0.42970395034884246,
|
|
"learning_rate": 0.00014021234980714648,
|
|
"loss": 0.6311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6091680526733398,
|
|
"step": 1860,
|
|
"valid_targets_mean": 15967.2,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.4977315185481719,
|
|
"grad_norm": 0.467813457018178,
|
|
"learning_rate": 0.00014012588812080992,
|
|
"loss": 0.6301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6140797138214111,
|
|
"step": 1865,
|
|
"valid_targets_mean": 16113.7,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 0.4990659194021884,
|
|
"grad_norm": 0.4443571651380167,
|
|
"learning_rate": 0.00014003907312271319,
|
|
"loss": 0.6167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6168869137763977,
|
|
"step": 1870,
|
|
"valid_targets_mean": 16914.2,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 0.500400320256205,
|
|
"grad_norm": 0.3688517851382228,
|
|
"learning_rate": 0.0001399519052838329,
|
|
"loss": 0.6169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6044825315475464,
|
|
"step": 1875,
|
|
"valid_targets_mean": 16600.2,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 0.5017347211102215,
|
|
"grad_norm": 0.4272311555518661,
|
|
"learning_rate": 0.00013986438507705958,
|
|
"loss": 0.6232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6084432005882263,
|
|
"step": 1880,
|
|
"valid_targets_mean": 15952.6,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 0.503069121964238,
|
|
"grad_norm": 0.4084671985107604,
|
|
"learning_rate": 0.00013977651297719548,
|
|
"loss": 0.6223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6196306347846985,
|
|
"step": 1885,
|
|
"valid_targets_mean": 16205.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.5044035228182546,
|
|
"grad_norm": 0.47467835510882866,
|
|
"learning_rate": 0.00013968828946095193,
|
|
"loss": 0.6094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6280183792114258,
|
|
"step": 1890,
|
|
"valid_targets_mean": 15579.6,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.5057379236722711,
|
|
"grad_norm": 0.3734668638187962,
|
|
"learning_rate": 0.00013959971500694668,
|
|
"loss": 0.6166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5934363603591919,
|
|
"step": 1895,
|
|
"valid_targets_mean": 15840.3,
|
|
"valid_targets_min": 16
|
|
},
|
|
{
|
|
"epoch": 0.5070723245262877,
|
|
"grad_norm": 0.3692138856481344,
|
|
"learning_rate": 0.00013951079009570132,
|
|
"loss": 0.6212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6200092434883118,
|
|
"step": 1900,
|
|
"valid_targets_mean": 16037.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.5084067253803043,
|
|
"grad_norm": 0.3823174475553723,
|
|
"learning_rate": 0.00013942151520963868,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6433672904968262,
|
|
"step": 1905,
|
|
"valid_targets_mean": 16325.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.5097411262343208,
|
|
"grad_norm": 0.3497586948857913,
|
|
"learning_rate": 0.00013933189083308031,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6540859937667847,
|
|
"step": 1910,
|
|
"valid_targets_mean": 17292.9,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 0.5110755270883374,
|
|
"grad_norm": 0.4591645468700426,
|
|
"learning_rate": 0.00013924191745224364,
|
|
"loss": 0.612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6253089904785156,
|
|
"step": 1915,
|
|
"valid_targets_mean": 15474.2,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.5124099279423538,
|
|
"grad_norm": 0.31031712977802206,
|
|
"learning_rate": 0.00013915159555523957,
|
|
"loss": 0.6345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5882706642150879,
|
|
"step": 1920,
|
|
"valid_targets_mean": 16923.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 0.5137443287963704,
|
|
"grad_norm": 0.4022558108020764,
|
|
"learning_rate": 0.00013906092563206968,
|
|
"loss": 0.6057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6023075580596924,
|
|
"step": 1925,
|
|
"valid_targets_mean": 15661.2,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.5150787296503869,
|
|
"grad_norm": 0.4368678641461343,
|
|
"learning_rate": 0.00013896990817462363,
|
|
"loss": 0.6312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6641368865966797,
|
|
"step": 1930,
|
|
"valid_targets_mean": 16154.9,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 0.5164131305044035,
|
|
"grad_norm": 0.6467331098405801,
|
|
"learning_rate": 0.00013887854367667645,
|
|
"loss": 0.6222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6303449273109436,
|
|
"step": 1935,
|
|
"valid_targets_mean": 16546.6,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 0.5177475313584201,
|
|
"grad_norm": 0.5977986197066303,
|
|
"learning_rate": 0.00013878683263388587,
|
|
"loss": 0.6267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6208200454711914,
|
|
"step": 1940,
|
|
"valid_targets_mean": 14945.0,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 0.5190819322124366,
|
|
"grad_norm": 0.5304666492399011,
|
|
"learning_rate": 0.00013869477554378972,
|
|
"loss": 0.6228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6293059587478638,
|
|
"step": 1945,
|
|
"valid_targets_mean": 14679.3,
|
|
"valid_targets_min": 105
|
|
},
|
|
{
|
|
"epoch": 0.5204163330664532,
|
|
"grad_norm": 0.5484478107642148,
|
|
"learning_rate": 0.00013860237290580306,
|
|
"loss": 0.6051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5901466012001038,
|
|
"step": 1950,
|
|
"valid_targets_mean": 16352.2,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 0.5217507339204697,
|
|
"grad_norm": 0.4753019705870865,
|
|
"learning_rate": 0.00013850962522121569,
|
|
"loss": 0.6279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5876643657684326,
|
|
"step": 1955,
|
|
"valid_targets_mean": 16744.3,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.5230851347744863,
|
|
"grad_norm": 0.41422385606660383,
|
|
"learning_rate": 0.00013841653299318915,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6483991146087646,
|
|
"step": 1960,
|
|
"valid_targets_mean": 16353.5,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 0.5244195356285029,
|
|
"grad_norm": 0.36682178106347624,
|
|
"learning_rate": 0.00013832309672675428,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6216505169868469,
|
|
"step": 1965,
|
|
"valid_targets_mean": 15764.5,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 0.5257539364825193,
|
|
"grad_norm": 0.46701722935809503,
|
|
"learning_rate": 0.00013822931692880828,
|
|
"loss": 0.6229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6098369359970093,
|
|
"step": 1970,
|
|
"valid_targets_mean": 16463.2,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.5270883373365359,
|
|
"grad_norm": 0.4445692469326501,
|
|
"learning_rate": 0.00013813519410811208,
|
|
"loss": 0.617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6049668788909912,
|
|
"step": 1975,
|
|
"valid_targets_mean": 15474.1,
|
|
"valid_targets_min": 103
|
|
},
|
|
{
|
|
"epoch": 0.5284227381905524,
|
|
"grad_norm": 0.5320742477880946,
|
|
"learning_rate": 0.00013804072877528746,
|
|
"loss": 0.6305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6137555241584778,
|
|
"step": 1980,
|
|
"valid_targets_mean": 16337.1,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 0.529757139044569,
|
|
"grad_norm": 0.34825869011443694,
|
|
"learning_rate": 0.0001379459214428144,
|
|
"loss": 0.6107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.595025897026062,
|
|
"step": 1985,
|
|
"valid_targets_mean": 15993.6,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 0.5310915398985855,
|
|
"grad_norm": 0.42944867097054756,
|
|
"learning_rate": 0.0001378507726250283,
|
|
"loss": 0.6194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.606879472732544,
|
|
"step": 1990,
|
|
"valid_targets_mean": 16933.6,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.5324259407526021,
|
|
"grad_norm": 0.30582752584201905,
|
|
"learning_rate": 0.00013775528283811695,
|
|
"loss": 0.6142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.582551896572113,
|
|
"step": 1995,
|
|
"valid_targets_mean": 17717.3,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.5337603416066187,
|
|
"grad_norm": 0.5787030570229632,
|
|
"learning_rate": 0.00013765945260011815,
|
|
"loss": 0.6181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6295760273933411,
|
|
"step": 2000,
|
|
"valid_targets_mean": 15776.8,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 0.5350947424606352,
|
|
"grad_norm": 0.3901846487164271,
|
|
"learning_rate": 0.0001375632824309165,
|
|
"loss": 0.6153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5916974544525146,
|
|
"step": 2005,
|
|
"valid_targets_mean": 17248.5,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.5364291433146517,
|
|
"grad_norm": 0.3384774790194208,
|
|
"learning_rate": 0.00013746677285224082,
|
|
"loss": 0.6166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6312397718429565,
|
|
"step": 2010,
|
|
"valid_targets_mean": 15347.2,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 0.5377635441686682,
|
|
"grad_norm": 0.501250109397787,
|
|
"learning_rate": 0.00013736992438766123,
|
|
"loss": 0.61,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6018697023391724,
|
|
"step": 2015,
|
|
"valid_targets_mean": 15687.4,
|
|
"valid_targets_min": 24
|
|
},
|
|
{
|
|
"epoch": 0.5390979450226848,
|
|
"grad_norm": 0.4100739032531526,
|
|
"learning_rate": 0.00013727273756258633,
|
|
"loss": 0.6096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6109263300895691,
|
|
"step": 2020,
|
|
"valid_targets_mean": 16601.7,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 0.5404323458767014,
|
|
"grad_norm": 0.4246630081006453,
|
|
"learning_rate": 0.00013717521290426034,
|
|
"loss": 0.6103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6461702585220337,
|
|
"step": 2025,
|
|
"valid_targets_mean": 17573.6,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.5417667467307179,
|
|
"grad_norm": 0.35833577488702584,
|
|
"learning_rate": 0.00013707735094176026,
|
|
"loss": 0.6141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5970995426177979,
|
|
"step": 2030,
|
|
"valid_targets_mean": 17222.7,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 0.5431011475847345,
|
|
"grad_norm": 0.4129068769918055,
|
|
"learning_rate": 0.00013697915220599294,
|
|
"loss": 0.62,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6218748092651367,
|
|
"step": 2035,
|
|
"valid_targets_mean": 15113.5,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 0.544435548438751,
|
|
"grad_norm": 0.3387496968435161,
|
|
"learning_rate": 0.0001368806172296923,
|
|
"loss": 0.6114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.612634539604187,
|
|
"step": 2040,
|
|
"valid_targets_mean": 17110.6,
|
|
"valid_targets_min": 188
|
|
},
|
|
{
|
|
"epoch": 0.5457699492927676,
|
|
"grad_norm": 0.3745412071832409,
|
|
"learning_rate": 0.00013678174654741638,
|
|
"loss": 0.599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5897716879844666,
|
|
"step": 2045,
|
|
"valid_targets_mean": 17232.0,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 0.547104350146784,
|
|
"grad_norm": 0.38399513390890655,
|
|
"learning_rate": 0.00013668254069554439,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6049580574035645,
|
|
"step": 2050,
|
|
"valid_targets_mean": 15198.6,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 0.5484387510008006,
|
|
"grad_norm": 0.48936350323324096,
|
|
"learning_rate": 0.00013658300021227387,
|
|
"loss": 0.6241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6280665397644043,
|
|
"step": 2055,
|
|
"valid_targets_mean": 16052.8,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.5497731518548172,
|
|
"grad_norm": 0.41651646905962314,
|
|
"learning_rate": 0.00013648312563761784,
|
|
"loss": 0.5985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6201366186141968,
|
|
"step": 2060,
|
|
"valid_targets_mean": 15825.8,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 0.5511075527088337,
|
|
"grad_norm": 0.5106499293556461,
|
|
"learning_rate": 0.0001363829175134017,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5888843536376953,
|
|
"step": 2065,
|
|
"valid_targets_mean": 16064.7,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 0.5524419535628503,
|
|
"grad_norm": 0.502558200477223,
|
|
"learning_rate": 0.00013628237638326038,
|
|
"loss": 0.6182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6001168489456177,
|
|
"step": 2070,
|
|
"valid_targets_mean": 16753.5,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.5537763544168668,
|
|
"grad_norm": 0.4044504847668435,
|
|
"learning_rate": 0.0001361815027926354,
|
|
"loss": 0.611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6267122030258179,
|
|
"step": 2075,
|
|
"valid_targets_mean": 17083.9,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.5551107552708834,
|
|
"grad_norm": 0.35978073598074256,
|
|
"learning_rate": 0.00013608029728877195,
|
|
"loss": 0.6056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.602839469909668,
|
|
"step": 2080,
|
|
"valid_targets_mean": 16182.3,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.5564451561248999,
|
|
"grad_norm": 0.427878030892968,
|
|
"learning_rate": 0.00013597876042071574,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6435970664024353,
|
|
"step": 2085,
|
|
"valid_targets_mean": 14700.1,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.5577795569789165,
|
|
"grad_norm": 0.35363582855209424,
|
|
"learning_rate": 0.00013587689273931032,
|
|
"loss": 0.612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.631229817867279,
|
|
"step": 2090,
|
|
"valid_targets_mean": 15248.1,
|
|
"valid_targets_min": 115
|
|
},
|
|
{
|
|
"epoch": 0.559113957832933,
|
|
"grad_norm": 0.3728911369275915,
|
|
"learning_rate": 0.00013577469479719376,
|
|
"loss": 0.6292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6266062259674072,
|
|
"step": 2095,
|
|
"valid_targets_mean": 16060.7,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 0.5604483586869495,
|
|
"grad_norm": 0.44761295623444686,
|
|
"learning_rate": 0.00013567216714879593,
|
|
"loss": 0.6036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5844216346740723,
|
|
"step": 2100,
|
|
"valid_targets_mean": 16994.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 0.5617827595409661,
|
|
"grad_norm": 0.576099808761112,
|
|
"learning_rate": 0.00013556931035033526,
|
|
"loss": 0.6206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6165690422058105,
|
|
"step": 2105,
|
|
"valid_targets_mean": 15846.1,
|
|
"valid_targets_min": 99
|
|
},
|
|
{
|
|
"epoch": 0.5631171603949826,
|
|
"grad_norm": 0.6302577465009878,
|
|
"learning_rate": 0.00013546612495981603,
|
|
"loss": 0.6209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6065033674240112,
|
|
"step": 2110,
|
|
"valid_targets_mean": 15524.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.5644515612489992,
|
|
"grad_norm": 0.42523423516149084,
|
|
"learning_rate": 0.00013536261153702494,
|
|
"loss": 0.6081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.613208532333374,
|
|
"step": 2115,
|
|
"valid_targets_mean": 15701.8,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 0.5657859621030158,
|
|
"grad_norm": 0.4123570282242918,
|
|
"learning_rate": 0.0001352587706435284,
|
|
"loss": 0.5997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.571455717086792,
|
|
"step": 2120,
|
|
"valid_targets_mean": 15546.9,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 0.5671203629570323,
|
|
"grad_norm": 0.5144475642174654,
|
|
"learning_rate": 0.00013515460284266933,
|
|
"loss": 0.6137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6285332441329956,
|
|
"step": 2125,
|
|
"valid_targets_mean": 15709.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.5684547638110489,
|
|
"grad_norm": 0.3301739903679853,
|
|
"learning_rate": 0.0001350501086995642,
|
|
"loss": 0.6081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6021836400032043,
|
|
"step": 2130,
|
|
"valid_targets_mean": 16203.4,
|
|
"valid_targets_min": 100
|
|
},
|
|
{
|
|
"epoch": 0.5697891646650654,
|
|
"grad_norm": 0.575131486232284,
|
|
"learning_rate": 0.00013494528878109978,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6023844480514526,
|
|
"step": 2135,
|
|
"valid_targets_mean": 15848.4,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.5711235655190819,
|
|
"grad_norm": 0.4526311432190505,
|
|
"learning_rate": 0.00013484014365593036,
|
|
"loss": 0.618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5874459743499756,
|
|
"step": 2140,
|
|
"valid_targets_mean": 16963.9,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 0.5724579663730984,
|
|
"grad_norm": 0.3441773102647398,
|
|
"learning_rate": 0.00013473467389447436,
|
|
"loss": 0.6084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6037788987159729,
|
|
"step": 2145,
|
|
"valid_targets_mean": 15695.8,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 0.573792367227115,
|
|
"grad_norm": 0.5246780101265172,
|
|
"learning_rate": 0.0001346288800689114,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6016116738319397,
|
|
"step": 2150,
|
|
"valid_targets_mean": 15866.1,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.5751267680811316,
|
|
"grad_norm": 0.4340969676204016,
|
|
"learning_rate": 0.00013452276275317926,
|
|
"loss": 0.6276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6183035373687744,
|
|
"step": 2155,
|
|
"valid_targets_mean": 15315.3,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.5764611689351481,
|
|
"grad_norm": 0.35865324150374944,
|
|
"learning_rate": 0.00013441632252297054,
|
|
"loss": 0.6048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5798181295394897,
|
|
"step": 2160,
|
|
"valid_targets_mean": 16757.2,
|
|
"valid_targets_min": 148
|
|
},
|
|
{
|
|
"epoch": 0.5777955697891647,
|
|
"grad_norm": 0.33904093328772805,
|
|
"learning_rate": 0.0001343095599557297,
|
|
"loss": 0.6178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6391230821609497,
|
|
"step": 2165,
|
|
"valid_targets_mean": 15397.4,
|
|
"valid_targets_min": 119
|
|
},
|
|
{
|
|
"epoch": 0.5791299706431812,
|
|
"grad_norm": 0.4478119414772001,
|
|
"learning_rate": 0.00013420247563064998,
|
|
"loss": 0.6254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5979142189025879,
|
|
"step": 2170,
|
|
"valid_targets_mean": 15297.2,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 0.5804643714971978,
|
|
"grad_norm": 0.5707769603321923,
|
|
"learning_rate": 0.0001340950701286701,
|
|
"loss": 0.6199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5975048542022705,
|
|
"step": 2175,
|
|
"valid_targets_mean": 16655.5,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 0.5817987723512144,
|
|
"grad_norm": 0.4824505778403902,
|
|
"learning_rate": 0.0001339873440324712,
|
|
"loss": 0.6229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6108227372169495,
|
|
"step": 2180,
|
|
"valid_targets_mean": 15969.7,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 0.5831331732052308,
|
|
"grad_norm": 0.4770877551156547,
|
|
"learning_rate": 0.00013387929792647366,
|
|
"loss": 0.6077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6048794984817505,
|
|
"step": 2185,
|
|
"valid_targets_mean": 15896.6,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 0.5844675740592474,
|
|
"grad_norm": 0.4814622819368835,
|
|
"learning_rate": 0.00013377093239683396,
|
|
"loss": 0.6245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6156436800956726,
|
|
"step": 2190,
|
|
"valid_targets_mean": 15846.9,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.5858019749132639,
|
|
"grad_norm": 0.5145484936853344,
|
|
"learning_rate": 0.0001336622480314414,
|
|
"loss": 0.6367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6479911804199219,
|
|
"step": 2195,
|
|
"valid_targets_mean": 14547.5,
|
|
"valid_targets_min": 122
|
|
},
|
|
{
|
|
"epoch": 0.5871363757672805,
|
|
"grad_norm": 0.4777740945717533,
|
|
"learning_rate": 0.00013355324541991512,
|
|
"loss": 0.6069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6272017955780029,
|
|
"step": 2200,
|
|
"valid_targets_mean": 15204.7,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.588470776621297,
|
|
"grad_norm": 0.44934121785856823,
|
|
"learning_rate": 0.00013344392515360055,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6105179786682129,
|
|
"step": 2205,
|
|
"valid_targets_mean": 15131.6,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 0.5898051774753136,
|
|
"grad_norm": 0.43607143864022113,
|
|
"learning_rate": 0.0001333342878255667,
|
|
"loss": 0.6124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6004830598831177,
|
|
"step": 2210,
|
|
"valid_targets_mean": 16907.6,
|
|
"valid_targets_min": 98
|
|
},
|
|
{
|
|
"epoch": 0.5911395783293302,
|
|
"grad_norm": 0.48902472019677856,
|
|
"learning_rate": 0.00013322433403060237,
|
|
"loss": 0.6144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6342414021492004,
|
|
"step": 2215,
|
|
"valid_targets_mean": 15085.4,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 0.5924739791833467,
|
|
"grad_norm": 0.5744440751634579,
|
|
"learning_rate": 0.0001331140643652134,
|
|
"loss": 0.6057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5978758931159973,
|
|
"step": 2220,
|
|
"valid_targets_mean": 16446.8,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.5938083800373632,
|
|
"grad_norm": 0.6905757317466992,
|
|
"learning_rate": 0.00013300347942761916,
|
|
"loss": 0.6097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6037940979003906,
|
|
"step": 2225,
|
|
"valid_targets_mean": 16368.0,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 0.5951427808913797,
|
|
"grad_norm": 0.5554220490082593,
|
|
"learning_rate": 0.00013289257981774944,
|
|
"loss": 0.6196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6129052639007568,
|
|
"step": 2230,
|
|
"valid_targets_mean": 15976.8,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 0.5964771817453963,
|
|
"grad_norm": 0.4593156602009199,
|
|
"learning_rate": 0.0001327813661372411,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6253645420074463,
|
|
"step": 2235,
|
|
"valid_targets_mean": 15467.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 0.5978115825994129,
|
|
"grad_norm": 0.4420033121389991,
|
|
"learning_rate": 0.00013266983898943495,
|
|
"loss": 0.6006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5838278532028198,
|
|
"step": 2240,
|
|
"valid_targets_mean": 17585.7,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 0.5991459834534294,
|
|
"grad_norm": 0.448148923585819,
|
|
"learning_rate": 0.00013255799897937218,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6293032169342041,
|
|
"step": 2245,
|
|
"valid_targets_mean": 16851.1,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.600480384307446,
|
|
"grad_norm": 0.6604916443254276,
|
|
"learning_rate": 0.0001324458467137915,
|
|
"loss": 0.6229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.650986909866333,
|
|
"step": 2250,
|
|
"valid_targets_mean": 15488.0,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.6018147851614625,
|
|
"grad_norm": 0.4361931344736495,
|
|
"learning_rate": 0.00013233338280112548,
|
|
"loss": 0.6093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6127656698226929,
|
|
"step": 2255,
|
|
"valid_targets_mean": 17069.4,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 0.6031491860154791,
|
|
"grad_norm": 0.3799066011210021,
|
|
"learning_rate": 0.00013222060785149744,
|
|
"loss": 0.6198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6276907920837402,
|
|
"step": 2260,
|
|
"valid_targets_mean": 15284.9,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.6044835868694955,
|
|
"grad_norm": 0.3698011849611436,
|
|
"learning_rate": 0.00013210752247671813,
|
|
"loss": 0.63,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6076016426086426,
|
|
"step": 2265,
|
|
"valid_targets_mean": 14670.1,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 0.6058179877235121,
|
|
"grad_norm": 0.4567594623942834,
|
|
"learning_rate": 0.00013199412729028226,
|
|
"loss": 0.6124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6216099262237549,
|
|
"step": 2270,
|
|
"valid_targets_mean": 14361.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.6071523885775287,
|
|
"grad_norm": 0.4259876052088613,
|
|
"learning_rate": 0.00013188042290736542,
|
|
"loss": 0.6219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6279948949813843,
|
|
"step": 2275,
|
|
"valid_targets_mean": 16835.0,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 0.6084867894315452,
|
|
"grad_norm": 0.469714123948217,
|
|
"learning_rate": 0.00013176640994482056,
|
|
"loss": 0.5975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.577383279800415,
|
|
"step": 2280,
|
|
"valid_targets_mean": 16238.7,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 0.6098211902855618,
|
|
"grad_norm": 0.38761602035741405,
|
|
"learning_rate": 0.00013165208902117466,
|
|
"loss": 0.6194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6232035160064697,
|
|
"step": 2285,
|
|
"valid_targets_mean": 17474.3,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 0.6111555911395783,
|
|
"grad_norm": 0.43628422989244825,
|
|
"learning_rate": 0.0001315374607566254,
|
|
"loss": 0.6161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185094118118286,
|
|
"step": 2290,
|
|
"valid_targets_mean": 16039.3,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 0.6124899919935949,
|
|
"grad_norm": 0.3939730845351852,
|
|
"learning_rate": 0.0001314225257730379,
|
|
"loss": 0.6226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6232194900512695,
|
|
"step": 2295,
|
|
"valid_targets_mean": 15926.9,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.6138243928476115,
|
|
"grad_norm": 0.4853802959237896,
|
|
"learning_rate": 0.00013130728469394113,
|
|
"loss": 0.6149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5909043550491333,
|
|
"step": 2300,
|
|
"valid_targets_mean": 15626.2,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.615158793701628,
|
|
"grad_norm": 0.5303294991223937,
|
|
"learning_rate": 0.00013119173814452474,
|
|
"loss": 0.6145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6119722127914429,
|
|
"step": 2305,
|
|
"valid_targets_mean": 15990.4,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.6164931945556446,
|
|
"grad_norm": 0.3768392664163849,
|
|
"learning_rate": 0.0001310758867516355,
|
|
"loss": 0.6148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5962955951690674,
|
|
"step": 2310,
|
|
"valid_targets_mean": 16253.9,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 0.617827595409661,
|
|
"grad_norm": 0.29568158860296945,
|
|
"learning_rate": 0.00013095973114377401,
|
|
"loss": 0.6037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6145303845405579,
|
|
"step": 2315,
|
|
"valid_targets_mean": 16354.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 0.6191619962636776,
|
|
"grad_norm": 0.3957310816668994,
|
|
"learning_rate": 0.00013084327195109127,
|
|
"loss": 0.6237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6277497410774231,
|
|
"step": 2320,
|
|
"valid_targets_mean": 14964.2,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.6204963971176941,
|
|
"grad_norm": 0.40507218781702686,
|
|
"learning_rate": 0.0001307265098053852,
|
|
"loss": 0.6294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.613028883934021,
|
|
"step": 2325,
|
|
"valid_targets_mean": 14705.0,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.6218307979717107,
|
|
"grad_norm": 0.4138873732218475,
|
|
"learning_rate": 0.00013060944534009727,
|
|
"loss": 0.6133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6178252696990967,
|
|
"step": 2330,
|
|
"valid_targets_mean": 16135.6,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.6231651988257273,
|
|
"grad_norm": 0.44550182331310156,
|
|
"learning_rate": 0.00013049207919030913,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6519665718078613,
|
|
"step": 2335,
|
|
"valid_targets_mean": 15362.8,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 0.6244995996797438,
|
|
"grad_norm": 0.5069298867814076,
|
|
"learning_rate": 0.000130374411992739,
|
|
"loss": 0.6252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6520801186561584,
|
|
"step": 2340,
|
|
"valid_targets_mean": 15502.6,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.6258340005337604,
|
|
"grad_norm": 0.44255482876271746,
|
|
"learning_rate": 0.00013025644438573828,
|
|
"loss": 0.6261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.633766770362854,
|
|
"step": 2345,
|
|
"valid_targets_mean": 15763.8,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.6271684013877769,
|
|
"grad_norm": 0.34513764917726825,
|
|
"learning_rate": 0.0001301381770092882,
|
|
"loss": 0.613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6351778507232666,
|
|
"step": 2350,
|
|
"valid_targets_mean": 15281.8,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 0.6285028022417934,
|
|
"grad_norm": 0.43679301344497834,
|
|
"learning_rate": 0.00013001961050499618,
|
|
"loss": 0.6101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6534307599067688,
|
|
"step": 2355,
|
|
"valid_targets_mean": 14878.5,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 0.6298372030958099,
|
|
"grad_norm": 0.4870298254856093,
|
|
"learning_rate": 0.00012990074551609248,
|
|
"loss": 0.5978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5946311354637146,
|
|
"step": 2360,
|
|
"valid_targets_mean": 16314.5,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.6311716039498265,
|
|
"grad_norm": 0.6945525869442485,
|
|
"learning_rate": 0.00012978158268742656,
|
|
"loss": 0.5993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6251997947692871,
|
|
"step": 2365,
|
|
"valid_targets_mean": 15368.2,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 0.6325060048038431,
|
|
"grad_norm": 0.5790098785319283,
|
|
"learning_rate": 0.00012966212266546384,
|
|
"loss": 0.6259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6421008110046387,
|
|
"step": 2370,
|
|
"valid_targets_mean": 16325.9,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 0.6338404056578596,
|
|
"grad_norm": 0.792785744317409,
|
|
"learning_rate": 0.0001295423660982819,
|
|
"loss": 0.6286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5836455821990967,
|
|
"step": 2375,
|
|
"valid_targets_mean": 16256.2,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.6351748065118762,
|
|
"grad_norm": 0.5822492847083458,
|
|
"learning_rate": 0.00012942231363556717,
|
|
"loss": 0.6269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6273260116577148,
|
|
"step": 2380,
|
|
"valid_targets_mean": 15313.9,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.6365092073658927,
|
|
"grad_norm": 0.9406504357088764,
|
|
"learning_rate": 0.00012930196592861123,
|
|
"loss": 0.5945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6099258661270142,
|
|
"step": 2385,
|
|
"valid_targets_mean": 15458.6,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 0.6378436082199093,
|
|
"grad_norm": 0.607913266165502,
|
|
"learning_rate": 0.0001291813236303075,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6085502505302429,
|
|
"step": 2390,
|
|
"valid_targets_mean": 16758.3,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.6391780090739259,
|
|
"grad_norm": 0.7781891932389111,
|
|
"learning_rate": 0.0001290603873951475,
|
|
"loss": 0.6167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6008795499801636,
|
|
"step": 2395,
|
|
"valid_targets_mean": 15621.0,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 0.6405124099279423,
|
|
"grad_norm": 0.6539333928835719,
|
|
"learning_rate": 0.0001289391578792174,
|
|
"loss": 0.6095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5965677499771118,
|
|
"step": 2400,
|
|
"valid_targets_mean": 15319.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.6418468107819589,
|
|
"grad_norm": 0.8999958153317562,
|
|
"learning_rate": 0.0001288176357401944,
|
|
"loss": 0.614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5982319116592407,
|
|
"step": 2405,
|
|
"valid_targets_mean": 16186.3,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.6431812116359754,
|
|
"grad_norm": 0.58521594108338,
|
|
"learning_rate": 0.00012869582163734327,
|
|
"loss": 0.6158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5869446396827698,
|
|
"step": 2410,
|
|
"valid_targets_mean": 14848.0,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.644515612489992,
|
|
"grad_norm": 0.7184768983435939,
|
|
"learning_rate": 0.00012857371623151265,
|
|
"loss": 0.6008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5802595615386963,
|
|
"step": 2415,
|
|
"valid_targets_mean": 16208.6,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.6458500133440085,
|
|
"grad_norm": 0.6102088903744022,
|
|
"learning_rate": 0.00012845132018513147,
|
|
"loss": 0.6121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6337521076202393,
|
|
"step": 2420,
|
|
"valid_targets_mean": 15364.2,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 0.6471844141980251,
|
|
"grad_norm": 0.7605142158941254,
|
|
"learning_rate": 0.00012832863416220556,
|
|
"loss": 0.6229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.648051917552948,
|
|
"step": 2425,
|
|
"valid_targets_mean": 15182.1,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 0.6485188150520417,
|
|
"grad_norm": 0.6256768708161232,
|
|
"learning_rate": 0.00012820565882831365,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6141934394836426,
|
|
"step": 2430,
|
|
"valid_targets_mean": 14725.9,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 0.6498532159060582,
|
|
"grad_norm": 0.7451804423044297,
|
|
"learning_rate": 0.00012808239485060426,
|
|
"loss": 0.6015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6086957454681396,
|
|
"step": 2435,
|
|
"valid_targets_mean": 16796.6,
|
|
"valid_targets_min": 22
|
|
},
|
|
{
|
|
"epoch": 0.6511876167600748,
|
|
"grad_norm": 0.5513334048003805,
|
|
"learning_rate": 0.00012795884289779161,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5970527529716492,
|
|
"step": 2440,
|
|
"valid_targets_mean": 16409.0,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 0.6525220176140912,
|
|
"grad_norm": 0.5601606873656281,
|
|
"learning_rate": 0.00012783500364015233,
|
|
"loss": 0.6038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6198552846908569,
|
|
"step": 2445,
|
|
"valid_targets_mean": 15023.3,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 0.6538564184681078,
|
|
"grad_norm": 0.5710220241717955,
|
|
"learning_rate": 0.00012771087774952165,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6311722993850708,
|
|
"step": 2450,
|
|
"valid_targets_mean": 16240.5,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 0.6551908193221244,
|
|
"grad_norm": 0.6210885846793824,
|
|
"learning_rate": 0.00012758646589928975,
|
|
"loss": 0.6113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5842134952545166,
|
|
"step": 2455,
|
|
"valid_targets_mean": 15439.6,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.6565252201761409,
|
|
"grad_norm": 0.5047368505662938,
|
|
"learning_rate": 0.00012746176876439824,
|
|
"loss": 0.6304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6596218347549438,
|
|
"step": 2460,
|
|
"valid_targets_mean": 14011.9,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 0.6578596210301575,
|
|
"grad_norm": 0.5719219448735884,
|
|
"learning_rate": 0.00012733678702133642,
|
|
"loss": 0.604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6103248596191406,
|
|
"step": 2465,
|
|
"valid_targets_mean": 15900.6,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 0.659194021884174,
|
|
"grad_norm": 0.36671511433182047,
|
|
"learning_rate": 0.00012721152134813755,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5883936882019043,
|
|
"step": 2470,
|
|
"valid_targets_mean": 16086.2,
|
|
"valid_targets_min": 102
|
|
},
|
|
{
|
|
"epoch": 0.6605284227381906,
|
|
"grad_norm": 0.5035071153244577,
|
|
"learning_rate": 0.00012708597242437524,
|
|
"loss": 0.6111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6350337266921997,
|
|
"step": 2475,
|
|
"valid_targets_mean": 14267.4,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 0.661862823592207,
|
|
"grad_norm": 0.49047354453068903,
|
|
"learning_rate": 0.00012696014093115974,
|
|
"loss": 0.6239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6190519332885742,
|
|
"step": 2480,
|
|
"valid_targets_mean": 16891.8,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 0.6631972244462236,
|
|
"grad_norm": 0.3639431845661133,
|
|
"learning_rate": 0.00012683402755113432,
|
|
"loss": 0.6011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5651915669441223,
|
|
"step": 2485,
|
|
"valid_targets_mean": 16542.9,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 0.6645316253002402,
|
|
"grad_norm": 0.3092175056739848,
|
|
"learning_rate": 0.0001267076329684714,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5737978219985962,
|
|
"step": 2490,
|
|
"valid_targets_mean": 16735.8,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 0.6658660261542567,
|
|
"grad_norm": 0.42866256823259696,
|
|
"learning_rate": 0.0001265809578688691,
|
|
"loss": 0.6128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5863039493560791,
|
|
"step": 2495,
|
|
"valid_targets_mean": 15345.5,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.6672004270082733,
|
|
"grad_norm": 0.4798819084708603,
|
|
"learning_rate": 0.00012645400293954714,
|
|
"loss": 0.6182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.620308518409729,
|
|
"step": 2500,
|
|
"valid_targets_mean": 15893.3,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.6685348278622898,
|
|
"grad_norm": 0.3719505049119626,
|
|
"learning_rate": 0.0001263267688692435,
|
|
"loss": 0.6264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6290452480316162,
|
|
"step": 2505,
|
|
"valid_targets_mean": 15592.1,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 0.6698692287163064,
|
|
"grad_norm": 0.4398503675850329,
|
|
"learning_rate": 0.00012619925634821052,
|
|
"loss": 0.6123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6252307891845703,
|
|
"step": 2510,
|
|
"valid_targets_mean": 16358.0,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 0.671203629570323,
|
|
"grad_norm": 0.33044335456370993,
|
|
"learning_rate": 0.00012607146606821105,
|
|
"loss": 0.6048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6037730574607849,
|
|
"step": 2515,
|
|
"valid_targets_mean": 16456.2,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 0.6725380304243395,
|
|
"grad_norm": 0.4138312255865989,
|
|
"learning_rate": 0.00012594339872251483,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6162407398223877,
|
|
"step": 2520,
|
|
"valid_targets_mean": 16273.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.673872431278356,
|
|
"grad_norm": 0.5338733241457372,
|
|
"learning_rate": 0.00012581505500589474,
|
|
"loss": 0.6156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.616795539855957,
|
|
"step": 2525,
|
|
"valid_targets_mean": 15318.8,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 0.6752068321323725,
|
|
"grad_norm": 0.5538178472599126,
|
|
"learning_rate": 0.00012568643561462298,
|
|
"loss": 0.6093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6517888307571411,
|
|
"step": 2530,
|
|
"valid_targets_mean": 16193.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 0.6765412329863891,
|
|
"grad_norm": 0.39314855325609094,
|
|
"learning_rate": 0.00012555754124646729,
|
|
"loss": 0.6061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6136894226074219,
|
|
"step": 2535,
|
|
"valid_targets_mean": 15608.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.6778756338404056,
|
|
"grad_norm": 0.28736129685480677,
|
|
"learning_rate": 0.0001254283726006871,
|
|
"loss": 0.6034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6169589757919312,
|
|
"step": 2540,
|
|
"valid_targets_mean": 16580.6,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 0.6792100346944222,
|
|
"grad_norm": 0.40453438597245944,
|
|
"learning_rate": 0.00012529893037803,
|
|
"loss": 0.6168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6047553420066833,
|
|
"step": 2545,
|
|
"valid_targets_mean": 16454.1,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 0.6805444355484388,
|
|
"grad_norm": 0.40703301810718967,
|
|
"learning_rate": 0.00012516921528072752,
|
|
"loss": 0.6037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6294437646865845,
|
|
"step": 2550,
|
|
"valid_targets_mean": 16483.0,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.6818788364024553,
|
|
"grad_norm": 0.4332954316346514,
|
|
"learning_rate": 0.00012503922801249172,
|
|
"loss": 0.6323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6062746644020081,
|
|
"step": 2555,
|
|
"valid_targets_mean": 16690.6,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 0.6832132372564719,
|
|
"grad_norm": 0.4330673352480961,
|
|
"learning_rate": 0.00012490896927851118,
|
|
"loss": 0.6149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6028146147727966,
|
|
"step": 2560,
|
|
"valid_targets_mean": 16890.8,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 0.6845476381104884,
|
|
"grad_norm": 0.5758835807464072,
|
|
"learning_rate": 0.0001247784397854471,
|
|
"loss": 0.6063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6124927997589111,
|
|
"step": 2565,
|
|
"valid_targets_mean": 16613.3,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 0.685882038964505,
|
|
"grad_norm": 0.3759062706106212,
|
|
"learning_rate": 0.0001246476402414297,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6371479034423828,
|
|
"step": 2570,
|
|
"valid_targets_mean": 15202.9,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 0.6872164398185214,
|
|
"grad_norm": 0.40747309474183957,
|
|
"learning_rate": 0.0001245165713560541,
|
|
"loss": 0.6197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6243611574172974,
|
|
"step": 2575,
|
|
"valid_targets_mean": 16138.9,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 0.688550840672538,
|
|
"grad_norm": 0.33101064658709073,
|
|
"learning_rate": 0.00012438523384037675,
|
|
"loss": 0.621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6176421642303467,
|
|
"step": 2580,
|
|
"valid_targets_mean": 15277.3,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.6898852415265546,
|
|
"grad_norm": 0.38388904529906687,
|
|
"learning_rate": 0.0001242536284069113,
|
|
"loss": 0.5967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5699970126152039,
|
|
"step": 2585,
|
|
"valid_targets_mean": 16336.4,
|
|
"valid_targets_min": 164
|
|
},
|
|
{
|
|
"epoch": 0.6912196423805711,
|
|
"grad_norm": 0.3477844139398198,
|
|
"learning_rate": 0.00012412175576962504,
|
|
"loss": 0.608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5966440439224243,
|
|
"step": 2590,
|
|
"valid_targets_mean": 16631.5,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.6925540432345877,
|
|
"grad_norm": 0.5440458794110384,
|
|
"learning_rate": 0.00012398961664393467,
|
|
"loss": 0.6197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6081477403640747,
|
|
"step": 2595,
|
|
"valid_targets_mean": 16263.5,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 0.6938884440886042,
|
|
"grad_norm": 0.3152153844701328,
|
|
"learning_rate": 0.0001238572117467027,
|
|
"loss": 0.6225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6415140628814697,
|
|
"step": 2600,
|
|
"valid_targets_mean": 15497.3,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 0.6952228449426208,
|
|
"grad_norm": 0.5474956644762842,
|
|
"learning_rate": 0.0001237245417962335,
|
|
"loss": 0.6049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185272336006165,
|
|
"step": 2605,
|
|
"valid_targets_mean": 16156.6,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.6965572457966374,
|
|
"grad_norm": 0.37662200390415895,
|
|
"learning_rate": 0.0001235916075122693,
|
|
"loss": 0.6206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6300753355026245,
|
|
"step": 2610,
|
|
"valid_targets_mean": 15374.1,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 0.6978916466506538,
|
|
"grad_norm": 0.41421104102571804,
|
|
"learning_rate": 0.00012345840961598638,
|
|
"loss": 0.6321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6131690740585327,
|
|
"step": 2615,
|
|
"valid_targets_mean": 15966.3,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.6992260475046704,
|
|
"grad_norm": 0.43574279785165043,
|
|
"learning_rate": 0.00012332494882999113,
|
|
"loss": 0.6159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6403422355651855,
|
|
"step": 2620,
|
|
"valid_targets_mean": 15165.6,
|
|
"valid_targets_min": 20
|
|
},
|
|
{
|
|
"epoch": 0.7005604483586869,
|
|
"grad_norm": 0.4314840648934718,
|
|
"learning_rate": 0.00012319122587831614,
|
|
"loss": 0.6,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6263567209243774,
|
|
"step": 2625,
|
|
"valid_targets_mean": 15291.8,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 0.7018948492127035,
|
|
"grad_norm": 0.3696921147982537,
|
|
"learning_rate": 0.00012305724148641627,
|
|
"loss": 0.6098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6040724515914917,
|
|
"step": 2630,
|
|
"valid_targets_mean": 15731.2,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 0.70322925006672,
|
|
"grad_norm": 0.36503865159227195,
|
|
"learning_rate": 0.00012292299638116472,
|
|
"loss": 0.6145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6203213334083557,
|
|
"step": 2635,
|
|
"valid_targets_mean": 16185.4,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 0.7045636509207366,
|
|
"grad_norm": 0.6179094979585101,
|
|
"learning_rate": 0.00012278849129084902,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.59694904088974,
|
|
"step": 2640,
|
|
"valid_targets_mean": 15701.0,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 0.7058980517747532,
|
|
"grad_norm": 0.554016016939682,
|
|
"learning_rate": 0.00012265372694516714,
|
|
"loss": 0.6179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.613195538520813,
|
|
"step": 2645,
|
|
"valid_targets_mean": 15156.6,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 0.7072324526287697,
|
|
"grad_norm": 0.5981015163954772,
|
|
"learning_rate": 0.00012251870407522364,
|
|
"loss": 0.6177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5936011672019958,
|
|
"step": 2650,
|
|
"valid_targets_mean": 16011.4,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.7085668534827863,
|
|
"grad_norm": 0.49781827733569967,
|
|
"learning_rate": 0.00012238342341352546,
|
|
"loss": 0.6005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5743311047554016,
|
|
"step": 2655,
|
|
"valid_targets_mean": 16041.1,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 0.7099012543368027,
|
|
"grad_norm": 0.7260313002631965,
|
|
"learning_rate": 0.00012224788569397806,
|
|
"loss": 0.6208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623327374458313,
|
|
"step": 2660,
|
|
"valid_targets_mean": 15888.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 0.7112356551908193,
|
|
"grad_norm": 0.7328475215596345,
|
|
"learning_rate": 0.0001221120916518816,
|
|
"loss": 0.6232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6027488708496094,
|
|
"step": 2665,
|
|
"valid_targets_mean": 15739.6,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.7125700560448359,
|
|
"grad_norm": 0.7596978663656118,
|
|
"learning_rate": 0.0001219760420239267,
|
|
"loss": 0.5925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5970748662948608,
|
|
"step": 2670,
|
|
"valid_targets_mean": 16778.7,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.7139044568988524,
|
|
"grad_norm": 0.6594651660933113,
|
|
"learning_rate": 0.00012183973754819051,
|
|
"loss": 0.593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5764331817626953,
|
|
"step": 2675,
|
|
"valid_targets_mean": 16637.8,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 0.715238857752869,
|
|
"grad_norm": 0.7628936372865166,
|
|
"learning_rate": 0.00012170317896413284,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6267425417900085,
|
|
"step": 2680,
|
|
"valid_targets_mean": 15401.4,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 0.7165732586068855,
|
|
"grad_norm": 0.4698922890537805,
|
|
"learning_rate": 0.00012156636701259202,
|
|
"loss": 0.6038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.596757173538208,
|
|
"step": 2685,
|
|
"valid_targets_mean": 15641.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.7179076594609021,
|
|
"grad_norm": 0.605004344317738,
|
|
"learning_rate": 0.00012142930243578092,
|
|
"loss": 0.6007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5908167958259583,
|
|
"step": 2690,
|
|
"valid_targets_mean": 15483.9,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 0.7192420603149186,
|
|
"grad_norm": 0.5619485743558523,
|
|
"learning_rate": 0.00012129198597728285,
|
|
"loss": 0.6157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6099591255187988,
|
|
"step": 2695,
|
|
"valid_targets_mean": 14776.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 0.7205764611689351,
|
|
"grad_norm": 0.6598145305998949,
|
|
"learning_rate": 0.00012115441838204767,
|
|
"loss": 0.6158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6329494118690491,
|
|
"step": 2700,
|
|
"valid_targets_mean": 16702.2,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 0.7219108620229517,
|
|
"grad_norm": 0.6613863881062565,
|
|
"learning_rate": 0.00012101660039638766,
|
|
"loss": 0.5824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5732811689376831,
|
|
"step": 2705,
|
|
"valid_targets_mean": 15127.5,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.7232452628769682,
|
|
"grad_norm": 0.6893698621225195,
|
|
"learning_rate": 0.00012087853276797346,
|
|
"loss": 0.6129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5944296717643738,
|
|
"step": 2710,
|
|
"valid_targets_mean": 15252.4,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 0.7245796637309848,
|
|
"grad_norm": 0.609549826313975,
|
|
"learning_rate": 0.00012074021624583005,
|
|
"loss": 0.6202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6550873517990112,
|
|
"step": 2715,
|
|
"valid_targets_mean": 15639.0,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 0.7259140645850013,
|
|
"grad_norm": 0.6535800226145355,
|
|
"learning_rate": 0.00012060165158033268,
|
|
"loss": 0.5989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5854440927505493,
|
|
"step": 2720,
|
|
"valid_targets_mean": 16359.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 0.7272484654390179,
|
|
"grad_norm": 0.5139845099693757,
|
|
"learning_rate": 0.00012046283952320275,
|
|
"loss": 0.613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6016006469726562,
|
|
"step": 2725,
|
|
"valid_targets_mean": 17065.6,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.7285828662930345,
|
|
"grad_norm": 0.6014899717015251,
|
|
"learning_rate": 0.00012032378082750382,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5890054702758789,
|
|
"step": 2730,
|
|
"valid_targets_mean": 15969.3,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.729917267147051,
|
|
"grad_norm": 0.5691533934968931,
|
|
"learning_rate": 0.00012018447624763748,
|
|
"loss": 0.6203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6279616951942444,
|
|
"step": 2735,
|
|
"valid_targets_mean": 15455.8,
|
|
"valid_targets_min": 119
|
|
},
|
|
{
|
|
"epoch": 0.7312516680010676,
|
|
"grad_norm": 0.6335124799219327,
|
|
"learning_rate": 0.00012004492653933923,
|
|
"loss": 0.6089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6014554500579834,
|
|
"step": 2740,
|
|
"valid_targets_mean": 17150.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 0.732586068855084,
|
|
"grad_norm": 0.5169723465682857,
|
|
"learning_rate": 0.0001199051324596744,
|
|
"loss": 0.6106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6194249391555786,
|
|
"step": 2745,
|
|
"valid_targets_mean": 16123.2,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 0.7339204697091006,
|
|
"grad_norm": 0.6738124101769285,
|
|
"learning_rate": 0.00011976509476703408,
|
|
"loss": 0.612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6056435108184814,
|
|
"step": 2750,
|
|
"valid_targets_mean": 16243.4,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 0.7352548705631171,
|
|
"grad_norm": 0.3664524912963626,
|
|
"learning_rate": 0.00011962481422113098,
|
|
"loss": 0.6197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.646386981010437,
|
|
"step": 2755,
|
|
"valid_targets_mean": 16190.9,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 0.7365892714171337,
|
|
"grad_norm": 0.44792117478863985,
|
|
"learning_rate": 0.00011948429158299523,
|
|
"loss": 0.5971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5956047773361206,
|
|
"step": 2760,
|
|
"valid_targets_mean": 15734.4,
|
|
"valid_targets_min": 91
|
|
},
|
|
{
|
|
"epoch": 0.7379236722711503,
|
|
"grad_norm": 0.4124501979620836,
|
|
"learning_rate": 0.00011934352761497043,
|
|
"loss": 0.6128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5855376720428467,
|
|
"step": 2765,
|
|
"valid_targets_mean": 15115.3,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 0.7392580731251668,
|
|
"grad_norm": 0.3230024437807969,
|
|
"learning_rate": 0.00011920252308070936,
|
|
"loss": 0.611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234885454177856,
|
|
"step": 2770,
|
|
"valid_targets_mean": 16610.5,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.7405924739791834,
|
|
"grad_norm": 0.39710487024270547,
|
|
"learning_rate": 0.00011906127874516985,
|
|
"loss": 0.6344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380666494369507,
|
|
"step": 2775,
|
|
"valid_targets_mean": 15587.7,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 0.7419268748331999,
|
|
"grad_norm": 0.4097828989932021,
|
|
"learning_rate": 0.00011891979537461069,
|
|
"loss": 0.6043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6162174940109253,
|
|
"step": 2780,
|
|
"valid_targets_mean": 14953.7,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 0.7432612756872164,
|
|
"grad_norm": 0.38996309275492613,
|
|
"learning_rate": 0.00011877807373658751,
|
|
"loss": 0.6157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6415692567825317,
|
|
"step": 2785,
|
|
"valid_targets_mean": 15363.2,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 0.7445956765412329,
|
|
"grad_norm": 0.3618667775211813,
|
|
"learning_rate": 0.00011863611459994845,
|
|
"loss": 0.5936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5952189564704895,
|
|
"step": 2790,
|
|
"valid_targets_mean": 16200.4,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 0.7459300773952495,
|
|
"grad_norm": 0.3267117936332756,
|
|
"learning_rate": 0.00011849391873483016,
|
|
"loss": 0.6124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5991407036781311,
|
|
"step": 2795,
|
|
"valid_targets_mean": 17319.6,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 0.7472644782492661,
|
|
"grad_norm": 0.3016648304278165,
|
|
"learning_rate": 0.00011835148691265355,
|
|
"loss": 0.5987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6235275268554688,
|
|
"step": 2800,
|
|
"valid_targets_mean": 15123.5,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.7485988791032826,
|
|
"grad_norm": 0.3221823783108136,
|
|
"learning_rate": 0.00011820881990611963,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6053809523582458,
|
|
"step": 2805,
|
|
"valid_targets_mean": 16131.7,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.7499332799572992,
|
|
"grad_norm": 0.43562984477012173,
|
|
"learning_rate": 0.00011806591848920521,
|
|
"loss": 0.6111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6194973587989807,
|
|
"step": 2810,
|
|
"valid_targets_mean": 16540.3,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.7512676808113157,
|
|
"grad_norm": 0.3698450346299921,
|
|
"learning_rate": 0.00011792278343715892,
|
|
"loss": 0.6255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6236314177513123,
|
|
"step": 2815,
|
|
"valid_targets_mean": 15602.0,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 0.7526020816653323,
|
|
"grad_norm": 0.4818546251164576,
|
|
"learning_rate": 0.00011777941552649674,
|
|
"loss": 0.6097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6257940530776978,
|
|
"step": 2820,
|
|
"valid_targets_mean": 16670.9,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 0.7539364825193489,
|
|
"grad_norm": 0.4397285169168285,
|
|
"learning_rate": 0.00011763581553499803,
|
|
"loss": 0.6251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6217588186264038,
|
|
"step": 2825,
|
|
"valid_targets_mean": 15189.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.7552708833733653,
|
|
"grad_norm": 0.3478095634132381,
|
|
"learning_rate": 0.00011749198424170117,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5979966521263123,
|
|
"step": 2830,
|
|
"valid_targets_mean": 16085.6,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 0.7566052842273819,
|
|
"grad_norm": 0.35110205846176734,
|
|
"learning_rate": 0.00011734792242689934,
|
|
"loss": 0.6087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6225411891937256,
|
|
"step": 2835,
|
|
"valid_targets_mean": 16043.0,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 0.7579396850813984,
|
|
"grad_norm": 0.326064120745074,
|
|
"learning_rate": 0.00011720363087213629,
|
|
"loss": 0.6068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6108662486076355,
|
|
"step": 2840,
|
|
"valid_targets_mean": 15808.2,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.759274085935415,
|
|
"grad_norm": 0.49202452785265866,
|
|
"learning_rate": 0.00011705911036020222,
|
|
"loss": 0.6124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6325043439865112,
|
|
"step": 2845,
|
|
"valid_targets_mean": 16098.4,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.7606084867894315,
|
|
"grad_norm": 0.3350239081325133,
|
|
"learning_rate": 0.00011691436167512938,
|
|
"loss": 0.614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6329668164253235,
|
|
"step": 2850,
|
|
"valid_targets_mean": 14940.1,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 0.7619428876434481,
|
|
"grad_norm": 0.3760125143483495,
|
|
"learning_rate": 0.00011676938560218781,
|
|
"loss": 0.6254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.612138032913208,
|
|
"step": 2855,
|
|
"valid_targets_mean": 15901.7,
|
|
"valid_targets_min": 40
|
|
},
|
|
{
|
|
"epoch": 0.7632772884974647,
|
|
"grad_norm": 0.33540908595554625,
|
|
"learning_rate": 0.00011662418292788127,
|
|
"loss": 0.5968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5743190050125122,
|
|
"step": 2860,
|
|
"valid_targets_mean": 16764.1,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 0.7646116893514812,
|
|
"grad_norm": 0.2754574689920214,
|
|
"learning_rate": 0.00011647875443994271,
|
|
"loss": 0.6114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6536933779716492,
|
|
"step": 2865,
|
|
"valid_targets_mean": 15792.1,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 0.7659460902054978,
|
|
"grad_norm": 0.31623825970777014,
|
|
"learning_rate": 0.00011633310092733027,
|
|
"loss": 0.612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6481964588165283,
|
|
"step": 2870,
|
|
"valid_targets_mean": 15686.6,
|
|
"valid_targets_min": 91
|
|
},
|
|
{
|
|
"epoch": 0.7672804910595142,
|
|
"grad_norm": 0.3081228003901501,
|
|
"learning_rate": 0.00011618722318022273,
|
|
"loss": 0.6033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5976066589355469,
|
|
"step": 2875,
|
|
"valid_targets_mean": 16311.7,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 0.7686148919135308,
|
|
"grad_norm": 0.39985682958457286,
|
|
"learning_rate": 0.00011604112199001546,
|
|
"loss": 0.6155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6129624843597412,
|
|
"step": 2880,
|
|
"valid_targets_mean": 16037.5,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 0.7699492927675474,
|
|
"grad_norm": 0.3088978935138083,
|
|
"learning_rate": 0.00011589479814931598,
|
|
"loss": 0.6315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6401019096374512,
|
|
"step": 2885,
|
|
"valid_targets_mean": 15580.1,
|
|
"valid_targets_min": 188
|
|
},
|
|
{
|
|
"epoch": 0.7712836936215639,
|
|
"grad_norm": 0.3825137255624278,
|
|
"learning_rate": 0.0001157482524519397,
|
|
"loss": 0.6069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6044080853462219,
|
|
"step": 2890,
|
|
"valid_targets_mean": 16121.2,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.7726180944755805,
|
|
"grad_norm": 0.35342809771327477,
|
|
"learning_rate": 0.00011560148569290558,
|
|
"loss": 0.6207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.639106273651123,
|
|
"step": 2895,
|
|
"valid_targets_mean": 15362.5,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 0.773952495329597,
|
|
"grad_norm": 0.386043678780904,
|
|
"learning_rate": 0.00011545449866843194,
|
|
"loss": 0.61,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5949431657791138,
|
|
"step": 2900,
|
|
"valid_targets_mean": 15466.7,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.7752868961836136,
|
|
"grad_norm": 0.3245398180178222,
|
|
"learning_rate": 0.00011530729217593198,
|
|
"loss": 0.6188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5934869050979614,
|
|
"step": 2905,
|
|
"valid_targets_mean": 15376.7,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 0.77662129703763,
|
|
"grad_norm": 0.44593536867847916,
|
|
"learning_rate": 0.00011515986701400955,
|
|
"loss": 0.6055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5876243114471436,
|
|
"step": 2910,
|
|
"valid_targets_mean": 16781.6,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.7779556978916466,
|
|
"grad_norm": 0.46221191788580496,
|
|
"learning_rate": 0.00011501222398245478,
|
|
"loss": 0.6069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6441828012466431,
|
|
"step": 2915,
|
|
"valid_targets_mean": 15654.8,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 0.7792900987456632,
|
|
"grad_norm": 0.40081314933909024,
|
|
"learning_rate": 0.00011486436388223977,
|
|
"loss": 0.6077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.595112144947052,
|
|
"step": 2920,
|
|
"valid_targets_mean": 15317.9,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.7806244995996797,
|
|
"grad_norm": 0.5596690090788919,
|
|
"learning_rate": 0.00011471628751551426,
|
|
"loss": 0.6068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5922015905380249,
|
|
"step": 2925,
|
|
"valid_targets_mean": 16044.5,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.7819589004536963,
|
|
"grad_norm": 0.3589995388472471,
|
|
"learning_rate": 0.0001145679956856012,
|
|
"loss": 0.6103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6053141951560974,
|
|
"step": 2930,
|
|
"valid_targets_mean": 17100.2,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 0.7832933013077128,
|
|
"grad_norm": 0.4884923328918617,
|
|
"learning_rate": 0.00011441948919699249,
|
|
"loss": 0.5993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6002500057220459,
|
|
"step": 2935,
|
|
"valid_targets_mean": 16290.3,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 0.7846277021617294,
|
|
"grad_norm": 0.4779545116170051,
|
|
"learning_rate": 0.00011427076885534445,
|
|
"loss": 0.6165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5972024202346802,
|
|
"step": 2940,
|
|
"valid_targets_mean": 15572.7,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 0.785962103015746,
|
|
"grad_norm": 0.36583593978213447,
|
|
"learning_rate": 0.00011412183546747374,
|
|
"loss": 0.6275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6454517841339111,
|
|
"step": 2945,
|
|
"valid_targets_mean": 15348.2,
|
|
"valid_targets_min": 109
|
|
},
|
|
{
|
|
"epoch": 0.7872965038697625,
|
|
"grad_norm": 0.3622837852549798,
|
|
"learning_rate": 0.00011397268984135266,
|
|
"loss": 0.6104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5890398621559143,
|
|
"step": 2950,
|
|
"valid_targets_mean": 16108.4,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 0.7886309047237791,
|
|
"grad_norm": 0.28721242725922963,
|
|
"learning_rate": 0.00011382333278610503,
|
|
"loss": 0.6128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6144457459449768,
|
|
"step": 2955,
|
|
"valid_targets_mean": 15895.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.7899653055777955,
|
|
"grad_norm": 0.39829683210774786,
|
|
"learning_rate": 0.00011367376511200157,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5917114019393921,
|
|
"step": 2960,
|
|
"valid_targets_mean": 15565.9,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 0.7912997064318121,
|
|
"grad_norm": 0.3738329809252565,
|
|
"learning_rate": 0.00011352398763045569,
|
|
"loss": 0.6111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5774213671684265,
|
|
"step": 2965,
|
|
"valid_targets_mean": 15889.3,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 0.7926341072858286,
|
|
"grad_norm": 0.39971448523023917,
|
|
"learning_rate": 0.00011337400115401905,
|
|
"loss": 0.5908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6107337474822998,
|
|
"step": 2970,
|
|
"valid_targets_mean": 15658.9,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.7939685081398452,
|
|
"grad_norm": 0.4915041380921517,
|
|
"learning_rate": 0.00011322380649637704,
|
|
"loss": 0.6056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6147445440292358,
|
|
"step": 2975,
|
|
"valid_targets_mean": 16253.0,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.7953029089938618,
|
|
"grad_norm": 0.46231586052243284,
|
|
"learning_rate": 0.00011307340447234449,
|
|
"loss": 0.6138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5767396092414856,
|
|
"step": 2980,
|
|
"valid_targets_mean": 17331.9,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 0.7966373098478783,
|
|
"grad_norm": 0.4211986440934076,
|
|
"learning_rate": 0.00011292279589786115,
|
|
"loss": 0.5993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6300714015960693,
|
|
"step": 2985,
|
|
"valid_targets_mean": 16442.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 0.7979717107018949,
|
|
"grad_norm": 0.31346676252674627,
|
|
"learning_rate": 0.00011277198158998744,
|
|
"loss": 0.6094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5963038206100464,
|
|
"step": 2990,
|
|
"valid_targets_mean": 16258.1,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 0.7993061115559114,
|
|
"grad_norm": 0.3152167692640102,
|
|
"learning_rate": 0.00011262096236689978,
|
|
"loss": 0.6156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6168862581253052,
|
|
"step": 2995,
|
|
"valid_targets_mean": 16518.8,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 0.800640512409928,
|
|
"grad_norm": 0.3798525858644061,
|
|
"learning_rate": 0.0001124697390478863,
|
|
"loss": 0.5946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5764780640602112,
|
|
"step": 3000,
|
|
"valid_targets_mean": 15923.1,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 0.8019749132639445,
|
|
"grad_norm": 0.57419267937008,
|
|
"learning_rate": 0.00011231831245334238,
|
|
"loss": 0.6174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5927503705024719,
|
|
"step": 3005,
|
|
"valid_targets_mean": 17187.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.803309314117961,
|
|
"grad_norm": 0.43237740910495476,
|
|
"learning_rate": 0.00011216668340476618,
|
|
"loss": 0.5986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6266282796859741,
|
|
"step": 3010,
|
|
"valid_targets_mean": 15425.8,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 0.8046437149719776,
|
|
"grad_norm": 0.4015442907401617,
|
|
"learning_rate": 0.00011201485272475416,
|
|
"loss": 0.6186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6169252395629883,
|
|
"step": 3015,
|
|
"valid_targets_mean": 15839.9,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 0.8059781158259941,
|
|
"grad_norm": 0.9122981083934059,
|
|
"learning_rate": 0.00011186282123699664,
|
|
"loss": 0.6125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5926787853240967,
|
|
"step": 3020,
|
|
"valid_targets_mean": 14898.1,
|
|
"valid_targets_min": 193
|
|
},
|
|
{
|
|
"epoch": 0.8073125166800107,
|
|
"grad_norm": 0.36552132119276587,
|
|
"learning_rate": 0.00011171058976627341,
|
|
"loss": 0.6114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5867595672607422,
|
|
"step": 3025,
|
|
"valid_targets_mean": 15765.2,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 0.8086469175340272,
|
|
"grad_norm": 0.41497109551666145,
|
|
"learning_rate": 0.00011155815913844906,
|
|
"loss": 0.6052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6065448522567749,
|
|
"step": 3030,
|
|
"valid_targets_mean": 14955.9,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 0.8099813183880438,
|
|
"grad_norm": 0.4398597996121229,
|
|
"learning_rate": 0.00011140553018046872,
|
|
"loss": 0.6166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6327465772628784,
|
|
"step": 3035,
|
|
"valid_targets_mean": 14344.6,
|
|
"valid_targets_min": 119
|
|
},
|
|
{
|
|
"epoch": 0.8113157192420604,
|
|
"grad_norm": 0.33965885966385906,
|
|
"learning_rate": 0.00011125270372035342,
|
|
"loss": 0.6056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5998932123184204,
|
|
"step": 3040,
|
|
"valid_targets_mean": 16106.6,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.8126501200960768,
|
|
"grad_norm": 0.3850178772215109,
|
|
"learning_rate": 0.00011109968058719565,
|
|
"loss": 0.6035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6161537766456604,
|
|
"step": 3045,
|
|
"valid_targets_mean": 15463.7,
|
|
"valid_targets_min": 68
|
|
},
|
|
{
|
|
"epoch": 0.8139845209500934,
|
|
"grad_norm": 0.3256813424259837,
|
|
"learning_rate": 0.00011094646161115489,
|
|
"loss": 0.5999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5910052061080933,
|
|
"step": 3050,
|
|
"valid_targets_mean": 16663.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 0.8153189218041099,
|
|
"grad_norm": 0.334038106311323,
|
|
"learning_rate": 0.00011079304762345307,
|
|
"loss": 0.6041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.598740816116333,
|
|
"step": 3055,
|
|
"valid_targets_mean": 16783.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.8166533226581265,
|
|
"grad_norm": 0.48713259307598056,
|
|
"learning_rate": 0.00011063943945637005,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5953688621520996,
|
|
"step": 3060,
|
|
"valid_targets_mean": 17433.8,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 0.817987723512143,
|
|
"grad_norm": 0.4173434520228628,
|
|
"learning_rate": 0.00011048563794323915,
|
|
"loss": 0.6041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6344270706176758,
|
|
"step": 3065,
|
|
"valid_targets_mean": 15606.3,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 0.8193221243661596,
|
|
"grad_norm": 0.3197471666286986,
|
|
"learning_rate": 0.00011033164391844259,
|
|
"loss": 0.6086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6131141185760498,
|
|
"step": 3070,
|
|
"valid_targets_mean": 15585.1,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 0.8206565252201762,
|
|
"grad_norm": 0.3067640922164882,
|
|
"learning_rate": 0.00011017745821740696,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6086834669113159,
|
|
"step": 3075,
|
|
"valid_targets_mean": 15632.0,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.8219909260741927,
|
|
"grad_norm": 0.443444899287974,
|
|
"learning_rate": 0.00011002308167659877,
|
|
"loss": 0.6186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5912847518920898,
|
|
"step": 3080,
|
|
"valid_targets_mean": 16956.1,
|
|
"valid_targets_min": 133
|
|
},
|
|
{
|
|
"epoch": 0.8233253269282093,
|
|
"grad_norm": 0.464873034571983,
|
|
"learning_rate": 0.00010986851513351976,
|
|
"loss": 0.6211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6237614154815674,
|
|
"step": 3085,
|
|
"valid_targets_mean": 15150.4,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 0.8246597277822257,
|
|
"grad_norm": 0.34073479026514153,
|
|
"learning_rate": 0.00010971375942670251,
|
|
"loss": 0.6453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.65798020362854,
|
|
"step": 3090,
|
|
"valid_targets_mean": 16809.9,
|
|
"valid_targets_min": 143
|
|
},
|
|
{
|
|
"epoch": 0.8259941286362423,
|
|
"grad_norm": 0.3599346066190022,
|
|
"learning_rate": 0.00010955881539570581,
|
|
"loss": 0.6047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6177526116371155,
|
|
"step": 3095,
|
|
"valid_targets_mean": 15655.3,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.8273285294902589,
|
|
"grad_norm": 0.2629035887316631,
|
|
"learning_rate": 0.00010940368388111008,
|
|
"loss": 0.5913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5859917402267456,
|
|
"step": 3100,
|
|
"valid_targets_mean": 15790.0,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.8286629303442754,
|
|
"grad_norm": 0.3614709856971453,
|
|
"learning_rate": 0.00010924836572451287,
|
|
"loss": 0.6101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6259796023368835,
|
|
"step": 3105,
|
|
"valid_targets_mean": 16146.6,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 0.829997331198292,
|
|
"grad_norm": 0.36915904463117755,
|
|
"learning_rate": 0.00010909286176852432,
|
|
"loss": 0.6102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.619566798210144,
|
|
"step": 3110,
|
|
"valid_targets_mean": 16112.4,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.8313317320523085,
|
|
"grad_norm": 0.4478320563958947,
|
|
"learning_rate": 0.0001089371728567625,
|
|
"loss": 0.6019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5884507298469543,
|
|
"step": 3115,
|
|
"valid_targets_mean": 16453.3,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 0.8326661329063251,
|
|
"grad_norm": 0.48113079680139226,
|
|
"learning_rate": 0.00010878129983384886,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5973682999610901,
|
|
"step": 3120,
|
|
"valid_targets_mean": 16576.3,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 0.8340005337603416,
|
|
"grad_norm": 0.41747659934580694,
|
|
"learning_rate": 0.00010862524354540369,
|
|
"loss": 0.5978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5979155898094177,
|
|
"step": 3125,
|
|
"valid_targets_mean": 16300.0,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.8353349346143581,
|
|
"grad_norm": 0.403178875264998,
|
|
"learning_rate": 0.00010846900483804152,
|
|
"loss": 0.6117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6228663325309753,
|
|
"step": 3130,
|
|
"valid_targets_mean": 16295.0,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 0.8366693354683747,
|
|
"grad_norm": 0.6520737963100881,
|
|
"learning_rate": 0.00010831258455936645,
|
|
"loss": 0.6267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.634268045425415,
|
|
"step": 3135,
|
|
"valid_targets_mean": 15397.2,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 0.8380037363223912,
|
|
"grad_norm": 0.4924864447083112,
|
|
"learning_rate": 0.00010815598355796771,
|
|
"loss": 0.6126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6175791025161743,
|
|
"step": 3140,
|
|
"valid_targets_mean": 15168.5,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.8393381371764078,
|
|
"grad_norm": 0.3766799859442458,
|
|
"learning_rate": 0.0001079992026834149,
|
|
"loss": 0.6168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6296804547309875,
|
|
"step": 3145,
|
|
"valid_targets_mean": 16058.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.8406725380304243,
|
|
"grad_norm": 0.3022993394113949,
|
|
"learning_rate": 0.00010784224278625345,
|
|
"loss": 0.6087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5977729558944702,
|
|
"step": 3150,
|
|
"valid_targets_mean": 16427.4,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 0.8420069388844409,
|
|
"grad_norm": 0.3598287609839585,
|
|
"learning_rate": 0.00010768510471799996,
|
|
"loss": 0.5997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6242843866348267,
|
|
"step": 3155,
|
|
"valid_targets_mean": 16881.4,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 0.8433413397384575,
|
|
"grad_norm": 0.3742423879144425,
|
|
"learning_rate": 0.00010752778933113774,
|
|
"loss": 0.6084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.609407365322113,
|
|
"step": 3160,
|
|
"valid_targets_mean": 15541.2,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.844675740592474,
|
|
"grad_norm": 0.36522569966843305,
|
|
"learning_rate": 0.00010737029747911191,
|
|
"loss": 0.6241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6038820743560791,
|
|
"step": 3165,
|
|
"valid_targets_mean": 16314.2,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 0.8460101414464906,
|
|
"grad_norm": 0.3285028310007243,
|
|
"learning_rate": 0.00010721263001632503,
|
|
"loss": 0.6125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6167506575584412,
|
|
"step": 3170,
|
|
"valid_targets_mean": 16436.5,
|
|
"valid_targets_min": 98
|
|
},
|
|
{
|
|
"epoch": 0.847344542300507,
|
|
"grad_norm": 0.36921107249552737,
|
|
"learning_rate": 0.00010705478779813235,
|
|
"loss": 0.6148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6403708457946777,
|
|
"step": 3175,
|
|
"valid_targets_mean": 15671.0,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 0.8486789431545236,
|
|
"grad_norm": 0.3430439116198908,
|
|
"learning_rate": 0.00010689677168083711,
|
|
"loss": 0.5979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6421139240264893,
|
|
"step": 3180,
|
|
"valid_targets_mean": 14922.0,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 0.8500133440085401,
|
|
"grad_norm": 0.3799550119601048,
|
|
"learning_rate": 0.00010673858252168603,
|
|
"loss": 0.6144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6063963174819946,
|
|
"step": 3185,
|
|
"valid_targets_mean": 15838.3,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 0.8513477448625567,
|
|
"grad_norm": 0.3060774420584932,
|
|
"learning_rate": 0.00010658022117886457,
|
|
"loss": 0.6103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6487347483634949,
|
|
"step": 3190,
|
|
"valid_targets_mean": 16051.5,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.8526821457165733,
|
|
"grad_norm": 0.3733579759265676,
|
|
"learning_rate": 0.00010642168851149229,
|
|
"loss": 0.6168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6070621013641357,
|
|
"step": 3195,
|
|
"valid_targets_mean": 16092.5,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 0.8540165465705898,
|
|
"grad_norm": 0.38626581096154744,
|
|
"learning_rate": 0.00010626298537961821,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6229579448699951,
|
|
"step": 3200,
|
|
"valid_targets_mean": 16702.4,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.8553509474246064,
|
|
"grad_norm": 0.36477867910412903,
|
|
"learning_rate": 0.00010610411264421611,
|
|
"loss": 0.6099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6154232025146484,
|
|
"step": 3205,
|
|
"valid_targets_mean": 16684.9,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.8566853482786229,
|
|
"grad_norm": 0.36198405688505025,
|
|
"learning_rate": 0.0001059450711671799,
|
|
"loss": 0.6128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6101678013801575,
|
|
"step": 3210,
|
|
"valid_targets_mean": 15619.2,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 0.8580197491326395,
|
|
"grad_norm": 0.32113133660955206,
|
|
"learning_rate": 0.0001057858618113189,
|
|
"loss": 0.6224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6320436596870422,
|
|
"step": 3215,
|
|
"valid_targets_mean": 15215.8,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.859354149986656,
|
|
"grad_norm": 0.3435642080760044,
|
|
"learning_rate": 0.00010562648544035323,
|
|
"loss": 0.606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6011517643928528,
|
|
"step": 3220,
|
|
"valid_targets_mean": 15546.4,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 0.8606885508406725,
|
|
"grad_norm": 0.31579139858324407,
|
|
"learning_rate": 0.00010546694291890902,
|
|
"loss": 0.5854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6084245443344116,
|
|
"step": 3225,
|
|
"valid_targets_mean": 14603.2,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.8620229516946891,
|
|
"grad_norm": 0.34765535513552315,
|
|
"learning_rate": 0.00010530723511251382,
|
|
"loss": 0.6144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6162877082824707,
|
|
"step": 3230,
|
|
"valid_targets_mean": 15704.7,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 0.8633573525487056,
|
|
"grad_norm": 0.4299871031279047,
|
|
"learning_rate": 0.0001051473628875918,
|
|
"loss": 0.6078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6121437549591064,
|
|
"step": 3235,
|
|
"valid_targets_mean": 15767.7,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 0.8646917534027222,
|
|
"grad_norm": 0.5574544856474637,
|
|
"learning_rate": 0.00010498732711145918,
|
|
"loss": 0.6201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6257040500640869,
|
|
"step": 3240,
|
|
"valid_targets_mean": 15540.5,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 0.8660261542567387,
|
|
"grad_norm": 0.42323183273983017,
|
|
"learning_rate": 0.00010482712865231942,
|
|
"loss": 0.5843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5765583515167236,
|
|
"step": 3245,
|
|
"valid_targets_mean": 16329.5,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 0.8673605551107553,
|
|
"grad_norm": 0.28174129439551565,
|
|
"learning_rate": 0.00010466676837925857,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6117812395095825,
|
|
"step": 3250,
|
|
"valid_targets_mean": 16464.4,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 0.8686949559647719,
|
|
"grad_norm": 0.3346385254721476,
|
|
"learning_rate": 0.00010450624716224045,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6023174524307251,
|
|
"step": 3255,
|
|
"valid_targets_mean": 15990.7,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 0.8700293568187883,
|
|
"grad_norm": 0.2584890129520337,
|
|
"learning_rate": 0.00010434556587210214,
|
|
"loss": 0.6004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6040849685668945,
|
|
"step": 3260,
|
|
"valid_targets_mean": 17105.3,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 0.8713637576728049,
|
|
"grad_norm": 0.3297204401029785,
|
|
"learning_rate": 0.000104184725380549,
|
|
"loss": 0.6214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5984177589416504,
|
|
"step": 3265,
|
|
"valid_targets_mean": 16792.4,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 0.8726981585268214,
|
|
"grad_norm": 0.3848946663409018,
|
|
"learning_rate": 0.0001040237265601502,
|
|
"loss": 0.6112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.589970588684082,
|
|
"step": 3270,
|
|
"valid_targets_mean": 17034.3,
|
|
"valid_targets_min": 10
|
|
},
|
|
{
|
|
"epoch": 0.874032559380838,
|
|
"grad_norm": 0.2959705381837207,
|
|
"learning_rate": 0.00010386257028433366,
|
|
"loss": 0.6037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6227315664291382,
|
|
"step": 3275,
|
|
"valid_targets_mean": 14472.7,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 0.8753669602348545,
|
|
"grad_norm": 0.4417862059825601,
|
|
"learning_rate": 0.00010370125742738173,
|
|
"loss": 0.6033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5997108221054077,
|
|
"step": 3280,
|
|
"valid_targets_mean": 16168.1,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.8767013610888711,
|
|
"grad_norm": 0.37525797564764835,
|
|
"learning_rate": 0.00010353978886442605,
|
|
"loss": 0.6264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6209875345230103,
|
|
"step": 3285,
|
|
"valid_targets_mean": 15834.1,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 0.8780357619428877,
|
|
"grad_norm": 0.3241866530396012,
|
|
"learning_rate": 0.00010337816547144308,
|
|
"loss": 0.6302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6284918189048767,
|
|
"step": 3290,
|
|
"valid_targets_mean": 15657.5,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.8793701627969042,
|
|
"grad_norm": 0.3160018728632203,
|
|
"learning_rate": 0.00010321638812524917,
|
|
"loss": 0.6155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6200350522994995,
|
|
"step": 3295,
|
|
"valid_targets_mean": 16819.1,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 0.8807045636509208,
|
|
"grad_norm": 0.322845974931051,
|
|
"learning_rate": 0.00010305445770349593,
|
|
"loss": 0.6024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6223856806755066,
|
|
"step": 3300,
|
|
"valid_targets_mean": 16542.1,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 0.8820389645049372,
|
|
"grad_norm": 0.4608365284178052,
|
|
"learning_rate": 0.00010289237508466536,
|
|
"loss": 0.6057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6043040752410889,
|
|
"step": 3305,
|
|
"valid_targets_mean": 15211.4,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.8833733653589538,
|
|
"grad_norm": 0.4690910299930012,
|
|
"learning_rate": 0.00010273014114806517,
|
|
"loss": 0.5999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6026057004928589,
|
|
"step": 3310,
|
|
"valid_targets_mean": 15156.6,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 0.8847077662129704,
|
|
"grad_norm": 0.4201149153301678,
|
|
"learning_rate": 0.000102567756773824,
|
|
"loss": 0.6134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5694378018379211,
|
|
"step": 3315,
|
|
"valid_targets_mean": 16189.5,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 0.8860421670669869,
|
|
"grad_norm": 0.4201365080338633,
|
|
"learning_rate": 0.00010240522284288657,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6202242374420166,
|
|
"step": 3320,
|
|
"valid_targets_mean": 14022.6,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.8873765679210035,
|
|
"grad_norm": 0.4654591928652114,
|
|
"learning_rate": 0.00010224254023700899,
|
|
"loss": 0.6131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6067278385162354,
|
|
"step": 3325,
|
|
"valid_targets_mean": 16067.8,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.88871096877502,
|
|
"grad_norm": 0.35180283178941474,
|
|
"learning_rate": 0.00010207970983875395,
|
|
"loss": 0.6147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6386433839797974,
|
|
"step": 3330,
|
|
"valid_targets_mean": 15235.7,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.8900453696290366,
|
|
"grad_norm": 0.33352068396479145,
|
|
"learning_rate": 0.00010191673253148589,
|
|
"loss": 0.609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6070728302001953,
|
|
"step": 3335,
|
|
"valid_targets_mean": 15675.0,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 0.8913797704830531,
|
|
"grad_norm": 0.3736462263125514,
|
|
"learning_rate": 0.00010175360919936623,
|
|
"loss": 0.611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5979476571083069,
|
|
"step": 3340,
|
|
"valid_targets_mean": 16730.8,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 0.8927141713370697,
|
|
"grad_norm": 0.3270191279863833,
|
|
"learning_rate": 0.00010159034072734865,
|
|
"loss": 0.5914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5909374356269836,
|
|
"step": 3345,
|
|
"valid_targets_mean": 16453.6,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 0.8940485721910862,
|
|
"grad_norm": 0.36081202928209205,
|
|
"learning_rate": 0.00010142692800117416,
|
|
"loss": 0.6028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6175713539123535,
|
|
"step": 3350,
|
|
"valid_targets_mean": 16360.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.8953829730451027,
|
|
"grad_norm": 0.28276237364863666,
|
|
"learning_rate": 0.00010126337190736636,
|
|
"loss": 0.6085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6301509141921997,
|
|
"step": 3355,
|
|
"valid_targets_mean": 15152.6,
|
|
"valid_targets_min": 71
|
|
},
|
|
{
|
|
"epoch": 0.8967173738991193,
|
|
"grad_norm": 0.2872918873302433,
|
|
"learning_rate": 0.00010109967333322669,
|
|
"loss": 0.6069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6228970289230347,
|
|
"step": 3360,
|
|
"valid_targets_mean": 15857.6,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 0.8980517747531358,
|
|
"grad_norm": 0.3265392681478234,
|
|
"learning_rate": 0.00010093583316682945,
|
|
"loss": 0.6048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.63560950756073,
|
|
"step": 3365,
|
|
"valid_targets_mean": 14328.4,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.8993861756071524,
|
|
"grad_norm": 0.28161790268306824,
|
|
"learning_rate": 0.00010077185229701722,
|
|
"loss": 0.6036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.597403883934021,
|
|
"step": 3370,
|
|
"valid_targets_mean": 15919.6,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 0.900720576461169,
|
|
"grad_norm": 0.3825874335065564,
|
|
"learning_rate": 0.00010060773161339574,
|
|
"loss": 0.5992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.624705970287323,
|
|
"step": 3375,
|
|
"valid_targets_mean": 15714.4,
|
|
"valid_targets_min": 82
|
|
},
|
|
{
|
|
"epoch": 0.9020549773151855,
|
|
"grad_norm": 0.5458605458372956,
|
|
"learning_rate": 0.00010044347200632943,
|
|
"loss": 0.5989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5926228761672974,
|
|
"step": 3380,
|
|
"valid_targets_mean": 15382.2,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 0.9033893781692021,
|
|
"grad_norm": 0.44645818132465365,
|
|
"learning_rate": 0.00010027907436693623,
|
|
"loss": 0.6095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6042912006378174,
|
|
"step": 3385,
|
|
"valid_targets_mean": 14443.6,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 0.9047237790232185,
|
|
"grad_norm": 0.34305195074771816,
|
|
"learning_rate": 0.00010011453958708297,
|
|
"loss": 0.6125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5844123363494873,
|
|
"step": 3390,
|
|
"valid_targets_mean": 15720.4,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.9060581798772351,
|
|
"grad_norm": 0.41868384483343835,
|
|
"learning_rate": 9.994986855938047e-05,
|
|
"loss": 0.609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.588081955909729,
|
|
"step": 3395,
|
|
"valid_targets_mean": 16701.1,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.9073925807312516,
|
|
"grad_norm": 0.3945502563027158,
|
|
"learning_rate": 9.978506217717874e-05,
|
|
"loss": 0.6019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.579233705997467,
|
|
"step": 3400,
|
|
"valid_targets_mean": 16139.0,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 0.9087269815852682,
|
|
"grad_norm": 0.4392212981591805,
|
|
"learning_rate": 9.962012133456204e-05,
|
|
"loss": 0.6129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6045866012573242,
|
|
"step": 3405,
|
|
"valid_targets_mean": 16258.9,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 0.9100613824392848,
|
|
"grad_norm": 0.5664907112861385,
|
|
"learning_rate": 9.945504692634409e-05,
|
|
"loss": 0.5989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.576043963432312,
|
|
"step": 3410,
|
|
"valid_targets_mean": 15018.4,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.9113957832933013,
|
|
"grad_norm": 0.5588994820568607,
|
|
"learning_rate": 9.928983984806326e-05,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6139967441558838,
|
|
"step": 3415,
|
|
"valid_targets_mean": 16057.7,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.9127301841473179,
|
|
"grad_norm": 0.5421048040367675,
|
|
"learning_rate": 9.912450099597765e-05,
|
|
"loss": 0.6028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6360303163528442,
|
|
"step": 3420,
|
|
"valid_targets_mean": 15415.0,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 0.9140645850013344,
|
|
"grad_norm": 0.6742729587621604,
|
|
"learning_rate": 9.895903126706019e-05,
|
|
"loss": 0.608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6016721129417419,
|
|
"step": 3425,
|
|
"valid_targets_mean": 15982.4,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 0.915398985855351,
|
|
"grad_norm": 0.523298202464321,
|
|
"learning_rate": 9.879343155899382e-05,
|
|
"loss": 0.5938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5959682464599609,
|
|
"step": 3430,
|
|
"valid_targets_mean": 15555.7,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 0.9167333867093675,
|
|
"grad_norm": 0.556504006712715,
|
|
"learning_rate": 9.862770277016676e-05,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6146102547645569,
|
|
"step": 3435,
|
|
"valid_targets_mean": 16316.7,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.918067787563384,
|
|
"grad_norm": 0.5926906258765555,
|
|
"learning_rate": 9.846184579966733e-05,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5958269238471985,
|
|
"step": 3440,
|
|
"valid_targets_mean": 16558.8,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.9194021884174006,
|
|
"grad_norm": 0.6279864400454699,
|
|
"learning_rate": 9.829586154727933e-05,
|
|
"loss": 0.6113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5914964079856873,
|
|
"step": 3445,
|
|
"valid_targets_mean": 15705.2,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 0.9207365892714171,
|
|
"grad_norm": 0.5296792410221006,
|
|
"learning_rate": 9.812975091347706e-05,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6114808320999146,
|
|
"step": 3450,
|
|
"valid_targets_mean": 15067.1,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 0.9220709901254337,
|
|
"grad_norm": 0.5816183075645648,
|
|
"learning_rate": 9.796351479942047e-05,
|
|
"loss": 0.6098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.593274712562561,
|
|
"step": 3455,
|
|
"valid_targets_mean": 16594.1,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 0.9234053909794502,
|
|
"grad_norm": 0.5769841624751353,
|
|
"learning_rate": 9.779715410695015e-05,
|
|
"loss": 0.6065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5925379395484924,
|
|
"step": 3460,
|
|
"valid_targets_mean": 16822.5,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 0.9247397918334668,
|
|
"grad_norm": 0.6119656142684935,
|
|
"learning_rate": 9.76306697385827e-05,
|
|
"loss": 0.6102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5965239405632019,
|
|
"step": 3465,
|
|
"valid_targets_mean": 16336.2,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 0.9260741926874834,
|
|
"grad_norm": 0.5978678471828447,
|
|
"learning_rate": 9.746406259750552e-05,
|
|
"loss": 0.6204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623969554901123,
|
|
"step": 3470,
|
|
"valid_targets_mean": 14696.4,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 0.9274085935414998,
|
|
"grad_norm": 0.5392574518557786,
|
|
"learning_rate": 9.729733358757213e-05,
|
|
"loss": 0.5921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6138155460357666,
|
|
"step": 3475,
|
|
"valid_targets_mean": 16144.3,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 0.9287429943955164,
|
|
"grad_norm": 0.546975568741761,
|
|
"learning_rate": 9.713048361329715e-05,
|
|
"loss": 0.6044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5964043140411377,
|
|
"step": 3480,
|
|
"valid_targets_mean": 16867.3,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 0.9300773952495329,
|
|
"grad_norm": 0.590321559873573,
|
|
"learning_rate": 9.696351357985154e-05,
|
|
"loss": 0.6209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6271519660949707,
|
|
"step": 3485,
|
|
"valid_targets_mean": 14657.9,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 0.9314117961035495,
|
|
"grad_norm": 0.4609890304723043,
|
|
"learning_rate": 9.679642439305744e-05,
|
|
"loss": 0.618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6284685730934143,
|
|
"step": 3490,
|
|
"valid_targets_mean": 16037.7,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.932746196957566,
|
|
"grad_norm": 0.5460416444420783,
|
|
"learning_rate": 9.662921695938354e-05,
|
|
"loss": 0.5986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6472452282905579,
|
|
"step": 3495,
|
|
"valid_targets_mean": 15298.9,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.9340805978115826,
|
|
"grad_norm": 0.4599174267849713,
|
|
"learning_rate": 9.646189218593992e-05,
|
|
"loss": 0.6074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5882945656776428,
|
|
"step": 3500,
|
|
"valid_targets_mean": 15542.3,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 0.9354149986655992,
|
|
"grad_norm": 0.5595444795518102,
|
|
"learning_rate": 9.629445098047334e-05,
|
|
"loss": 0.6169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6238095760345459,
|
|
"step": 3505,
|
|
"valid_targets_mean": 16269.1,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 0.9367493995196157,
|
|
"grad_norm": 0.3894759895544851,
|
|
"learning_rate": 9.61268942513621e-05,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5966178774833679,
|
|
"step": 3510,
|
|
"valid_targets_mean": 15001.4,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.9380838003736323,
|
|
"grad_norm": 0.4979215250445888,
|
|
"learning_rate": 9.595922290761128e-05,
|
|
"loss": 0.6126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6272135972976685,
|
|
"step": 3515,
|
|
"valid_targets_mean": 16634.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.9394182012276487,
|
|
"grad_norm": 0.3927564720256912,
|
|
"learning_rate": 9.579143785884779e-05,
|
|
"loss": 0.5916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5718698501586914,
|
|
"step": 3520,
|
|
"valid_targets_mean": 16832.0,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 0.9407526020816653,
|
|
"grad_norm": 0.43211562604456244,
|
|
"learning_rate": 9.562354001531532e-05,
|
|
"loss": 0.6158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5840474367141724,
|
|
"step": 3525,
|
|
"valid_targets_mean": 15508.1,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 0.9420870029356819,
|
|
"grad_norm": 0.4128672941000394,
|
|
"learning_rate": 9.545553028786952e-05,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5857966542243958,
|
|
"step": 3530,
|
|
"valid_targets_mean": 15651.6,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.9434214037896984,
|
|
"grad_norm": 0.3634747021780349,
|
|
"learning_rate": 9.5287409587973e-05,
|
|
"loss": 0.6033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6025322675704956,
|
|
"step": 3535,
|
|
"valid_targets_mean": 15089.4,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.944755804643715,
|
|
"grad_norm": 0.3521546765730756,
|
|
"learning_rate": 9.511917882769042e-05,
|
|
"loss": 0.6005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6014447808265686,
|
|
"step": 3540,
|
|
"valid_targets_mean": 16103.0,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 0.9460902054977315,
|
|
"grad_norm": 0.3187808407216376,
|
|
"learning_rate": 9.495083891968351e-05,
|
|
"loss": 0.6097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5850682854652405,
|
|
"step": 3545,
|
|
"valid_targets_mean": 15095.1,
|
|
"valid_targets_min": 163
|
|
},
|
|
{
|
|
"epoch": 0.9474246063517481,
|
|
"grad_norm": 0.3198484300211676,
|
|
"learning_rate": 9.478239077720615e-05,
|
|
"loss": 0.6122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6104428768157959,
|
|
"step": 3550,
|
|
"valid_targets_mean": 16228.7,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.9487590072057646,
|
|
"grad_norm": 0.3181476862431669,
|
|
"learning_rate": 9.461383531409937e-05,
|
|
"loss": 0.6013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6085827350616455,
|
|
"step": 3555,
|
|
"valid_targets_mean": 16036.3,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.9500934080597812,
|
|
"grad_norm": 0.29736598906741296,
|
|
"learning_rate": 9.444517344478645e-05,
|
|
"loss": 0.6099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5716111660003662,
|
|
"step": 3560,
|
|
"valid_targets_mean": 16294.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 0.9514278089137977,
|
|
"grad_norm": 0.3542780709853529,
|
|
"learning_rate": 9.427640608426789e-05,
|
|
"loss": 0.6045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.600184977054596,
|
|
"step": 3565,
|
|
"valid_targets_mean": 16048.9,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 0.9527622097678142,
|
|
"grad_norm": 0.2694226538412474,
|
|
"learning_rate": 9.410753414811654e-05,
|
|
"loss": 0.5991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5748517513275146,
|
|
"step": 3570,
|
|
"valid_targets_mean": 16511.5,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 0.9540966106218308,
|
|
"grad_norm": 0.34003100557255933,
|
|
"learning_rate": 9.393855855247254e-05,
|
|
"loss": 0.6299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6436910033226013,
|
|
"step": 3575,
|
|
"valid_targets_mean": 16154.0,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.9554310114758473,
|
|
"grad_norm": 0.30757421140605506,
|
|
"learning_rate": 9.376948021403838e-05,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5963718891143799,
|
|
"step": 3580,
|
|
"valid_targets_mean": 16212.5,
|
|
"valid_targets_min": 185
|
|
},
|
|
{
|
|
"epoch": 0.9567654123298639,
|
|
"grad_norm": 0.24627952846755352,
|
|
"learning_rate": 9.360030005007399e-05,
|
|
"loss": 0.6087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5852136015892029,
|
|
"step": 3585,
|
|
"valid_targets_mean": 15584.9,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.9580998131838805,
|
|
"grad_norm": 0.4008105415564908,
|
|
"learning_rate": 9.343101897839169e-05,
|
|
"loss": 0.6124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.637090802192688,
|
|
"step": 3590,
|
|
"valid_targets_mean": 15035.9,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 0.959434214037897,
|
|
"grad_norm": 0.3175824226253769,
|
|
"learning_rate": 9.326163791735116e-05,
|
|
"loss": 0.5915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6040235161781311,
|
|
"step": 3595,
|
|
"valid_targets_mean": 16532.5,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 0.9607686148919136,
|
|
"grad_norm": 0.2967552407153772,
|
|
"learning_rate": 9.309215778585461e-05,
|
|
"loss": 0.6081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6449960470199585,
|
|
"step": 3600,
|
|
"valid_targets_mean": 16095.4,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 0.96210301574593,
|
|
"grad_norm": 0.27300528101957167,
|
|
"learning_rate": 9.29225795033417e-05,
|
|
"loss": 0.6151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.600953221321106,
|
|
"step": 3605,
|
|
"valid_targets_mean": 16097.2,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 0.9634374165999466,
|
|
"grad_norm": 0.34659082635686456,
|
|
"learning_rate": 9.275290398978454e-05,
|
|
"loss": 0.6258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6250200271606445,
|
|
"step": 3610,
|
|
"valid_targets_mean": 14943.5,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 0.9647718174539631,
|
|
"grad_norm": 0.2768182893205293,
|
|
"learning_rate": 9.258313216568273e-05,
|
|
"loss": 0.6089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6249918937683105,
|
|
"step": 3615,
|
|
"valid_targets_mean": 16153.6,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 0.9661062183079797,
|
|
"grad_norm": 0.2856620183403404,
|
|
"learning_rate": 9.241326495205836e-05,
|
|
"loss": 0.6155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6104844808578491,
|
|
"step": 3620,
|
|
"valid_targets_mean": 15287.8,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.9674406191619963,
|
|
"grad_norm": 0.28353965965788425,
|
|
"learning_rate": 9.224330327045105e-05,
|
|
"loss": 0.5971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5877224802970886,
|
|
"step": 3625,
|
|
"valid_targets_mean": 15522.9,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 0.9687750200160128,
|
|
"grad_norm": 0.24175798876635027,
|
|
"learning_rate": 9.207324804291285e-05,
|
|
"loss": 0.6101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5926316976547241,
|
|
"step": 3630,
|
|
"valid_targets_mean": 16090.8,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 0.9701094208700294,
|
|
"grad_norm": 0.27316066602702954,
|
|
"learning_rate": 9.190310019200338e-05,
|
|
"loss": 0.6062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5892231464385986,
|
|
"step": 3635,
|
|
"valid_targets_mean": 16473.6,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.9714438217240459,
|
|
"grad_norm": 0.2955367891918511,
|
|
"learning_rate": 9.173286064078465e-05,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6512964963912964,
|
|
"step": 3640,
|
|
"valid_targets_mean": 15582.8,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 0.9727782225780625,
|
|
"grad_norm": 0.27556938116596297,
|
|
"learning_rate": 9.156253031281625e-05,
|
|
"loss": 0.6205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6214442253112793,
|
|
"step": 3645,
|
|
"valid_targets_mean": 14921.4,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 0.974112623432079,
|
|
"grad_norm": 0.2582864150794311,
|
|
"learning_rate": 9.139211013215013e-05,
|
|
"loss": 0.6159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.62605881690979,
|
|
"step": 3650,
|
|
"valid_targets_mean": 15899.8,
|
|
"valid_targets_min": 147
|
|
},
|
|
{
|
|
"epoch": 0.9754470242860955,
|
|
"grad_norm": 0.27919860547561176,
|
|
"learning_rate": 9.122160102332583e-05,
|
|
"loss": 0.6124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5860695242881775,
|
|
"step": 3655,
|
|
"valid_targets_mean": 16154.2,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 0.9767814251401121,
|
|
"grad_norm": 0.3429673890231037,
|
|
"learning_rate": 9.105100391136523e-05,
|
|
"loss": 0.5969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6048210263252258,
|
|
"step": 3660,
|
|
"valid_targets_mean": 15284.5,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 0.9781158259941286,
|
|
"grad_norm": 0.25992997840803017,
|
|
"learning_rate": 9.088031972176764e-05,
|
|
"loss": 0.597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5927121639251709,
|
|
"step": 3665,
|
|
"valid_targets_mean": 16107.3,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 0.9794502268481452,
|
|
"grad_norm": 0.3105241091189908,
|
|
"learning_rate": 9.070954938050482e-05,
|
|
"loss": 0.6177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6217231750488281,
|
|
"step": 3670,
|
|
"valid_targets_mean": 15208.2,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 0.9807846277021617,
|
|
"grad_norm": 0.29322134040149855,
|
|
"learning_rate": 9.053869381401589e-05,
|
|
"loss": 0.6078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6112452745437622,
|
|
"step": 3675,
|
|
"valid_targets_mean": 15493.6,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.9821190285561783,
|
|
"grad_norm": 0.25629999684865057,
|
|
"learning_rate": 9.036775394920228e-05,
|
|
"loss": 0.6087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5787304639816284,
|
|
"step": 3680,
|
|
"valid_targets_mean": 17701.9,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 0.9834534294101949,
|
|
"grad_norm": 0.31739063948027607,
|
|
"learning_rate": 9.01967307134228e-05,
|
|
"loss": 0.6044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5753624439239502,
|
|
"step": 3685,
|
|
"valid_targets_mean": 16942.6,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.9847878302642114,
|
|
"grad_norm": 0.2630316702217527,
|
|
"learning_rate": 9.00256250344885e-05,
|
|
"loss": 0.5962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5995844602584839,
|
|
"step": 3690,
|
|
"valid_targets_mean": 17047.1,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.9861222311182279,
|
|
"grad_norm": 0.2870742144270906,
|
|
"learning_rate": 8.985443784065774e-05,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234308481216431,
|
|
"step": 3695,
|
|
"valid_targets_mean": 16177.1,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.9874566319722444,
|
|
"grad_norm": 0.2870968766409778,
|
|
"learning_rate": 8.968317006063107e-05,
|
|
"loss": 0.6052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6069310903549194,
|
|
"step": 3700,
|
|
"valid_targets_mean": 15818.8,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 0.988791032826261,
|
|
"grad_norm": 0.3201633458743414,
|
|
"learning_rate": 8.951182262354624e-05,
|
|
"loss": 0.5855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.596049427986145,
|
|
"step": 3705,
|
|
"valid_targets_mean": 15979.8,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 0.9901254336802775,
|
|
"grad_norm": 0.45292184212025816,
|
|
"learning_rate": 8.934039645897316e-05,
|
|
"loss": 0.6119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6308547258377075,
|
|
"step": 3710,
|
|
"valid_targets_mean": 16358.6,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 0.9914598345342941,
|
|
"grad_norm": 0.37136557232397777,
|
|
"learning_rate": 8.916889249690877e-05,
|
|
"loss": 0.6218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6080734133720398,
|
|
"step": 3715,
|
|
"valid_targets_mean": 15775.9,
|
|
"valid_targets_min": 124
|
|
},
|
|
{
|
|
"epoch": 0.9927942353883107,
|
|
"grad_norm": 0.3059224517778049,
|
|
"learning_rate": 8.899731166777216e-05,
|
|
"loss": 0.6203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5962896943092346,
|
|
"step": 3720,
|
|
"valid_targets_mean": 16839.4,
|
|
"valid_targets_min": 102
|
|
},
|
|
{
|
|
"epoch": 0.9941286362423272,
|
|
"grad_norm": 0.41060114220604527,
|
|
"learning_rate": 8.882565490239935e-05,
|
|
"loss": 0.5876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5867825746536255,
|
|
"step": 3725,
|
|
"valid_targets_mean": 15651.4,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 0.9954630370963438,
|
|
"grad_norm": 0.287734059966752,
|
|
"learning_rate": 8.865392313203839e-05,
|
|
"loss": 0.6016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5873230695724487,
|
|
"step": 3730,
|
|
"valid_targets_mean": 15009.5,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 0.9967974379503602,
|
|
"grad_norm": 0.3042666420646906,
|
|
"learning_rate": 8.848211728834415e-05,
|
|
"loss": 0.625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6030747890472412,
|
|
"step": 3735,
|
|
"valid_targets_mean": 15472.2,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 0.9981318388043768,
|
|
"grad_norm": 0.2796652765372023,
|
|
"learning_rate": 8.831023830337348e-05,
|
|
"loss": 0.6217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6056950092315674,
|
|
"step": 3740,
|
|
"valid_targets_mean": 14817.1,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 0.9994662396583934,
|
|
"grad_norm": 0.28730214935887727,
|
|
"learning_rate": 8.813828710957987e-05,
|
|
"loss": 0.5884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6174252033233643,
|
|
"step": 3745,
|
|
"valid_targets_mean": 15888.3,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.00080064051241,
|
|
"grad_norm": 0.2735137322124431,
|
|
"learning_rate": 8.796626463980863e-05,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6183410286903381,
|
|
"step": 3750,
|
|
"valid_targets_mean": 17187.5,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 1.0021350413664265,
|
|
"grad_norm": 0.23838304440126826,
|
|
"learning_rate": 8.779417182729181e-05,
|
|
"loss": 0.6026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5817262530326843,
|
|
"step": 3755,
|
|
"valid_targets_mean": 16220.9,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.003469442220443,
|
|
"grad_norm": 0.44839831654591217,
|
|
"learning_rate": 8.762200960564295e-05,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6195690035820007,
|
|
"step": 3760,
|
|
"valid_targets_mean": 16122.4,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 1.0048038430744595,
|
|
"grad_norm": 0.27158246380773216,
|
|
"learning_rate": 8.744977890885218e-05,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6100062131881714,
|
|
"step": 3765,
|
|
"valid_targets_mean": 15919.8,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 1.006138243928476,
|
|
"grad_norm": 0.2806714085545213,
|
|
"learning_rate": 8.727748067128116e-05,
|
|
"loss": 0.6074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6355049014091492,
|
|
"step": 3770,
|
|
"valid_targets_mean": 14672.5,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 1.0074726447824927,
|
|
"grad_norm": 0.2658647270458597,
|
|
"learning_rate": 8.71051158276579e-05,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.594117283821106,
|
|
"step": 3775,
|
|
"valid_targets_mean": 17941.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.0088070456365092,
|
|
"grad_norm": 0.3368849544158226,
|
|
"learning_rate": 8.693268531307182e-05,
|
|
"loss": 0.6119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6125128269195557,
|
|
"step": 3780,
|
|
"valid_targets_mean": 15490.7,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.0101414464905258,
|
|
"grad_norm": 0.2855946808005626,
|
|
"learning_rate": 8.676019006296851e-05,
|
|
"loss": 0.6057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5923916101455688,
|
|
"step": 3785,
|
|
"valid_targets_mean": 16882.1,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 1.0114758473445422,
|
|
"grad_norm": 0.2487870371227702,
|
|
"learning_rate": 8.658763101314484e-05,
|
|
"loss": 0.6095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5815153121948242,
|
|
"step": 3790,
|
|
"valid_targets_mean": 15732.1,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 1.0128102481985588,
|
|
"grad_norm": 0.2702522146979926,
|
|
"learning_rate": 8.64150090997438e-05,
|
|
"loss": 0.5956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6120627522468567,
|
|
"step": 3795,
|
|
"valid_targets_mean": 15799.6,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 1.0141446490525754,
|
|
"grad_norm": 0.31275492888858597,
|
|
"learning_rate": 8.624232525924936e-05,
|
|
"loss": 0.607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6010204553604126,
|
|
"step": 3800,
|
|
"valid_targets_mean": 16183.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 1.015479049906592,
|
|
"grad_norm": 0.2714797189470768,
|
|
"learning_rate": 8.606958042848145e-05,
|
|
"loss": 0.6059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5856226682662964,
|
|
"step": 3805,
|
|
"valid_targets_mean": 16296.6,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.0168134507606086,
|
|
"grad_norm": 0.24184649907456296,
|
|
"learning_rate": 8.589677554459094e-05,
|
|
"loss": 0.5885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5947768688201904,
|
|
"step": 3810,
|
|
"valid_targets_mean": 17623.0,
|
|
"valid_targets_min": 91
|
|
},
|
|
{
|
|
"epoch": 1.018147851614625,
|
|
"grad_norm": 0.21960448002011657,
|
|
"learning_rate": 8.572391154505444e-05,
|
|
"loss": 0.5863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5928741693496704,
|
|
"step": 3815,
|
|
"valid_targets_mean": 15627.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 1.0194822524686415,
|
|
"grad_norm": 0.33082622819338225,
|
|
"learning_rate": 8.555098936766927e-05,
|
|
"loss": 0.5957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6214810013771057,
|
|
"step": 3820,
|
|
"valid_targets_mean": 14932.7,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 1.0208166533226581,
|
|
"grad_norm": 0.3375992376978062,
|
|
"learning_rate": 8.537800995054838e-05,
|
|
"loss": 0.6157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6424602270126343,
|
|
"step": 3825,
|
|
"valid_targets_mean": 15787.0,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 1.0221510541766747,
|
|
"grad_norm": 0.293029681044579,
|
|
"learning_rate": 8.520497423211527e-05,
|
|
"loss": 0.5826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5864099860191345,
|
|
"step": 3830,
|
|
"valid_targets_mean": 15670.6,
|
|
"valid_targets_min": 114
|
|
},
|
|
{
|
|
"epoch": 1.0234854550306913,
|
|
"grad_norm": 0.26359125702311575,
|
|
"learning_rate": 8.503188315109881e-05,
|
|
"loss": 0.6114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.592563807964325,
|
|
"step": 3835,
|
|
"valid_targets_mean": 15773.3,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.0248198558847077,
|
|
"grad_norm": 0.2762342311611941,
|
|
"learning_rate": 8.485873764652832e-05,
|
|
"loss": 0.5968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5965045094490051,
|
|
"step": 3840,
|
|
"valid_targets_mean": 16424.9,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 1.0261542567387243,
|
|
"grad_norm": 0.29960406026858055,
|
|
"learning_rate": 8.468553865772826e-05,
|
|
"loss": 0.585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5900242328643799,
|
|
"step": 3845,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 1.0274886575927409,
|
|
"grad_norm": 0.3654350862443785,
|
|
"learning_rate": 8.451228712431332e-05,
|
|
"loss": 0.5997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6008077263832092,
|
|
"step": 3850,
|
|
"valid_targets_mean": 17085.4,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.0288230584467575,
|
|
"grad_norm": 0.39564743981799033,
|
|
"learning_rate": 8.433898398618319e-05,
|
|
"loss": 0.6076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5913037657737732,
|
|
"step": 3855,
|
|
"valid_targets_mean": 15768.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.0301574593007738,
|
|
"grad_norm": 0.47340182038873624,
|
|
"learning_rate": 8.416563018351758e-05,
|
|
"loss": 0.5866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6292645931243896,
|
|
"step": 3860,
|
|
"valid_targets_mean": 14999.9,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.0314918601547904,
|
|
"grad_norm": 0.39882755526734276,
|
|
"learning_rate": 8.399222665677105e-05,
|
|
"loss": 0.6109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6066784858703613,
|
|
"step": 3865,
|
|
"valid_targets_mean": 16274.1,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.032826261008807,
|
|
"grad_norm": 0.5133907696664676,
|
|
"learning_rate": 8.381877434666784e-05,
|
|
"loss": 0.5897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6070998907089233,
|
|
"step": 3870,
|
|
"valid_targets_mean": 14476.0,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 1.0341606618628236,
|
|
"grad_norm": 0.37803881156149816,
|
|
"learning_rate": 8.364527419419696e-05,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5785161256790161,
|
|
"step": 3875,
|
|
"valid_targets_mean": 15344.3,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 1.0354950627168402,
|
|
"grad_norm": 0.38545354922670294,
|
|
"learning_rate": 8.347172714060686e-05,
|
|
"loss": 0.6003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.62616366147995,
|
|
"step": 3880,
|
|
"valid_targets_mean": 15722.0,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.0368294635708566,
|
|
"grad_norm": 0.27601606744784446,
|
|
"learning_rate": 8.32981341274005e-05,
|
|
"loss": 0.587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5717325210571289,
|
|
"step": 3885,
|
|
"valid_targets_mean": 15027.5,
|
|
"valid_targets_min": 24
|
|
},
|
|
{
|
|
"epoch": 1.0381638644248732,
|
|
"grad_norm": 0.283209337819396,
|
|
"learning_rate": 8.312449609633014e-05,
|
|
"loss": 0.598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6005629301071167,
|
|
"step": 3890,
|
|
"valid_targets_mean": 16904.1,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 1.0394982652788898,
|
|
"grad_norm": 0.2589442465778229,
|
|
"learning_rate": 8.295081398939227e-05,
|
|
"loss": 0.591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.622482180595398,
|
|
"step": 3895,
|
|
"valid_targets_mean": 15561.6,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.0408326661329064,
|
|
"grad_norm": 0.2538028332575704,
|
|
"learning_rate": 8.277708874882252e-05,
|
|
"loss": 0.5922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5734326243400574,
|
|
"step": 3900,
|
|
"valid_targets_mean": 16120.2,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 1.042167066986923,
|
|
"grad_norm": 0.2951707638591912,
|
|
"learning_rate": 8.26033213170905e-05,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5737336874008179,
|
|
"step": 3905,
|
|
"valid_targets_mean": 16694.7,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 1.0435014678409393,
|
|
"grad_norm": 0.23428151714943055,
|
|
"learning_rate": 8.242951263689468e-05,
|
|
"loss": 0.6112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6021207571029663,
|
|
"step": 3910,
|
|
"valid_targets_mean": 16208.7,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.044835868694956,
|
|
"grad_norm": 0.3700479615812931,
|
|
"learning_rate": 8.225566365115738e-05,
|
|
"loss": 0.5971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.592542290687561,
|
|
"step": 3915,
|
|
"valid_targets_mean": 16273.6,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.0461702695489725,
|
|
"grad_norm": 0.2645971281118714,
|
|
"learning_rate": 8.20817753030195e-05,
|
|
"loss": 0.5968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5795402526855469,
|
|
"step": 3920,
|
|
"valid_targets_mean": 16697.3,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 1.0475046704029891,
|
|
"grad_norm": 0.3425426380482205,
|
|
"learning_rate": 8.190784853583554e-05,
|
|
"loss": 0.6191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623998761177063,
|
|
"step": 3925,
|
|
"valid_targets_mean": 15397.3,
|
|
"valid_targets_min": 91
|
|
},
|
|
{
|
|
"epoch": 1.0488390712570057,
|
|
"grad_norm": 0.24545813741267344,
|
|
"learning_rate": 8.17338842931684e-05,
|
|
"loss": 0.6043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5687215328216553,
|
|
"step": 3930,
|
|
"valid_targets_mean": 17506.0,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.050173472111022,
|
|
"grad_norm": 0.33126881411854414,
|
|
"learning_rate": 8.155988351878433e-05,
|
|
"loss": 0.5914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5726214647293091,
|
|
"step": 3935,
|
|
"valid_targets_mean": 15834.2,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 1.0515078729650387,
|
|
"grad_norm": 0.5763101772706276,
|
|
"learning_rate": 8.138584715664766e-05,
|
|
"loss": 0.621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5805267095565796,
|
|
"step": 3940,
|
|
"valid_targets_mean": 15561.8,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 1.0528422738190553,
|
|
"grad_norm": 0.494034853848136,
|
|
"learning_rate": 8.121177615091591e-05,
|
|
"loss": 0.6087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5953424572944641,
|
|
"step": 3945,
|
|
"valid_targets_mean": 15862.3,
|
|
"valid_targets_min": 40
|
|
},
|
|
{
|
|
"epoch": 1.0541766746730719,
|
|
"grad_norm": 0.5229883887191916,
|
|
"learning_rate": 8.103767144593445e-05,
|
|
"loss": 0.5978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5901090502738953,
|
|
"step": 3950,
|
|
"valid_targets_mean": 16749.9,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 1.0555110755270882,
|
|
"grad_norm": 0.4728196990146652,
|
|
"learning_rate": 8.086353398623154e-05,
|
|
"loss": 0.5914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5970526337623596,
|
|
"step": 3955,
|
|
"valid_targets_mean": 16564.7,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.0568454763811048,
|
|
"grad_norm": 0.5396700684131644,
|
|
"learning_rate": 8.068936471651308e-05,
|
|
"loss": 0.6052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5742120146751404,
|
|
"step": 3960,
|
|
"valid_targets_mean": 15665.1,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.0581798772351214,
|
|
"grad_norm": 0.39065735194712553,
|
|
"learning_rate": 8.051516458165759e-05,
|
|
"loss": 0.594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6119595170021057,
|
|
"step": 3965,
|
|
"valid_targets_mean": 15060.5,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 1.059514278089138,
|
|
"grad_norm": 0.4593056562963595,
|
|
"learning_rate": 8.0340934526711e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6290844678878784,
|
|
"step": 3970,
|
|
"valid_targets_mean": 15527.7,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.0608486789431546,
|
|
"grad_norm": 0.40920946492053945,
|
|
"learning_rate": 8.016667549688157e-05,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5855837464332581,
|
|
"step": 3975,
|
|
"valid_targets_mean": 16526.8,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 1.062183079797171,
|
|
"grad_norm": 0.46026687220809426,
|
|
"learning_rate": 7.999238843753474e-05,
|
|
"loss": 0.5988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.588485598564148,
|
|
"step": 3980,
|
|
"valid_targets_mean": 15916.0,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 1.0635174806511876,
|
|
"grad_norm": 0.41338177927826364,
|
|
"learning_rate": 7.981807429418803e-05,
|
|
"loss": 0.6255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234038472175598,
|
|
"step": 3985,
|
|
"valid_targets_mean": 15702.3,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.0648518815052042,
|
|
"grad_norm": 0.44812067294712354,
|
|
"learning_rate": 7.96437340125059e-05,
|
|
"loss": 0.5849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5726232528686523,
|
|
"step": 3990,
|
|
"valid_targets_mean": 17405.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.0661862823592207,
|
|
"grad_norm": 0.37211637583987855,
|
|
"learning_rate": 7.946936853829458e-05,
|
|
"loss": 0.6035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6075381636619568,
|
|
"step": 3995,
|
|
"valid_targets_mean": 16482.9,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.0675206832132373,
|
|
"grad_norm": 0.42373416536725417,
|
|
"learning_rate": 7.929497881749699e-05,
|
|
"loss": 0.5986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.594200611114502,
|
|
"step": 4000,
|
|
"valid_targets_mean": 17066.9,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 1.0688550840672537,
|
|
"grad_norm": 0.3914037196829223,
|
|
"learning_rate": 7.912056579618759e-05,
|
|
"loss": 0.6101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5865424871444702,
|
|
"step": 4005,
|
|
"valid_targets_mean": 16414.9,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 1.0701894849212703,
|
|
"grad_norm": 0.36788284503140656,
|
|
"learning_rate": 7.894613042056721e-05,
|
|
"loss": 0.5826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6006873250007629,
|
|
"step": 4010,
|
|
"valid_targets_mean": 15932.2,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 1.071523885775287,
|
|
"grad_norm": 0.2916092674005148,
|
|
"learning_rate": 7.877167363695805e-05,
|
|
"loss": 0.6027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6191377639770508,
|
|
"step": 4015,
|
|
"valid_targets_mean": 15171.0,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.0728582866293035,
|
|
"grad_norm": 0.3703056092797146,
|
|
"learning_rate": 7.859719639179834e-05,
|
|
"loss": 0.5863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5682851672172546,
|
|
"step": 4020,
|
|
"valid_targets_mean": 16206.3,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 1.07419268748332,
|
|
"grad_norm": 0.2758510848401202,
|
|
"learning_rate": 7.842269963163735e-05,
|
|
"loss": 0.6048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5797070264816284,
|
|
"step": 4025,
|
|
"valid_targets_mean": 17413.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.0755270883373365,
|
|
"grad_norm": 0.3275128650166002,
|
|
"learning_rate": 7.824818430313028e-05,
|
|
"loss": 0.5987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5855237245559692,
|
|
"step": 4030,
|
|
"valid_targets_mean": 16071.4,
|
|
"valid_targets_min": 61
|
|
},
|
|
{
|
|
"epoch": 1.076861489191353,
|
|
"grad_norm": 0.3412770087192014,
|
|
"learning_rate": 7.807365135303299e-05,
|
|
"loss": 0.6152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6232606172561646,
|
|
"step": 4035,
|
|
"valid_targets_mean": 15833.4,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.0781958900453696,
|
|
"grad_norm": 0.33999788333188585,
|
|
"learning_rate": 7.789910172819693e-05,
|
|
"loss": 0.5986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5924199819564819,
|
|
"step": 4040,
|
|
"valid_targets_mean": 16710.9,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.0795302908993862,
|
|
"grad_norm": 0.36260912648773397,
|
|
"learning_rate": 7.772453637556411e-05,
|
|
"loss": 0.6208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6322931051254272,
|
|
"step": 4045,
|
|
"valid_targets_mean": 16038.3,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 1.0808646917534026,
|
|
"grad_norm": 0.35220327417259206,
|
|
"learning_rate": 7.754995624216176e-05,
|
|
"loss": 0.6002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5823273658752441,
|
|
"step": 4050,
|
|
"valid_targets_mean": 16727.7,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.0821990926074192,
|
|
"grad_norm": 0.4864103700141048,
|
|
"learning_rate": 7.737536227509734e-05,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5839403867721558,
|
|
"step": 4055,
|
|
"valid_targets_mean": 15676.5,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 1.0835334934614358,
|
|
"grad_norm": 0.3790317408511732,
|
|
"learning_rate": 7.720075542155336e-05,
|
|
"loss": 0.6277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6375101804733276,
|
|
"step": 4060,
|
|
"valid_targets_mean": 15747.6,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 1.0848678943154524,
|
|
"grad_norm": 0.34787300200592175,
|
|
"learning_rate": 7.702613662878223e-05,
|
|
"loss": 0.6163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6184200048446655,
|
|
"step": 4065,
|
|
"valid_targets_mean": 15998.6,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.086202295169469,
|
|
"grad_norm": 0.27071999547248415,
|
|
"learning_rate": 7.685150684410114e-05,
|
|
"loss": 0.5985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5919818878173828,
|
|
"step": 4070,
|
|
"valid_targets_mean": 15357.0,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.0875366960234856,
|
|
"grad_norm": 0.32451365522562825,
|
|
"learning_rate": 7.667686701488688e-05,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5560876131057739,
|
|
"step": 4075,
|
|
"valid_targets_mean": 17877.9,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 1.088871096877502,
|
|
"grad_norm": 0.3881096398697237,
|
|
"learning_rate": 7.650221808857081e-05,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.596640408039093,
|
|
"step": 4080,
|
|
"valid_targets_mean": 16333.7,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 1.0902054977315185,
|
|
"grad_norm": 0.28008609577192756,
|
|
"learning_rate": 7.632756101263358e-05,
|
|
"loss": 0.5888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5497901439666748,
|
|
"step": 4085,
|
|
"valid_targets_mean": 17502.5,
|
|
"valid_targets_min": 415
|
|
},
|
|
{
|
|
"epoch": 1.0915398985855351,
|
|
"grad_norm": 0.40852074144248085,
|
|
"learning_rate": 7.615289673460003e-05,
|
|
"loss": 0.6023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6026955842971802,
|
|
"step": 4090,
|
|
"valid_targets_mean": 14722.9,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 1.0928742994395517,
|
|
"grad_norm": 0.3377885391386289,
|
|
"learning_rate": 7.59782262020341e-05,
|
|
"loss": 0.5906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6042886972427368,
|
|
"step": 4095,
|
|
"valid_targets_mean": 16119.0,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 1.094208700293568,
|
|
"grad_norm": 0.3636416145030373,
|
|
"learning_rate": 7.580355036253372e-05,
|
|
"loss": 0.6024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5962235331535339,
|
|
"step": 4100,
|
|
"valid_targets_mean": 14884.0,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 1.0955431011475847,
|
|
"grad_norm": 0.2796195315960068,
|
|
"learning_rate": 7.562887016372551e-05,
|
|
"loss": 0.6077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6036214828491211,
|
|
"step": 4105,
|
|
"valid_targets_mean": 15731.4,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 1.0968775020016013,
|
|
"grad_norm": 0.3801739652085176,
|
|
"learning_rate": 7.54541865532598e-05,
|
|
"loss": 0.602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5916723012924194,
|
|
"step": 4110,
|
|
"valid_targets_mean": 16717.8,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.0982119028556179,
|
|
"grad_norm": 0.29474613873650163,
|
|
"learning_rate": 7.527950047880543e-05,
|
|
"loss": 0.6016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6182796359062195,
|
|
"step": 4115,
|
|
"valid_targets_mean": 15451.0,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 1.0995463037096345,
|
|
"grad_norm": 0.2653542372013782,
|
|
"learning_rate": 7.51048128880446e-05,
|
|
"loss": 0.6074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6237516403198242,
|
|
"step": 4120,
|
|
"valid_targets_mean": 14658.4,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.1008807045636508,
|
|
"grad_norm": 0.2580311694584571,
|
|
"learning_rate": 7.493012472866769e-05,
|
|
"loss": 0.6027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5956270098686218,
|
|
"step": 4125,
|
|
"valid_targets_mean": 16665.0,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 1.1022151054176674,
|
|
"grad_norm": 0.33966126879080366,
|
|
"learning_rate": 7.47554369483682e-05,
|
|
"loss": 0.6022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6200779676437378,
|
|
"step": 4130,
|
|
"valid_targets_mean": 15626.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.103549506271684,
|
|
"grad_norm": 0.3325337972812342,
|
|
"learning_rate": 7.458075049483762e-05,
|
|
"loss": 0.5945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5936934947967529,
|
|
"step": 4135,
|
|
"valid_targets_mean": 16802.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 1.1048839071257006,
|
|
"grad_norm": 0.2800792855939298,
|
|
"learning_rate": 7.44060663157602e-05,
|
|
"loss": 0.6033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6194767951965332,
|
|
"step": 4140,
|
|
"valid_targets_mean": 16379.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 1.1062183079797172,
|
|
"grad_norm": 0.29176434819055114,
|
|
"learning_rate": 7.423138535880779e-05,
|
|
"loss": 0.6026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5879778861999512,
|
|
"step": 4145,
|
|
"valid_targets_mean": 16280.6,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 1.1075527088337336,
|
|
"grad_norm": 0.2835063383721158,
|
|
"learning_rate": 7.405670857163489e-05,
|
|
"loss": 0.6106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6160388588905334,
|
|
"step": 4150,
|
|
"valid_targets_mean": 16408.5,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.1088871096877502,
|
|
"grad_norm": 0.31251798770839306,
|
|
"learning_rate": 7.388203690187325e-05,
|
|
"loss": 0.6031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6266306638717651,
|
|
"step": 4155,
|
|
"valid_targets_mean": 14556.0,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.1102215105417668,
|
|
"grad_norm": 0.3064354700564066,
|
|
"learning_rate": 7.370737129712695e-05,
|
|
"loss": 0.5892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5773193836212158,
|
|
"step": 4160,
|
|
"valid_targets_mean": 16209.6,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 1.1115559113957834,
|
|
"grad_norm": 0.26141133680604506,
|
|
"learning_rate": 7.353271270496713e-05,
|
|
"loss": 0.6097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6453293561935425,
|
|
"step": 4165,
|
|
"valid_targets_mean": 15267.4,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 1.1128903122498,
|
|
"grad_norm": 0.26244489234882723,
|
|
"learning_rate": 7.335806207292687e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5935405492782593,
|
|
"step": 4170,
|
|
"valid_targets_mean": 16030.8,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 1.1142247131038163,
|
|
"grad_norm": 0.3910474355938962,
|
|
"learning_rate": 7.31834203484961e-05,
|
|
"loss": 0.6063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6182336211204529,
|
|
"step": 4175,
|
|
"valid_targets_mean": 14981.8,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.115559113957833,
|
|
"grad_norm": 0.3150996733557663,
|
|
"learning_rate": 7.300878847911642e-05,
|
|
"loss": 0.6083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6151427626609802,
|
|
"step": 4180,
|
|
"valid_targets_mean": 15251.6,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.1168935148118495,
|
|
"grad_norm": 0.24806222257823488,
|
|
"learning_rate": 7.283416741217595e-05,
|
|
"loss": 0.5863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5705355405807495,
|
|
"step": 4185,
|
|
"valid_targets_mean": 17316.5,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 1.118227915665866,
|
|
"grad_norm": 0.3124041203185742,
|
|
"learning_rate": 7.26595580950042e-05,
|
|
"loss": 0.6158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485847234725952,
|
|
"step": 4190,
|
|
"valid_targets_mean": 15875.1,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 1.1195623165198825,
|
|
"grad_norm": 0.37971496808314315,
|
|
"learning_rate": 7.248496147486695e-05,
|
|
"loss": 0.5857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5784488916397095,
|
|
"step": 4195,
|
|
"valid_targets_mean": 17062.4,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 1.120896717373899,
|
|
"grad_norm": 0.7932004381188809,
|
|
"learning_rate": 7.23103784989611e-05,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6217626929283142,
|
|
"step": 4200,
|
|
"valid_targets_mean": 15889.3,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 1.1222311182279157,
|
|
"grad_norm": 0.4868104239933692,
|
|
"learning_rate": 7.213581011440954e-05,
|
|
"loss": 0.633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6174220442771912,
|
|
"step": 4205,
|
|
"valid_targets_mean": 16341.5,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 1.1235655190819323,
|
|
"grad_norm": 0.5902127688698259,
|
|
"learning_rate": 7.1961257268256e-05,
|
|
"loss": 0.6032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6067975759506226,
|
|
"step": 4210,
|
|
"valid_targets_mean": 14791.9,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 1.1248999199359488,
|
|
"grad_norm": 0.3536516478373195,
|
|
"learning_rate": 7.178672090745986e-05,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5986104011535645,
|
|
"step": 4215,
|
|
"valid_targets_mean": 16207.2,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 1.1262343207899652,
|
|
"grad_norm": 0.30077100249154387,
|
|
"learning_rate": 7.161220197889114e-05,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6378801465034485,
|
|
"step": 4220,
|
|
"valid_targets_mean": 16880.7,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.1275687216439818,
|
|
"grad_norm": 0.27729680429800824,
|
|
"learning_rate": 7.143770142932524e-05,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5797771215438843,
|
|
"step": 4225,
|
|
"valid_targets_mean": 17065.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.1289031224979984,
|
|
"grad_norm": 0.27223236223082237,
|
|
"learning_rate": 7.126322020543785e-05,
|
|
"loss": 0.5982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5857571363449097,
|
|
"step": 4230,
|
|
"valid_targets_mean": 15327.6,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.130237523352015,
|
|
"grad_norm": 0.2591278577330893,
|
|
"learning_rate": 7.108875925379991e-05,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5912514925003052,
|
|
"step": 4235,
|
|
"valid_targets_mean": 14846.1,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.1315719242060316,
|
|
"grad_norm": 0.31767585730342623,
|
|
"learning_rate": 7.091431952087221e-05,
|
|
"loss": 0.6178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6124032735824585,
|
|
"step": 4240,
|
|
"valid_targets_mean": 15918.7,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.132906325060048,
|
|
"grad_norm": 0.24375473388947871,
|
|
"learning_rate": 7.073990195300058e-05,
|
|
"loss": 0.5956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5829209089279175,
|
|
"step": 4245,
|
|
"valid_targets_mean": 15267.2,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 1.1342407259140646,
|
|
"grad_norm": 0.2127941498929142,
|
|
"learning_rate": 7.056550749641056e-05,
|
|
"loss": 0.6004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6242984533309937,
|
|
"step": 4250,
|
|
"valid_targets_mean": 15888.2,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 1.1355751267680811,
|
|
"grad_norm": 0.3167067915599793,
|
|
"learning_rate": 7.039113709720227e-05,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5977542400360107,
|
|
"step": 4255,
|
|
"valid_targets_mean": 15727.3,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 1.1369095276220977,
|
|
"grad_norm": 0.27993316101713944,
|
|
"learning_rate": 7.021679170134533e-05,
|
|
"loss": 0.5925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.587902843952179,
|
|
"step": 4260,
|
|
"valid_targets_mean": 16104.0,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.1382439284761143,
|
|
"grad_norm": 0.31043922998417844,
|
|
"learning_rate": 7.004247225467381e-05,
|
|
"loss": 0.5966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5865461230278015,
|
|
"step": 4265,
|
|
"valid_targets_mean": 16515.5,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.1395783293301307,
|
|
"grad_norm": 0.2689763541971217,
|
|
"learning_rate": 6.986817970288082e-05,
|
|
"loss": 0.6006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5762760639190674,
|
|
"step": 4270,
|
|
"valid_targets_mean": 16571.0,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 1.1409127301841473,
|
|
"grad_norm": 0.22897307468939943,
|
|
"learning_rate": 6.969391499151371e-05,
|
|
"loss": 0.5863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5768278241157532,
|
|
"step": 4275,
|
|
"valid_targets_mean": 15187.9,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 1.1422471310381639,
|
|
"grad_norm": 0.2574952643742215,
|
|
"learning_rate": 6.95196790659688e-05,
|
|
"loss": 0.5983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6268581748008728,
|
|
"step": 4280,
|
|
"valid_targets_mean": 15087.0,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 1.1435815318921805,
|
|
"grad_norm": 0.31001744346505816,
|
|
"learning_rate": 6.934547287148614e-05,
|
|
"loss": 0.6097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6045074462890625,
|
|
"step": 4285,
|
|
"valid_targets_mean": 15647.8,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 1.1449159327461969,
|
|
"grad_norm": 0.27988511418077017,
|
|
"learning_rate": 6.917129735314455e-05,
|
|
"loss": 0.6038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6157107949256897,
|
|
"step": 4290,
|
|
"valid_targets_mean": 15646.0,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 1.1462503336002134,
|
|
"grad_norm": 0.284623906189759,
|
|
"learning_rate": 6.899715345585649e-05,
|
|
"loss": 0.5861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6071814298629761,
|
|
"step": 4295,
|
|
"valid_targets_mean": 16459.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.14758473445423,
|
|
"grad_norm": 0.23613016291869346,
|
|
"learning_rate": 6.882304212436272e-05,
|
|
"loss": 0.6057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5901815891265869,
|
|
"step": 4300,
|
|
"valid_targets_mean": 16384.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.1489191353082466,
|
|
"grad_norm": 0.7099036688341175,
|
|
"learning_rate": 6.864896430322745e-05,
|
|
"loss": 0.6244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6605739593505859,
|
|
"step": 4305,
|
|
"valid_targets_mean": 15084.9,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 1.1502535361622632,
|
|
"grad_norm": 0.2660867664547303,
|
|
"learning_rate": 6.84749209368331e-05,
|
|
"loss": 0.5833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5416364669799805,
|
|
"step": 4310,
|
|
"valid_targets_mean": 17516.2,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 1.1515879370162796,
|
|
"grad_norm": 0.24899135484110319,
|
|
"learning_rate": 6.830091296937509e-05,
|
|
"loss": 0.5968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5933257341384888,
|
|
"step": 4315,
|
|
"valid_targets_mean": 15933.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.1529223378702962,
|
|
"grad_norm": 0.28429870191220086,
|
|
"learning_rate": 6.812694134485686e-05,
|
|
"loss": 0.6016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5702193379402161,
|
|
"step": 4320,
|
|
"valid_targets_mean": 16298.5,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 1.1542567387243128,
|
|
"grad_norm": 0.3432717505307207,
|
|
"learning_rate": 6.795300700708466e-05,
|
|
"loss": 0.6048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5982928276062012,
|
|
"step": 4325,
|
|
"valid_targets_mean": 16746.6,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 1.1555911395783294,
|
|
"grad_norm": 0.30284190514883463,
|
|
"learning_rate": 6.777911089966245e-05,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6121088266372681,
|
|
"step": 4330,
|
|
"valid_targets_mean": 15415.2,
|
|
"valid_targets_min": 63
|
|
},
|
|
{
|
|
"epoch": 1.156925540432346,
|
|
"grad_norm": 0.30760369252329195,
|
|
"learning_rate": 6.760525396598686e-05,
|
|
"loss": 0.6142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6483354568481445,
|
|
"step": 4335,
|
|
"valid_targets_mean": 15703.1,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 1.1582599412863623,
|
|
"grad_norm": 0.35568703892758397,
|
|
"learning_rate": 6.74314371492419e-05,
|
|
"loss": 0.5889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6182370185852051,
|
|
"step": 4340,
|
|
"valid_targets_mean": 15757.3,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.159594342140379,
|
|
"grad_norm": 0.2984066417689251,
|
|
"learning_rate": 6.725766139239399e-05,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5672701597213745,
|
|
"step": 4345,
|
|
"valid_targets_mean": 15679.0,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 1.1609287429943955,
|
|
"grad_norm": 0.35563150663132786,
|
|
"learning_rate": 6.708392763818681e-05,
|
|
"loss": 0.5988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5748165249824524,
|
|
"step": 4350,
|
|
"valid_targets_mean": 15702.1,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 1.1622631438484121,
|
|
"grad_norm": 0.4725264739524481,
|
|
"learning_rate": 6.691023682913616e-05,
|
|
"loss": 0.6136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6290553212165833,
|
|
"step": 4355,
|
|
"valid_targets_mean": 17302.8,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 1.1635975447024287,
|
|
"grad_norm": 0.41649020656338065,
|
|
"learning_rate": 6.673658990752484e-05,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5825016498565674,
|
|
"step": 4360,
|
|
"valid_targets_mean": 16416.6,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 1.164931945556445,
|
|
"grad_norm": 0.4726742196256588,
|
|
"learning_rate": 6.656298781539764e-05,
|
|
"loss": 0.5896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5909591913223267,
|
|
"step": 4365,
|
|
"valid_targets_mean": 16828.3,
|
|
"valid_targets_min": 133
|
|
},
|
|
{
|
|
"epoch": 1.1662663464104617,
|
|
"grad_norm": 0.3577791658502569,
|
|
"learning_rate": 6.638943149455598e-05,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5973008275032043,
|
|
"step": 4370,
|
|
"valid_targets_mean": 15010.5,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.1676007472644783,
|
|
"grad_norm": 0.45347121217280895,
|
|
"learning_rate": 6.621592188655314e-05,
|
|
"loss": 0.5965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5748577117919922,
|
|
"step": 4375,
|
|
"valid_targets_mean": 15569.6,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.1689351481184949,
|
|
"grad_norm": 0.3649598802805446,
|
|
"learning_rate": 6.604245993268893e-05,
|
|
"loss": 0.5927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5843199491500854,
|
|
"step": 4380,
|
|
"valid_targets_mean": 16538.5,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.1702695489725112,
|
|
"grad_norm": 0.397739643309116,
|
|
"learning_rate": 6.586904657400457e-05,
|
|
"loss": 0.5992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5765359401702881,
|
|
"step": 4385,
|
|
"valid_targets_mean": 15469.5,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 1.1716039498265278,
|
|
"grad_norm": 0.37300616388581065,
|
|
"learning_rate": 6.569568275127769e-05,
|
|
"loss": 0.5812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5807654857635498,
|
|
"step": 4390,
|
|
"valid_targets_mean": 16243.7,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 1.1729383506805444,
|
|
"grad_norm": 0.3840703505859544,
|
|
"learning_rate": 6.552236940501725e-05,
|
|
"loss": 0.5828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5711965560913086,
|
|
"step": 4395,
|
|
"valid_targets_mean": 14674.8,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 1.174272751534561,
|
|
"grad_norm": 0.37863289411399653,
|
|
"learning_rate": 6.534910747545825e-05,
|
|
"loss": 0.5854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5958431363105774,
|
|
"step": 4400,
|
|
"valid_targets_mean": 15368.2,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 1.1756071523885776,
|
|
"grad_norm": 0.4477581792181912,
|
|
"learning_rate": 6.517589790255686e-05,
|
|
"loss": 0.6129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6343816518783569,
|
|
"step": 4405,
|
|
"valid_targets_mean": 14593.0,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 1.1769415532425942,
|
|
"grad_norm": 0.3270473622838311,
|
|
"learning_rate": 6.500274162598512e-05,
|
|
"loss": 0.606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6062087416648865,
|
|
"step": 4410,
|
|
"valid_targets_mean": 15336.0,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 1.1782759540966106,
|
|
"grad_norm": 0.4103892887347845,
|
|
"learning_rate": 6.482963958512599e-05,
|
|
"loss": 0.598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6063992977142334,
|
|
"step": 4415,
|
|
"valid_targets_mean": 15912.9,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 1.1796103549506272,
|
|
"grad_norm": 0.3631692668594323,
|
|
"learning_rate": 6.465659271906823e-05,
|
|
"loss": 0.593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5838413238525391,
|
|
"step": 4420,
|
|
"valid_targets_mean": 16326.5,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 1.1809447558046438,
|
|
"grad_norm": 0.3752812321904994,
|
|
"learning_rate": 6.448360196660116e-05,
|
|
"loss": 0.6013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5776430368423462,
|
|
"step": 4425,
|
|
"valid_targets_mean": 16754.6,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.1822791566586603,
|
|
"grad_norm": 0.3642942093840262,
|
|
"learning_rate": 6.43106682662098e-05,
|
|
"loss": 0.5937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5928678512573242,
|
|
"step": 4430,
|
|
"valid_targets_mean": 15495.9,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 1.1836135575126767,
|
|
"grad_norm": 0.3520645453564209,
|
|
"learning_rate": 6.413779255606961e-05,
|
|
"loss": 0.5813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5934560894966125,
|
|
"step": 4435,
|
|
"valid_targets_mean": 15091.7,
|
|
"valid_targets_min": 171
|
|
},
|
|
{
|
|
"epoch": 1.1849479583666933,
|
|
"grad_norm": 0.21548984155232773,
|
|
"learning_rate": 6.396497577404143e-05,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.612282395362854,
|
|
"step": 4440,
|
|
"valid_targets_mean": 16170.2,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.18628235922071,
|
|
"grad_norm": 0.21533211012293957,
|
|
"learning_rate": 6.379221885766643e-05,
|
|
"loss": 0.5939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5665504932403564,
|
|
"step": 4445,
|
|
"valid_targets_mean": 16189.7,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 1.1876167600747265,
|
|
"grad_norm": 0.43172732636361405,
|
|
"learning_rate": 6.361952274416106e-05,
|
|
"loss": 0.5896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5815595984458923,
|
|
"step": 4450,
|
|
"valid_targets_mean": 17214.6,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.188951160928743,
|
|
"grad_norm": 0.3749421262001143,
|
|
"learning_rate": 6.344688837041177e-05,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5799713134765625,
|
|
"step": 4455,
|
|
"valid_targets_mean": 18348.3,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 1.1902855617827595,
|
|
"grad_norm": 0.3493641365006668,
|
|
"learning_rate": 6.32743166729702e-05,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5951375365257263,
|
|
"step": 4460,
|
|
"valid_targets_mean": 16188.1,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.191619962636776,
|
|
"grad_norm": 0.3065563699541395,
|
|
"learning_rate": 6.310180858804794e-05,
|
|
"loss": 0.6135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.61883544921875,
|
|
"step": 4465,
|
|
"valid_targets_mean": 15588.0,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 1.1929543634907926,
|
|
"grad_norm": 0.36380231112006145,
|
|
"learning_rate": 6.292936505151145e-05,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6210154294967651,
|
|
"step": 4470,
|
|
"valid_targets_mean": 14523.7,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.1942887643448092,
|
|
"grad_norm": 0.3314227814860259,
|
|
"learning_rate": 6.275698699887699e-05,
|
|
"loss": 0.5821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6138677000999451,
|
|
"step": 4475,
|
|
"valid_targets_mean": 16254.9,
|
|
"valid_targets_min": 57
|
|
},
|
|
{
|
|
"epoch": 1.1956231651988256,
|
|
"grad_norm": 0.3529872358081249,
|
|
"learning_rate": 6.258467536530565e-05,
|
|
"loss": 0.5997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5828440189361572,
|
|
"step": 4480,
|
|
"valid_targets_mean": 15555.3,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.1969575660528422,
|
|
"grad_norm": 0.27354690781074714,
|
|
"learning_rate": 6.241243108559811e-05,
|
|
"loss": 0.6013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5828770399093628,
|
|
"step": 4485,
|
|
"valid_targets_mean": 16204.0,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 1.1982919669068588,
|
|
"grad_norm": 0.24452469247733344,
|
|
"learning_rate": 6.224025509418971e-05,
|
|
"loss": 0.6148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6097055673599243,
|
|
"step": 4490,
|
|
"valid_targets_mean": 14923.2,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 1.1996263677608754,
|
|
"grad_norm": 0.25794393835991686,
|
|
"learning_rate": 6.206814832514525e-05,
|
|
"loss": 0.5996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6198200583457947,
|
|
"step": 4495,
|
|
"valid_targets_mean": 15636.4,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 1.200960768614892,
|
|
"grad_norm": 0.3697274323238262,
|
|
"learning_rate": 6.189611171215405e-05,
|
|
"loss": 0.5956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6122844219207764,
|
|
"step": 4500,
|
|
"valid_targets_mean": 16200.8,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 1.2022951694689086,
|
|
"grad_norm": 0.2851304729295163,
|
|
"learning_rate": 6.172414618852488e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6054625511169434,
|
|
"step": 4505,
|
|
"valid_targets_mean": 17160.3,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 1.203629570322925,
|
|
"grad_norm": 0.2603392404494807,
|
|
"learning_rate": 6.155225268718069e-05,
|
|
"loss": 0.5858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6194175481796265,
|
|
"step": 4510,
|
|
"valid_targets_mean": 15548.8,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 1.2049639711769415,
|
|
"grad_norm": 0.23632978796159737,
|
|
"learning_rate": 6.138043214065385e-05,
|
|
"loss": 0.5931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5853152871131897,
|
|
"step": 4515,
|
|
"valid_targets_mean": 16253.5,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.2062983720309581,
|
|
"grad_norm": 0.4844945781749991,
|
|
"learning_rate": 6.120868548108093e-05,
|
|
"loss": 0.5874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6011490821838379,
|
|
"step": 4520,
|
|
"valid_targets_mean": 15423.4,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.2076327728849747,
|
|
"grad_norm": 0.2819030369362368,
|
|
"learning_rate": 6.103701364019754e-05,
|
|
"loss": 0.5992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5920343995094299,
|
|
"step": 4525,
|
|
"valid_targets_mean": 16631.1,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.208967173738991,
|
|
"grad_norm": 0.2918306704913203,
|
|
"learning_rate": 6.0865417549333536e-05,
|
|
"loss": 0.6078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6191456317901611,
|
|
"step": 4530,
|
|
"valid_targets_mean": 15389.3,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.2103015745930077,
|
|
"grad_norm": 0.26350998098499123,
|
|
"learning_rate": 6.0693898139407786e-05,
|
|
"loss": 0.6084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6131468415260315,
|
|
"step": 4535,
|
|
"valid_targets_mean": 15042.1,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 1.2116359754470243,
|
|
"grad_norm": 0.34680713886085474,
|
|
"learning_rate": 6.0522456340923085e-05,
|
|
"loss": 0.593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6055951118469238,
|
|
"step": 4540,
|
|
"valid_targets_mean": 15969.4,
|
|
"valid_targets_min": 154
|
|
},
|
|
{
|
|
"epoch": 1.2129703763010409,
|
|
"grad_norm": 0.2991580039441314,
|
|
"learning_rate": 6.0351093083961267e-05,
|
|
"loss": 0.5874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.596157431602478,
|
|
"step": 4545,
|
|
"valid_targets_mean": 15783.5,
|
|
"valid_targets_min": 30
|
|
},
|
|
{
|
|
"epoch": 1.2143047771550575,
|
|
"grad_norm": 0.31461751378381264,
|
|
"learning_rate": 6.017980929817807e-05,
|
|
"loss": 0.596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6240620017051697,
|
|
"step": 4550,
|
|
"valid_targets_mean": 16475.7,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.2156391780090738,
|
|
"grad_norm": 0.2880609654236511,
|
|
"learning_rate": 6.000860591279801e-05,
|
|
"loss": 0.5988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6084235906600952,
|
|
"step": 4555,
|
|
"valid_targets_mean": 14790.1,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.2169735788630904,
|
|
"grad_norm": 0.4516901675082028,
|
|
"learning_rate": 5.9837483856609527e-05,
|
|
"loss": 0.6011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5949472188949585,
|
|
"step": 4560,
|
|
"valid_targets_mean": 16267.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.218307979717107,
|
|
"grad_norm": 0.3693560610908532,
|
|
"learning_rate": 5.966644405795982e-05,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6293297410011292,
|
|
"step": 4565,
|
|
"valid_targets_mean": 16069.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.2196423805711236,
|
|
"grad_norm": 0.3723486353807144,
|
|
"learning_rate": 5.949548744474978e-05,
|
|
"loss": 0.5878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.569554328918457,
|
|
"step": 4570,
|
|
"valid_targets_mean": 15508.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.22097678142514,
|
|
"grad_norm": 0.3842529043040275,
|
|
"learning_rate": 5.9324614944429095e-05,
|
|
"loss": 0.6043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5914621949195862,
|
|
"step": 4575,
|
|
"valid_targets_mean": 15604.2,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.2223111822791566,
|
|
"grad_norm": 0.37655008091615877,
|
|
"learning_rate": 5.915382748399105e-05,
|
|
"loss": 0.6088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5930585265159607,
|
|
"step": 4580,
|
|
"valid_targets_mean": 16328.4,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 1.2236455831331732,
|
|
"grad_norm": 0.37071745643114606,
|
|
"learning_rate": 5.8983125989967646e-05,
|
|
"loss": 0.6147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6511808633804321,
|
|
"step": 4585,
|
|
"valid_targets_mean": 16443.6,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 1.2249799839871898,
|
|
"grad_norm": 0.39020668353439725,
|
|
"learning_rate": 5.881251138842453e-05,
|
|
"loss": 0.5941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.561487078666687,
|
|
"step": 4590,
|
|
"valid_targets_mean": 16465.2,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 1.2263143848412064,
|
|
"grad_norm": 0.23579950042688647,
|
|
"learning_rate": 5.864198460495585e-05,
|
|
"loss": 0.6006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5959711670875549,
|
|
"step": 4595,
|
|
"valid_targets_mean": 15317.1,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 1.227648785695223,
|
|
"grad_norm": 0.3149893359674708,
|
|
"learning_rate": 5.847154656467944e-05,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5710833072662354,
|
|
"step": 4600,
|
|
"valid_targets_mean": 16656.5,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.2289831865492393,
|
|
"grad_norm": 0.296258079297135,
|
|
"learning_rate": 5.830119819223166e-05,
|
|
"loss": 0.593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5951532125473022,
|
|
"step": 4605,
|
|
"valid_targets_mean": 16023.0,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.230317587403256,
|
|
"grad_norm": 0.3802746063275357,
|
|
"learning_rate": 5.8130940411762406e-05,
|
|
"loss": 0.5908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6089481711387634,
|
|
"step": 4610,
|
|
"valid_targets_mean": 17038.8,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 1.2316519882572725,
|
|
"grad_norm": 0.3478708443012052,
|
|
"learning_rate": 5.79607741469301e-05,
|
|
"loss": 0.6099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5820724964141846,
|
|
"step": 4615,
|
|
"valid_targets_mean": 16394.3,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.232986389111289,
|
|
"grad_norm": 0.38691919321426016,
|
|
"learning_rate": 5.779070032089674e-05,
|
|
"loss": 0.6041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6047302484512329,
|
|
"step": 4620,
|
|
"valid_targets_mean": 15922.8,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.2343207899653055,
|
|
"grad_norm": 0.3068426655265928,
|
|
"learning_rate": 5.762071985632276e-05,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5962592959403992,
|
|
"step": 4625,
|
|
"valid_targets_mean": 16840.9,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.235655190819322,
|
|
"grad_norm": 0.3978013587115205,
|
|
"learning_rate": 5.745083367536217e-05,
|
|
"loss": 0.5906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5848004817962646,
|
|
"step": 4630,
|
|
"valid_targets_mean": 15698.9,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.2369895916733387,
|
|
"grad_norm": 0.32933567857837664,
|
|
"learning_rate": 5.7281042699657465e-05,
|
|
"loss": 0.604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6038892865180969,
|
|
"step": 4635,
|
|
"valid_targets_mean": 16009.4,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 1.2383239925273553,
|
|
"grad_norm": 0.38848421103646785,
|
|
"learning_rate": 5.71113478503346e-05,
|
|
"loss": 0.598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6145868301391602,
|
|
"step": 4640,
|
|
"valid_targets_mean": 16181.2,
|
|
"valid_targets_min": 31
|
|
},
|
|
{
|
|
"epoch": 1.2396583933813718,
|
|
"grad_norm": 0.30941335141012877,
|
|
"learning_rate": 5.694175004799814e-05,
|
|
"loss": 0.602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5926676988601685,
|
|
"step": 4645,
|
|
"valid_targets_mean": 16058.1,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 1.2409927942353882,
|
|
"grad_norm": 0.3053881051934955,
|
|
"learning_rate": 5.6772250212726025e-05,
|
|
"loss": 0.5888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5941856503486633,
|
|
"step": 4650,
|
|
"valid_targets_mean": 14780.3,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.2423271950894048,
|
|
"grad_norm": 0.25752081061216897,
|
|
"learning_rate": 5.660284926406484e-05,
|
|
"loss": 0.5901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6042780876159668,
|
|
"step": 4655,
|
|
"valid_targets_mean": 16847.3,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.2436615959434214,
|
|
"grad_norm": 0.3718012087846061,
|
|
"learning_rate": 5.64335481210247e-05,
|
|
"loss": 0.6052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6086214184761047,
|
|
"step": 4660,
|
|
"valid_targets_mean": 17391.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 1.244995996797438,
|
|
"grad_norm": 0.30488851628559444,
|
|
"learning_rate": 5.626434770207414e-05,
|
|
"loss": 0.5926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5924539566040039,
|
|
"step": 4665,
|
|
"valid_targets_mean": 16268.7,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 1.2463303976514546,
|
|
"grad_norm": 0.3097058970074245,
|
|
"learning_rate": 5.60952489251354e-05,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5813044309616089,
|
|
"step": 4670,
|
|
"valid_targets_mean": 17170.6,
|
|
"valid_targets_min": 43
|
|
},
|
|
{
|
|
"epoch": 1.247664798505471,
|
|
"grad_norm": 0.3720595051065334,
|
|
"learning_rate": 5.592625270757928e-05,
|
|
"loss": 0.6034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6146562099456787,
|
|
"step": 4675,
|
|
"valid_targets_mean": 14678.0,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 1.2489991993594876,
|
|
"grad_norm": 0.2834506741884633,
|
|
"learning_rate": 5.575735996622011e-05,
|
|
"loss": 0.5839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.568305253982544,
|
|
"step": 4680,
|
|
"valid_targets_mean": 17661.6,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.2503336002135041,
|
|
"grad_norm": 0.2976879759606349,
|
|
"learning_rate": 5.558857161731093e-05,
|
|
"loss": 0.5984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6380177736282349,
|
|
"step": 4685,
|
|
"valid_targets_mean": 15599.7,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 1.2516680010675207,
|
|
"grad_norm": 0.2513140469642916,
|
|
"learning_rate": 5.541988857653847e-05,
|
|
"loss": 0.5965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6032347679138184,
|
|
"step": 4690,
|
|
"valid_targets_mean": 15771.1,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.2530024019215373,
|
|
"grad_norm": 0.24123673240298785,
|
|
"learning_rate": 5.5251311759018046e-05,
|
|
"loss": 0.5945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6128364205360413,
|
|
"step": 4695,
|
|
"valid_targets_mean": 15270.5,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.2543368027755537,
|
|
"grad_norm": 0.22916415274619137,
|
|
"learning_rate": 5.508284207928879e-05,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6064445972442627,
|
|
"step": 4700,
|
|
"valid_targets_mean": 15476.5,
|
|
"valid_targets_min": 33
|
|
},
|
|
{
|
|
"epoch": 1.2556712036295703,
|
|
"grad_norm": 0.23691348003106713,
|
|
"learning_rate": 5.491448045130865e-05,
|
|
"loss": 0.5897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5725358128547668,
|
|
"step": 4705,
|
|
"valid_targets_mean": 17270.2,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 1.257005604483587,
|
|
"grad_norm": 0.19993496434571217,
|
|
"learning_rate": 5.4746227788449236e-05,
|
|
"loss": 0.5852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5608433485031128,
|
|
"step": 4710,
|
|
"valid_targets_mean": 17248.0,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.2583400053376035,
|
|
"grad_norm": 0.19404118932882974,
|
|
"learning_rate": 5.457808500349115e-05,
|
|
"loss": 0.5992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6061972379684448,
|
|
"step": 4715,
|
|
"valid_targets_mean": 15697.4,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 1.2596744061916199,
|
|
"grad_norm": 0.20080011898223393,
|
|
"learning_rate": 5.441005300861887e-05,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6121039986610413,
|
|
"step": 4720,
|
|
"valid_targets_mean": 15350.3,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 1.2610088070456364,
|
|
"grad_norm": 0.21027729915005294,
|
|
"learning_rate": 5.424213271541578e-05,
|
|
"loss": 0.6162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6112270355224609,
|
|
"step": 4725,
|
|
"valid_targets_mean": 16553.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 1.262343207899653,
|
|
"grad_norm": 0.19841926561522874,
|
|
"learning_rate": 5.4074325034859336e-05,
|
|
"loss": 0.5928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5819110870361328,
|
|
"step": 4730,
|
|
"valid_targets_mean": 15418.9,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 1.2636776087536696,
|
|
"grad_norm": 0.19495520891773752,
|
|
"learning_rate": 5.390663087731605e-05,
|
|
"loss": 0.6071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5915564894676208,
|
|
"step": 4735,
|
|
"valid_targets_mean": 16489.7,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 1.2650120096076862,
|
|
"grad_norm": 0.28836513173381045,
|
|
"learning_rate": 5.3739051152536506e-05,
|
|
"loss": 0.5859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5791863203048706,
|
|
"step": 4740,
|
|
"valid_targets_mean": 16232.9,
|
|
"valid_targets_min": 60
|
|
},
|
|
{
|
|
"epoch": 1.2663464104617028,
|
|
"grad_norm": 0.25722341253251046,
|
|
"learning_rate": 5.357158676965059e-05,
|
|
"loss": 0.5996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6200342178344727,
|
|
"step": 4745,
|
|
"valid_targets_mean": 16703.4,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 1.2676808113157192,
|
|
"grad_norm": 0.22747725453073225,
|
|
"learning_rate": 5.3404238637162364e-05,
|
|
"loss": 0.6024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6098017692565918,
|
|
"step": 4750,
|
|
"valid_targets_mean": 16243.7,
|
|
"valid_targets_min": 122
|
|
},
|
|
{
|
|
"epoch": 1.2690152121697358,
|
|
"grad_norm": 0.23271601612942164,
|
|
"learning_rate": 5.323700766294526e-05,
|
|
"loss": 0.5936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5844199657440186,
|
|
"step": 4755,
|
|
"valid_targets_mean": 15059.6,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 1.2703496130237524,
|
|
"grad_norm": 0.2282372019798268,
|
|
"learning_rate": 5.306989475423712e-05,
|
|
"loss": 0.5917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.596099853515625,
|
|
"step": 4760,
|
|
"valid_targets_mean": 16810.2,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.2716840138777687,
|
|
"grad_norm": 0.22572255703523167,
|
|
"learning_rate": 5.2902900817635264e-05,
|
|
"loss": 0.6058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6014312505722046,
|
|
"step": 4765,
|
|
"valid_targets_mean": 14895.7,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 1.2730184147317853,
|
|
"grad_norm": 0.28794370507115435,
|
|
"learning_rate": 5.273602675909159e-05,
|
|
"loss": 0.6186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6221282482147217,
|
|
"step": 4770,
|
|
"valid_targets_mean": 16150.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.274352815585802,
|
|
"grad_norm": 0.21205356922803897,
|
|
"learning_rate": 5.256927348390765e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5996368527412415,
|
|
"step": 4775,
|
|
"valid_targets_mean": 16298.7,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 1.2756872164398185,
|
|
"grad_norm": 0.21155388741611245,
|
|
"learning_rate": 5.24026418967297e-05,
|
|
"loss": 0.599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6262991428375244,
|
|
"step": 4780,
|
|
"valid_targets_mean": 15940.4,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 1.2770216172938351,
|
|
"grad_norm": 0.22995007264829823,
|
|
"learning_rate": 5.2236132901543895e-05,
|
|
"loss": 0.5832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6046526432037354,
|
|
"step": 4785,
|
|
"valid_targets_mean": 15612.3,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 1.2783560181478517,
|
|
"grad_norm": 0.3215664591779135,
|
|
"learning_rate": 5.2069747401671334e-05,
|
|
"loss": 0.595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6015994548797607,
|
|
"step": 4790,
|
|
"valid_targets_mean": 15822.1,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 1.279690419001868,
|
|
"grad_norm": 0.24392939420188084,
|
|
"learning_rate": 5.1903486299763026e-05,
|
|
"loss": 0.6005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5739015936851501,
|
|
"step": 4795,
|
|
"valid_targets_mean": 16372.1,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.2810248198558847,
|
|
"grad_norm": 0.25390096114052607,
|
|
"learning_rate": 5.173735049779523e-05,
|
|
"loss": 0.6083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5909823775291443,
|
|
"step": 4800,
|
|
"valid_targets_mean": 16781.5,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.2823592207099013,
|
|
"grad_norm": 0.26585673297614587,
|
|
"learning_rate": 5.1571340897064454e-05,
|
|
"loss": 0.5984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485387086868286,
|
|
"step": 4805,
|
|
"valid_targets_mean": 14960.3,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 1.2836936215639179,
|
|
"grad_norm": 0.429583415938752,
|
|
"learning_rate": 5.140545839818242e-05,
|
|
"loss": 0.5876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5757549405097961,
|
|
"step": 4810,
|
|
"valid_targets_mean": 16498.8,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.2850280224179342,
|
|
"grad_norm": 0.20010540211949035,
|
|
"learning_rate": 5.1239703901071506e-05,
|
|
"loss": 0.5784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5868698358535767,
|
|
"step": 4815,
|
|
"valid_targets_mean": 16137.8,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 1.2863624232719508,
|
|
"grad_norm": 0.277590468805937,
|
|
"learning_rate": 5.1074078304959474e-05,
|
|
"loss": 0.5942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5904673933982849,
|
|
"step": 4820,
|
|
"valid_targets_mean": 15318.7,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.2876968241259674,
|
|
"grad_norm": 0.19379932646177,
|
|
"learning_rate": 5.090858250837495e-05,
|
|
"loss": 0.6019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5824383497238159,
|
|
"step": 4825,
|
|
"valid_targets_mean": 15670.2,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 1.289031224979984,
|
|
"grad_norm": 0.22667207340100606,
|
|
"learning_rate": 5.0743217409142344e-05,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.565783679485321,
|
|
"step": 4830,
|
|
"valid_targets_mean": 17687.1,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 1.2903656258340006,
|
|
"grad_norm": 0.21887335909403685,
|
|
"learning_rate": 5.057798390437696e-05,
|
|
"loss": 0.6023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5865060091018677,
|
|
"step": 4835,
|
|
"valid_targets_mean": 17334.3,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.2917000266880172,
|
|
"grad_norm": 0.19635681305442218,
|
|
"learning_rate": 5.0412882890480266e-05,
|
|
"loss": 0.5968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5752499103546143,
|
|
"step": 4840,
|
|
"valid_targets_mean": 16667.8,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 1.2930344275420336,
|
|
"grad_norm": 0.22509479283485295,
|
|
"learning_rate": 5.0247915263134984e-05,
|
|
"loss": 0.6027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5966780781745911,
|
|
"step": 4845,
|
|
"valid_targets_mean": 16138.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.2943688283960502,
|
|
"grad_norm": 0.22523252216211342,
|
|
"learning_rate": 5.0083081917300086e-05,
|
|
"loss": 0.6145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6485409736633301,
|
|
"step": 4850,
|
|
"valid_targets_mean": 16019.0,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 1.2957032292500668,
|
|
"grad_norm": 0.19342715348483416,
|
|
"learning_rate": 4.991838374720618e-05,
|
|
"loss": 0.6125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5838373899459839,
|
|
"step": 4855,
|
|
"valid_targets_mean": 15989.9,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 1.2970376301040831,
|
|
"grad_norm": 0.22246829854069744,
|
|
"learning_rate": 4.975382164635051e-05,
|
|
"loss": 0.5791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5719879269599915,
|
|
"step": 4860,
|
|
"valid_targets_mean": 15469.9,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 1.2983720309580997,
|
|
"grad_norm": 0.2253572157609349,
|
|
"learning_rate": 4.9589396507492044e-05,
|
|
"loss": 0.6033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6229650974273682,
|
|
"step": 4865,
|
|
"valid_targets_mean": 16063.2,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 1.2997064318121163,
|
|
"grad_norm": 0.17878954418677784,
|
|
"learning_rate": 4.942510922264686e-05,
|
|
"loss": 0.6003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.559048056602478,
|
|
"step": 4870,
|
|
"valid_targets_mean": 16705.4,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 1.301040832666133,
|
|
"grad_norm": 0.2048965948022605,
|
|
"learning_rate": 4.926096068308312e-05,
|
|
"loss": 0.593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6001508235931396,
|
|
"step": 4875,
|
|
"valid_targets_mean": 15511.5,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 1.3023752335201495,
|
|
"grad_norm": 0.23779288166871065,
|
|
"learning_rate": 4.90969517793162e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.622168779373169,
|
|
"step": 4880,
|
|
"valid_targets_mean": 16736.6,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.303709634374166,
|
|
"grad_norm": 0.21782401030658538,
|
|
"learning_rate": 4.893308340110407e-05,
|
|
"loss": 0.6022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5979602932929993,
|
|
"step": 4885,
|
|
"valid_targets_mean": 15912.7,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 1.3050440352281825,
|
|
"grad_norm": 0.27079239859218573,
|
|
"learning_rate": 4.876935643744229e-05,
|
|
"loss": 0.6077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.662260115146637,
|
|
"step": 4890,
|
|
"valid_targets_mean": 15808.6,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 1.306378436082199,
|
|
"grad_norm": 0.208975210660639,
|
|
"learning_rate": 4.860577177655922e-05,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6019346117973328,
|
|
"step": 4895,
|
|
"valid_targets_mean": 16212.6,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.3077128369362157,
|
|
"grad_norm": 0.3092531887221787,
|
|
"learning_rate": 4.844233030591122e-05,
|
|
"loss": 0.5987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6115128397941589,
|
|
"step": 4900,
|
|
"valid_targets_mean": 16733.5,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.3090472377902322,
|
|
"grad_norm": 0.23607040987981687,
|
|
"learning_rate": 4.827903291217785e-05,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.610486626625061,
|
|
"step": 4905,
|
|
"valid_targets_mean": 15933.8,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 1.3103816386442486,
|
|
"grad_norm": 0.22198054742229872,
|
|
"learning_rate": 4.8115880481257066e-05,
|
|
"loss": 0.5979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5951927304267883,
|
|
"step": 4910,
|
|
"valid_targets_mean": 15765.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.3117160394982652,
|
|
"grad_norm": 0.2359077023791005,
|
|
"learning_rate": 4.795287389826035e-05,
|
|
"loss": 0.5967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.566422700881958,
|
|
"step": 4915,
|
|
"valid_targets_mean": 16070.0,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.3130504403522818,
|
|
"grad_norm": 0.23498321132713004,
|
|
"learning_rate": 4.779001404750797e-05,
|
|
"loss": 0.5878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.567772388458252,
|
|
"step": 4920,
|
|
"valid_targets_mean": 18758.8,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 1.3143848412062984,
|
|
"grad_norm": 0.1865459120511662,
|
|
"learning_rate": 4.762730181252415e-05,
|
|
"loss": 0.6056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6111925840377808,
|
|
"step": 4925,
|
|
"valid_targets_mean": 15379.9,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 1.315719242060315,
|
|
"grad_norm": 0.22937962717615007,
|
|
"learning_rate": 4.746473807603235e-05,
|
|
"loss": 0.581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5950619578361511,
|
|
"step": 4930,
|
|
"valid_targets_mean": 14893.1,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 1.3170536429143316,
|
|
"grad_norm": 0.20591200810593613,
|
|
"learning_rate": 4.730232371995029e-05,
|
|
"loss": 0.6075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.629429042339325,
|
|
"step": 4935,
|
|
"valid_targets_mean": 15851.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.318388043768348,
|
|
"grad_norm": 0.21808347777891596,
|
|
"learning_rate": 4.714005962538543e-05,
|
|
"loss": 0.6037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5948023200035095,
|
|
"step": 4940,
|
|
"valid_targets_mean": 16424.6,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 1.3197224446223645,
|
|
"grad_norm": 0.20745975595609303,
|
|
"learning_rate": 4.6977946672630004e-05,
|
|
"loss": 0.6063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5832745432853699,
|
|
"step": 4945,
|
|
"valid_targets_mean": 14841.7,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 1.3210568454763811,
|
|
"grad_norm": 0.2122108754462692,
|
|
"learning_rate": 4.681598574115622e-05,
|
|
"loss": 0.6076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6011431813240051,
|
|
"step": 4950,
|
|
"valid_targets_mean": 14942.2,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 1.3223912463303977,
|
|
"grad_norm": 0.21177215134869198,
|
|
"learning_rate": 4.665417770961166e-05,
|
|
"loss": 0.5986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5770174264907837,
|
|
"step": 4955,
|
|
"valid_targets_mean": 16438.4,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 1.323725647184414,
|
|
"grad_norm": 0.2055154833404559,
|
|
"learning_rate": 4.6492523455814415e-05,
|
|
"loss": 0.5879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5913348197937012,
|
|
"step": 4960,
|
|
"valid_targets_mean": 15420.0,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 1.3250600480384307,
|
|
"grad_norm": 0.20887288536757667,
|
|
"learning_rate": 4.633102385674821e-05,
|
|
"loss": 0.5941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5890672206878662,
|
|
"step": 4965,
|
|
"valid_targets_mean": 16079.2,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.3263944488924473,
|
|
"grad_norm": 0.2011370090756563,
|
|
"learning_rate": 4.616967978855788e-05,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.579448938369751,
|
|
"step": 4970,
|
|
"valid_targets_mean": 14828.5,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 1.3277288497464639,
|
|
"grad_norm": 0.2214769709247002,
|
|
"learning_rate": 4.6008492126544476e-05,
|
|
"loss": 0.5828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6091501116752625,
|
|
"step": 4975,
|
|
"valid_targets_mean": 15823.4,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 1.3290632506004805,
|
|
"grad_norm": 0.21158836520940907,
|
|
"learning_rate": 4.584746174516045e-05,
|
|
"loss": 0.6022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6131697297096252,
|
|
"step": 4980,
|
|
"valid_targets_mean": 15405.0,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 1.330397651454497,
|
|
"grad_norm": 0.24173405329840256,
|
|
"learning_rate": 4.568658951800512e-05,
|
|
"loss": 0.5843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6034678220748901,
|
|
"step": 4985,
|
|
"valid_targets_mean": 16839.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.3317320523085134,
|
|
"grad_norm": 0.23491746410533076,
|
|
"learning_rate": 4.552587631781969e-05,
|
|
"loss": 0.6073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6017293930053711,
|
|
"step": 4990,
|
|
"valid_targets_mean": 16098.4,
|
|
"valid_targets_min": 40
|
|
},
|
|
{
|
|
"epoch": 1.33306645316253,
|
|
"grad_norm": 0.253333964366688,
|
|
"learning_rate": 4.536532301648271e-05,
|
|
"loss": 0.5901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6172199249267578,
|
|
"step": 4995,
|
|
"valid_targets_mean": 15348.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 1.3344008540165466,
|
|
"grad_norm": 0.19692619276908743,
|
|
"learning_rate": 4.5204930485005306e-05,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6032106280326843,
|
|
"step": 5000,
|
|
"valid_targets_mean": 16151.9,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 1.335735254870563,
|
|
"grad_norm": 0.20788187256604537,
|
|
"learning_rate": 4.504469959352627e-05,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5894028544425964,
|
|
"step": 5005,
|
|
"valid_targets_mean": 14931.8,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 1.3370696557245796,
|
|
"grad_norm": 0.36348244446202216,
|
|
"learning_rate": 4.488463121130762e-05,
|
|
"loss": 0.5977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.605487585067749,
|
|
"step": 5010,
|
|
"valid_targets_mean": 16289.8,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 1.3384040565785962,
|
|
"grad_norm": 0.23102762914503286,
|
|
"learning_rate": 4.472472620672976e-05,
|
|
"loss": 0.5918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6059518456459045,
|
|
"step": 5015,
|
|
"valid_targets_mean": 16090.9,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 1.3397384574326128,
|
|
"grad_norm": 0.23526289831427483,
|
|
"learning_rate": 4.4564985447286614e-05,
|
|
"loss": 0.6004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6051850318908691,
|
|
"step": 5020,
|
|
"valid_targets_mean": 15910.4,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 1.3410728582866294,
|
|
"grad_norm": 0.2825165428791914,
|
|
"learning_rate": 4.440540979958124e-05,
|
|
"loss": 0.6061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6242756843566895,
|
|
"step": 5025,
|
|
"valid_targets_mean": 15441.1,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 1.342407259140646,
|
|
"grad_norm": 0.2192602545171049,
|
|
"learning_rate": 4.4246000129320867e-05,
|
|
"loss": 0.5899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.585957407951355,
|
|
"step": 5030,
|
|
"valid_targets_mean": 16634.4,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.3437416599946623,
|
|
"grad_norm": 0.26457067121397165,
|
|
"learning_rate": 4.408675730131227e-05,
|
|
"loss": 0.6129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6116036176681519,
|
|
"step": 5035,
|
|
"valid_targets_mean": 15448.4,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 1.345076060848679,
|
|
"grad_norm": 0.22036978697881862,
|
|
"learning_rate": 4.3927682179457144e-05,
|
|
"loss": 0.6047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6069991588592529,
|
|
"step": 5040,
|
|
"valid_targets_mean": 15667.9,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.3464104617026955,
|
|
"grad_norm": 0.20238586364496838,
|
|
"learning_rate": 4.376877562674737e-05,
|
|
"loss": 0.6056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5942937135696411,
|
|
"step": 5045,
|
|
"valid_targets_mean": 16723.7,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 1.347744862556712,
|
|
"grad_norm": 0.2126814911909632,
|
|
"learning_rate": 4.3610038505260264e-05,
|
|
"loss": 0.6056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6149522066116333,
|
|
"step": 5050,
|
|
"valid_targets_mean": 14714.7,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.3490792634107285,
|
|
"grad_norm": 0.20604689807287094,
|
|
"learning_rate": 4.3451471676154035e-05,
|
|
"loss": 0.5998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6014049053192139,
|
|
"step": 5055,
|
|
"valid_targets_mean": 15980.6,
|
|
"valid_targets_min": 162
|
|
},
|
|
{
|
|
"epoch": 1.350413664264745,
|
|
"grad_norm": 0.20624419122922974,
|
|
"learning_rate": 4.329307599966306e-05,
|
|
"loss": 0.5861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5753385424613953,
|
|
"step": 5060,
|
|
"valid_targets_mean": 16103.3,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 1.3517480651187617,
|
|
"grad_norm": 0.220186367340207,
|
|
"learning_rate": 4.313485233509309e-05,
|
|
"loss": 0.6008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5982659459114075,
|
|
"step": 5065,
|
|
"valid_targets_mean": 16369.3,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 1.3530824659727783,
|
|
"grad_norm": 0.24604574295768564,
|
|
"learning_rate": 4.297680154081686e-05,
|
|
"loss": 0.5961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5799025297164917,
|
|
"step": 5070,
|
|
"valid_targets_mean": 15543.4,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 1.3544168668267949,
|
|
"grad_norm": 0.2233422233797041,
|
|
"learning_rate": 4.2818924474269126e-05,
|
|
"loss": 0.5814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5851439833641052,
|
|
"step": 5075,
|
|
"valid_targets_mean": 16586.4,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.3557512676808114,
|
|
"grad_norm": 0.20406773644050366,
|
|
"learning_rate": 4.266122199194226e-05,
|
|
"loss": 0.5819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5653451681137085,
|
|
"step": 5080,
|
|
"valid_targets_mean": 17330.1,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 1.3570856685348278,
|
|
"grad_norm": 0.22946144750187072,
|
|
"learning_rate": 4.250369494938146e-05,
|
|
"loss": 0.5979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5864439606666565,
|
|
"step": 5085,
|
|
"valid_targets_mean": 15451.2,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.3584200693888444,
|
|
"grad_norm": 0.2431015210548431,
|
|
"learning_rate": 4.234634420118021e-05,
|
|
"loss": 0.6068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.623619019985199,
|
|
"step": 5090,
|
|
"valid_targets_mean": 15702.2,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 1.359754470242861,
|
|
"grad_norm": 0.17903038907752314,
|
|
"learning_rate": 4.218917060097547e-05,
|
|
"loss": 0.5945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5696024298667908,
|
|
"step": 5095,
|
|
"valid_targets_mean": 16674.5,
|
|
"valid_targets_min": 100
|
|
},
|
|
{
|
|
"epoch": 1.3610888710968774,
|
|
"grad_norm": 0.1715613252946905,
|
|
"learning_rate": 4.203217500144326e-05,
|
|
"loss": 0.5893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5678576827049255,
|
|
"step": 5100,
|
|
"valid_targets_mean": 16900.0,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 1.362423271950894,
|
|
"grad_norm": 0.18937189520289122,
|
|
"learning_rate": 4.187535825429396e-05,
|
|
"loss": 0.6088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6141047477722168,
|
|
"step": 5105,
|
|
"valid_targets_mean": 15568.6,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.3637576728049106,
|
|
"grad_norm": 0.18294726253792917,
|
|
"learning_rate": 4.171872121026753e-05,
|
|
"loss": 0.5972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6077937483787537,
|
|
"step": 5110,
|
|
"valid_targets_mean": 15063.0,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 1.3650920736589272,
|
|
"grad_norm": 0.1987739881577311,
|
|
"learning_rate": 4.1562264719129165e-05,
|
|
"loss": 0.593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5811140537261963,
|
|
"step": 5115,
|
|
"valid_targets_mean": 16650.8,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 1.3664264745129437,
|
|
"grad_norm": 0.2080084244520698,
|
|
"learning_rate": 4.140598962966447e-05,
|
|
"loss": 0.596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5925748348236084,
|
|
"step": 5120,
|
|
"valid_targets_mean": 14376.4,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 1.3677608753669603,
|
|
"grad_norm": 0.31546162795988003,
|
|
"learning_rate": 4.124989678967503e-05,
|
|
"loss": 0.5977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6151330471038818,
|
|
"step": 5125,
|
|
"valid_targets_mean": 16339.4,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 1.3690952762209767,
|
|
"grad_norm": 0.23693111572394246,
|
|
"learning_rate": 4.109398704597357e-05,
|
|
"loss": 0.5867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5888723731040955,
|
|
"step": 5130,
|
|
"valid_targets_mean": 16056.1,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 1.3704296770749933,
|
|
"grad_norm": 0.21582845063895006,
|
|
"learning_rate": 4.093826124437962e-05,
|
|
"loss": 0.589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5797953009605408,
|
|
"step": 5135,
|
|
"valid_targets_mean": 16444.4,
|
|
"valid_targets_min": 119
|
|
},
|
|
{
|
|
"epoch": 1.37176407792901,
|
|
"grad_norm": 0.22102332173033082,
|
|
"learning_rate": 4.078272022971481e-05,
|
|
"loss": 0.6006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5976300239562988,
|
|
"step": 5140,
|
|
"valid_targets_mean": 15232.9,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 1.3730984787830265,
|
|
"grad_norm": 0.21257064967375525,
|
|
"learning_rate": 4.06273648457982e-05,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6460756063461304,
|
|
"step": 5145,
|
|
"valid_targets_mean": 14771.5,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.3744328796370429,
|
|
"grad_norm": 0.21938234079342284,
|
|
"learning_rate": 4.0472195935441904e-05,
|
|
"loss": 0.586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6117610931396484,
|
|
"step": 5150,
|
|
"valid_targets_mean": 16004.5,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.3757672804910595,
|
|
"grad_norm": 0.23306877868076897,
|
|
"learning_rate": 4.031721434044635e-05,
|
|
"loss": 0.5996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6196531057357788,
|
|
"step": 5155,
|
|
"valid_targets_mean": 15975.8,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.377101681345076,
|
|
"grad_norm": 0.19937402973955345,
|
|
"learning_rate": 4.016242090159574e-05,
|
|
"loss": 0.6038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5912162065505981,
|
|
"step": 5160,
|
|
"valid_targets_mean": 15855.6,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.3784360821990926,
|
|
"grad_norm": 0.16528255884024484,
|
|
"learning_rate": 4.0007816458653566e-05,
|
|
"loss": 0.6015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5705872178077698,
|
|
"step": 5165,
|
|
"valid_targets_mean": 16436.0,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.3797704830531092,
|
|
"grad_norm": 0.2305999394156692,
|
|
"learning_rate": 3.9853401850358036e-05,
|
|
"loss": 0.596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5944071412086487,
|
|
"step": 5170,
|
|
"valid_targets_mean": 16191.2,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 1.3811048839071258,
|
|
"grad_norm": 0.22219990738356493,
|
|
"learning_rate": 3.969917791441739e-05,
|
|
"loss": 0.5981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5921786427497864,
|
|
"step": 5175,
|
|
"valid_targets_mean": 17374.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.3824392847611422,
|
|
"grad_norm": 0.21281241031463616,
|
|
"learning_rate": 3.954514548750553e-05,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6060578227043152,
|
|
"step": 5180,
|
|
"valid_targets_mean": 16728.2,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 1.3837736856151588,
|
|
"grad_norm": 0.21240254524115104,
|
|
"learning_rate": 3.939130540525746e-05,
|
|
"loss": 0.5907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5911335945129395,
|
|
"step": 5185,
|
|
"valid_targets_mean": 16488.3,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 1.3851080864691754,
|
|
"grad_norm": 0.19390611371519534,
|
|
"learning_rate": 3.923765850226456e-05,
|
|
"loss": 0.5861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5753046274185181,
|
|
"step": 5190,
|
|
"valid_targets_mean": 16045.1,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 1.3864424873231918,
|
|
"grad_norm": 0.20217722105575986,
|
|
"learning_rate": 3.908420561207032e-05,
|
|
"loss": 0.5892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6015340089797974,
|
|
"step": 5195,
|
|
"valid_targets_mean": 15448.1,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 1.3877768881772083,
|
|
"grad_norm": 0.237505877780031,
|
|
"learning_rate": 3.893094756716569e-05,
|
|
"loss": 0.6142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6200620532035828,
|
|
"step": 5200,
|
|
"valid_targets_mean": 14185.8,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 1.389111289031225,
|
|
"grad_norm": 0.3198811804284461,
|
|
"learning_rate": 3.87778851989845e-05,
|
|
"loss": 0.6024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5919672250747681,
|
|
"step": 5205,
|
|
"valid_targets_mean": 17366.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.3904456898852415,
|
|
"grad_norm": 0.2103159972730984,
|
|
"learning_rate": 3.862501933789908e-05,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6027743816375732,
|
|
"step": 5210,
|
|
"valid_targets_mean": 15103.6,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 1.3917800907392581,
|
|
"grad_norm": 0.24002562914948206,
|
|
"learning_rate": 3.847235081321573e-05,
|
|
"loss": 0.5952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5700179934501648,
|
|
"step": 5215,
|
|
"valid_targets_mean": 17431.1,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 1.3931144915932747,
|
|
"grad_norm": 0.1611356551754476,
|
|
"learning_rate": 3.831988045317007e-05,
|
|
"loss": 0.5938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5655096769332886,
|
|
"step": 5220,
|
|
"valid_targets_mean": 16851.1,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.394448892447291,
|
|
"grad_norm": 0.263787542839795,
|
|
"learning_rate": 3.816760908492282e-05,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.590351939201355,
|
|
"step": 5225,
|
|
"valid_targets_mean": 16870.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.3957832933013077,
|
|
"grad_norm": 0.22225719444101186,
|
|
"learning_rate": 3.8015537534555e-05,
|
|
"loss": 0.5886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6013718247413635,
|
|
"step": 5230,
|
|
"valid_targets_mean": 15676.9,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 1.3971176941553243,
|
|
"grad_norm": 0.18685737512531117,
|
|
"learning_rate": 3.786366662706372e-05,
|
|
"loss": 0.598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5739297866821289,
|
|
"step": 5235,
|
|
"valid_targets_mean": 15324.5,
|
|
"valid_targets_min": 84
|
|
},
|
|
{
|
|
"epoch": 1.3984520950093409,
|
|
"grad_norm": 0.16037104282602255,
|
|
"learning_rate": 3.771199718635758e-05,
|
|
"loss": 0.5837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5539162755012512,
|
|
"step": 5240,
|
|
"valid_targets_mean": 18008.3,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 1.3997864958633572,
|
|
"grad_norm": 0.29003098065924876,
|
|
"learning_rate": 3.756053003525213e-05,
|
|
"loss": 0.6062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6276518106460571,
|
|
"step": 5245,
|
|
"valid_targets_mean": 15736.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 1.4011208967173738,
|
|
"grad_norm": 0.20843741485202597,
|
|
"learning_rate": 3.7409265995465577e-05,
|
|
"loss": 0.6059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6018944978713989,
|
|
"step": 5250,
|
|
"valid_targets_mean": 15948.7,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 1.4024552975713904,
|
|
"grad_norm": 0.18513610440099582,
|
|
"learning_rate": 3.725820588761422e-05,
|
|
"loss": 0.6073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6068531274795532,
|
|
"step": 5255,
|
|
"valid_targets_mean": 16082.1,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 1.403789698425407,
|
|
"grad_norm": 0.20095844719914072,
|
|
"learning_rate": 3.7107350531207944e-05,
|
|
"loss": 0.6076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6160079836845398,
|
|
"step": 5260,
|
|
"valid_targets_mean": 15276.1,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 1.4051240992794236,
|
|
"grad_norm": 0.20259429006491972,
|
|
"learning_rate": 3.6956700744645934e-05,
|
|
"loss": 0.5942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.597230851650238,
|
|
"step": 5265,
|
|
"valid_targets_mean": 16055.2,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.4064585001334402,
|
|
"grad_norm": 0.1892258550707547,
|
|
"learning_rate": 3.6806257345212136e-05,
|
|
"loss": 0.5898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5700638294219971,
|
|
"step": 5270,
|
|
"valid_targets_mean": 17513.9,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.4077929009874566,
|
|
"grad_norm": 0.23737922547821871,
|
|
"learning_rate": 3.665602114907075e-05,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5867282152175903,
|
|
"step": 5275,
|
|
"valid_targets_mean": 16111.4,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 1.4091273018414732,
|
|
"grad_norm": 0.17824476807657583,
|
|
"learning_rate": 3.650599297126198e-05,
|
|
"loss": 0.6023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6063085794448853,
|
|
"step": 5280,
|
|
"valid_targets_mean": 16692.3,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.4104617026954898,
|
|
"grad_norm": 0.32076707277791566,
|
|
"learning_rate": 3.63561736256975e-05,
|
|
"loss": 0.6002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5818400979042053,
|
|
"step": 5285,
|
|
"valid_targets_mean": 16016.4,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.4117961035495064,
|
|
"grad_norm": 0.2886623838106024,
|
|
"learning_rate": 3.6206563925156e-05,
|
|
"loss": 0.601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5962020754814148,
|
|
"step": 5290,
|
|
"valid_targets_mean": 16352.5,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 1.4131305044035227,
|
|
"grad_norm": 0.22448882632941783,
|
|
"learning_rate": 3.605716468127889e-05,
|
|
"loss": 0.5908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5873814225196838,
|
|
"step": 5295,
|
|
"valid_targets_mean": 16176.9,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 1.4144649052575393,
|
|
"grad_norm": 0.1974506656033717,
|
|
"learning_rate": 3.590797670456586e-05,
|
|
"loss": 0.5883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5660135746002197,
|
|
"step": 5300,
|
|
"valid_targets_mean": 16553.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 1.415799306111556,
|
|
"grad_norm": 0.19723366786559937,
|
|
"learning_rate": 3.575900080437036e-05,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5775608420372009,
|
|
"step": 5305,
|
|
"valid_targets_mean": 17046.9,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.4171337069655725,
|
|
"grad_norm": 0.16682747011523738,
|
|
"learning_rate": 3.561023778889545e-05,
|
|
"loss": 0.5867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5756033658981323,
|
|
"step": 5310,
|
|
"valid_targets_mean": 16093.6,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.418468107819589,
|
|
"grad_norm": 0.18365559629082295,
|
|
"learning_rate": 3.546168846518915e-05,
|
|
"loss": 0.5905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5974948406219482,
|
|
"step": 5315,
|
|
"valid_targets_mean": 16326.9,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 1.4198025086736055,
|
|
"grad_norm": 0.19089087086415163,
|
|
"learning_rate": 3.531335363914027e-05,
|
|
"loss": 0.5893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6205319166183472,
|
|
"step": 5320,
|
|
"valid_targets_mean": 15791.9,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 1.421136909527622,
|
|
"grad_norm": 0.19662505005309017,
|
|
"learning_rate": 3.516523411547397e-05,
|
|
"loss": 0.6034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5730395317077637,
|
|
"step": 5325,
|
|
"valid_targets_mean": 15740.1,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 1.4224713103816387,
|
|
"grad_norm": 0.18149916207996997,
|
|
"learning_rate": 3.5017330697747276e-05,
|
|
"loss": 0.5959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6132838726043701,
|
|
"step": 5330,
|
|
"valid_targets_mean": 15944.4,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 1.4238057112356552,
|
|
"grad_norm": 0.20843189835772197,
|
|
"learning_rate": 3.486964418834495e-05,
|
|
"loss": 0.5995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.589570939540863,
|
|
"step": 5335,
|
|
"valid_targets_mean": 14380.5,
|
|
"valid_targets_min": 32
|
|
},
|
|
{
|
|
"epoch": 1.4251401120896716,
|
|
"grad_norm": 0.16785233882242875,
|
|
"learning_rate": 3.472217538847496e-05,
|
|
"loss": 0.5976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6078864336013794,
|
|
"step": 5340,
|
|
"valid_targets_mean": 16013.7,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.4264745129436882,
|
|
"grad_norm": 0.1908103124634614,
|
|
"learning_rate": 3.457492509816416e-05,
|
|
"loss": 0.5888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5652427673339844,
|
|
"step": 5345,
|
|
"valid_targets_mean": 15611.4,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.4278089137977048,
|
|
"grad_norm": 0.21767883656025544,
|
|
"learning_rate": 3.442789411625402e-05,
|
|
"loss": 0.6041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5779842734336853,
|
|
"step": 5350,
|
|
"valid_targets_mean": 15840.2,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 1.4291433146517214,
|
|
"grad_norm": 0.22827072357848074,
|
|
"learning_rate": 3.428108324039626e-05,
|
|
"loss": 0.6022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5914899706840515,
|
|
"step": 5355,
|
|
"valid_targets_mean": 17542.0,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.430477715505738,
|
|
"grad_norm": 0.21643481421261868,
|
|
"learning_rate": 3.413449326704843e-05,
|
|
"loss": 0.5765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5556655526161194,
|
|
"step": 5360,
|
|
"valid_targets_mean": 16455.6,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 1.4318121163597546,
|
|
"grad_norm": 0.19922598541740116,
|
|
"learning_rate": 3.3988124991469764e-05,
|
|
"loss": 0.6153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6300039291381836,
|
|
"step": 5365,
|
|
"valid_targets_mean": 15127.3,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 1.433146517213771,
|
|
"grad_norm": 0.1635470558764958,
|
|
"learning_rate": 3.384197920771676e-05,
|
|
"loss": 0.5835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5532587170600891,
|
|
"step": 5370,
|
|
"valid_targets_mean": 18515.0,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 1.4344809180677875,
|
|
"grad_norm": 0.23513178207713545,
|
|
"learning_rate": 3.36960567086388e-05,
|
|
"loss": 0.6115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6326612830162048,
|
|
"step": 5375,
|
|
"valid_targets_mean": 15665.2,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.4358153189218041,
|
|
"grad_norm": 0.23912023489322257,
|
|
"learning_rate": 3.355035828587403e-05,
|
|
"loss": 0.5911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6104973554611206,
|
|
"step": 5380,
|
|
"valid_targets_mean": 15362.2,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 1.4371497197758207,
|
|
"grad_norm": 0.20331336753143553,
|
|
"learning_rate": 3.340488472984493e-05,
|
|
"loss": 0.5891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6072074770927429,
|
|
"step": 5385,
|
|
"valid_targets_mean": 15468.0,
|
|
"valid_targets_min": 171
|
|
},
|
|
{
|
|
"epoch": 1.438484120629837,
|
|
"grad_norm": 0.22083205722667518,
|
|
"learning_rate": 3.3259636829754086e-05,
|
|
"loss": 0.5966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.593048095703125,
|
|
"step": 5390,
|
|
"valid_targets_mean": 15959.7,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.4398185214838537,
|
|
"grad_norm": 0.19609468469659475,
|
|
"learning_rate": 3.3114615373579827e-05,
|
|
"loss": 0.5841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.602197527885437,
|
|
"step": 5395,
|
|
"valid_targets_mean": 16073.3,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.4411529223378703,
|
|
"grad_norm": 0.25854344244132677,
|
|
"learning_rate": 3.296982114807207e-05,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6135181188583374,
|
|
"step": 5400,
|
|
"valid_targets_mean": 15603.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.4424873231918869,
|
|
"grad_norm": 0.19332611302622635,
|
|
"learning_rate": 3.282525493874798e-05,
|
|
"loss": 0.5931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5894027352333069,
|
|
"step": 5405,
|
|
"valid_targets_mean": 15872.3,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 1.4438217240459035,
|
|
"grad_norm": 0.23148888062136433,
|
|
"learning_rate": 3.2680917529887746e-05,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5799127817153931,
|
|
"step": 5410,
|
|
"valid_targets_mean": 17129.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 1.44515612489992,
|
|
"grad_norm": 0.1671400646116706,
|
|
"learning_rate": 3.2536809704530206e-05,
|
|
"loss": 0.5807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5970917344093323,
|
|
"step": 5415,
|
|
"valid_targets_mean": 16345.1,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 1.4464905257539364,
|
|
"grad_norm": 0.18397653462430913,
|
|
"learning_rate": 3.239293224446879e-05,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5879602432250977,
|
|
"step": 5420,
|
|
"valid_targets_mean": 16521.5,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 1.447824926607953,
|
|
"grad_norm": 0.20694154995459813,
|
|
"learning_rate": 3.224928593024719e-05,
|
|
"loss": 0.5842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.569869339466095,
|
|
"step": 5425,
|
|
"valid_targets_mean": 16836.6,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.4491593274619696,
|
|
"grad_norm": 0.20357671075946887,
|
|
"learning_rate": 3.210587154115501e-05,
|
|
"loss": 0.6061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5855426788330078,
|
|
"step": 5430,
|
|
"valid_targets_mean": 16225.1,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 1.450493728315986,
|
|
"grad_norm": 0.18250478657118246,
|
|
"learning_rate": 3.196268985522376e-05,
|
|
"loss": 0.5967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.578062891960144,
|
|
"step": 5435,
|
|
"valid_targets_mean": 15699.8,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.4518281291700026,
|
|
"grad_norm": 0.19470223738557854,
|
|
"learning_rate": 3.1819741649222485e-05,
|
|
"loss": 0.5873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5918402671813965,
|
|
"step": 5440,
|
|
"valid_targets_mean": 15734.7,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.4531625300240192,
|
|
"grad_norm": 0.18052433407931184,
|
|
"learning_rate": 3.167702769865354e-05,
|
|
"loss": 0.6164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6201403141021729,
|
|
"step": 5445,
|
|
"valid_targets_mean": 16596.1,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 1.4544969308780358,
|
|
"grad_norm": 0.18538396940027405,
|
|
"learning_rate": 3.153454877774849e-05,
|
|
"loss": 0.5957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5928632020950317,
|
|
"step": 5450,
|
|
"valid_targets_mean": 16370.0,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.4558313317320524,
|
|
"grad_norm": 0.19658172576763985,
|
|
"learning_rate": 3.139230565946387e-05,
|
|
"loss": 0.5899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5843933820724487,
|
|
"step": 5455,
|
|
"valid_targets_mean": 15637.9,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 1.457165732586069,
|
|
"grad_norm": 0.1831526268026529,
|
|
"learning_rate": 3.1250299115476874e-05,
|
|
"loss": 0.6072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5988274812698364,
|
|
"step": 5460,
|
|
"valid_targets_mean": 15774.7,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 1.4585001334400853,
|
|
"grad_norm": 0.19210346886635393,
|
|
"learning_rate": 3.110852991618135e-05,
|
|
"loss": 0.5976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5973889231681824,
|
|
"step": 5465,
|
|
"valid_targets_mean": 15370.5,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.459834534294102,
|
|
"grad_norm": 0.20169036516535757,
|
|
"learning_rate": 3.0966998830683536e-05,
|
|
"loss": 0.5945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6419908404350281,
|
|
"step": 5470,
|
|
"valid_targets_mean": 15717.3,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 1.4611689351481185,
|
|
"grad_norm": 0.20011851692619234,
|
|
"learning_rate": 3.082570662679782e-05,
|
|
"loss": 0.591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6058547496795654,
|
|
"step": 5475,
|
|
"valid_targets_mean": 15943.1,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 1.4625033360021351,
|
|
"grad_norm": 0.19040363954948747,
|
|
"learning_rate": 3.068465407104275e-05,
|
|
"loss": 0.5945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5980136394500732,
|
|
"step": 5480,
|
|
"valid_targets_mean": 16190.1,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.4638377368561515,
|
|
"grad_norm": 0.18646996658321627,
|
|
"learning_rate": 3.054384192863664e-05,
|
|
"loss": 0.5788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5953836441040039,
|
|
"step": 5485,
|
|
"valid_targets_mean": 15646.8,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 1.465172137710168,
|
|
"grad_norm": 0.207569334773845,
|
|
"learning_rate": 3.0403270963493657e-05,
|
|
"loss": 0.5879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6002626419067383,
|
|
"step": 5490,
|
|
"valid_targets_mean": 15257.1,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 1.4665065385641847,
|
|
"grad_norm": 0.22537560589636887,
|
|
"learning_rate": 3.026294193821954e-05,
|
|
"loss": 0.5984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6118378639221191,
|
|
"step": 5495,
|
|
"valid_targets_mean": 14416.0,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 1.4678409394182013,
|
|
"grad_norm": 0.16379355795931236,
|
|
"learning_rate": 3.012285561410742e-05,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6207247376441956,
|
|
"step": 5500,
|
|
"valid_targets_mean": 16059.9,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.4691753402722179,
|
|
"grad_norm": 0.2080506615201531,
|
|
"learning_rate": 2.9983012751133852e-05,
|
|
"loss": 0.5937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5791140794754028,
|
|
"step": 5505,
|
|
"valid_targets_mean": 16530.1,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.4705097411262344,
|
|
"grad_norm": 0.19122494750135094,
|
|
"learning_rate": 2.9843414107954588e-05,
|
|
"loss": 0.586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5696684718132019,
|
|
"step": 5510,
|
|
"valid_targets_mean": 17553.1,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 1.4718441419802508,
|
|
"grad_norm": 0.19492474448887173,
|
|
"learning_rate": 2.9704060441900402e-05,
|
|
"loss": 0.6058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6235105395317078,
|
|
"step": 5515,
|
|
"valid_targets_mean": 15722.7,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.4731785428342674,
|
|
"grad_norm": 0.19044348278999745,
|
|
"learning_rate": 2.956495250897311e-05,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5901477336883545,
|
|
"step": 5520,
|
|
"valid_targets_mean": 15814.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.474512943688284,
|
|
"grad_norm": 0.2750435525308448,
|
|
"learning_rate": 2.9426091063841444e-05,
|
|
"loss": 0.5716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5935043096542358,
|
|
"step": 5525,
|
|
"valid_targets_mean": 16115.7,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.4758473445423004,
|
|
"grad_norm": 0.23200701546196978,
|
|
"learning_rate": 2.9287476859836817e-05,
|
|
"loss": 0.5956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.594383955001831,
|
|
"step": 5530,
|
|
"valid_targets_mean": 15149.1,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 1.477181745396317,
|
|
"grad_norm": 0.27083238636421597,
|
|
"learning_rate": 2.9149110648949447e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6178330183029175,
|
|
"step": 5535,
|
|
"valid_targets_mean": 15509.2,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.4785161462503336,
|
|
"grad_norm": 0.21162805713737548,
|
|
"learning_rate": 2.9010993181824158e-05,
|
|
"loss": 0.5969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.60970139503479,
|
|
"step": 5540,
|
|
"valid_targets_mean": 17768.8,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 1.4798505471043502,
|
|
"grad_norm": 0.21777734779683613,
|
|
"learning_rate": 2.8873125207756255e-05,
|
|
"loss": 0.5957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5937681198120117,
|
|
"step": 5545,
|
|
"valid_targets_mean": 15974.1,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 1.4811849479583667,
|
|
"grad_norm": 0.2114795871318019,
|
|
"learning_rate": 2.8735507474687603e-05,
|
|
"loss": 0.5978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5643002986907959,
|
|
"step": 5550,
|
|
"valid_targets_mean": 16197.7,
|
|
"valid_targets_min": 159
|
|
},
|
|
{
|
|
"epoch": 1.4825193488123833,
|
|
"grad_norm": 0.1561900761777701,
|
|
"learning_rate": 2.859814072920249e-05,
|
|
"loss": 0.5844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5664620995521545,
|
|
"step": 5555,
|
|
"valid_targets_mean": 16723.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.4838537496663997,
|
|
"grad_norm": 0.21967858614223346,
|
|
"learning_rate": 2.846102571652352e-05,
|
|
"loss": 0.5988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5975771546363831,
|
|
"step": 5560,
|
|
"valid_targets_mean": 15892.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.4851881505204163,
|
|
"grad_norm": 0.1709140812610374,
|
|
"learning_rate": 2.8324163180507716e-05,
|
|
"loss": 0.5953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5756304860115051,
|
|
"step": 5565,
|
|
"valid_targets_mean": 16787.1,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 1.486522551374433,
|
|
"grad_norm": 0.24002195980945173,
|
|
"learning_rate": 2.8187553863642314e-05,
|
|
"loss": 0.5881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5959300994873047,
|
|
"step": 5570,
|
|
"valid_targets_mean": 16814.7,
|
|
"valid_targets_min": 66
|
|
},
|
|
{
|
|
"epoch": 1.4878569522284495,
|
|
"grad_norm": 0.1824666847678956,
|
|
"learning_rate": 2.8051198507040876e-05,
|
|
"loss": 0.6051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5945764780044556,
|
|
"step": 5575,
|
|
"valid_targets_mean": 16067.2,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.4891913530824659,
|
|
"grad_norm": 0.2584334177787084,
|
|
"learning_rate": 2.7915097850439238e-05,
|
|
"loss": 0.5943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6012793779373169,
|
|
"step": 5580,
|
|
"valid_targets_mean": 17179.9,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.4905257539364825,
|
|
"grad_norm": 0.18624677181070054,
|
|
"learning_rate": 2.7779252632191394e-05,
|
|
"loss": 0.5906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.597710371017456,
|
|
"step": 5585,
|
|
"valid_targets_mean": 16111.1,
|
|
"valid_targets_min": 132
|
|
},
|
|
{
|
|
"epoch": 1.491860154790499,
|
|
"grad_norm": 0.16420787753171032,
|
|
"learning_rate": 2.7643663589265642e-05,
|
|
"loss": 0.5987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6165971755981445,
|
|
"step": 5590,
|
|
"valid_targets_mean": 15712.7,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 1.4931945556445156,
|
|
"grad_norm": 0.20323769592892854,
|
|
"learning_rate": 2.750833145724049e-05,
|
|
"loss": 0.605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5955610275268555,
|
|
"step": 5595,
|
|
"valid_targets_mean": 16009.6,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.4945289564985322,
|
|
"grad_norm": 0.202867279463156,
|
|
"learning_rate": 2.7373256970300663e-05,
|
|
"loss": 0.5993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6011368036270142,
|
|
"step": 5600,
|
|
"valid_targets_mean": 15059.6,
|
|
"valid_targets_min": 143
|
|
},
|
|
{
|
|
"epoch": 1.4958633573525488,
|
|
"grad_norm": 0.20246773736830392,
|
|
"learning_rate": 2.7238440861233176e-05,
|
|
"loss": 0.6093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6146905422210693,
|
|
"step": 5605,
|
|
"valid_targets_mean": 15709.0,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.4971977582065652,
|
|
"grad_norm": 0.21797562189566266,
|
|
"learning_rate": 2.710388386142335e-05,
|
|
"loss": 0.603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6222171783447266,
|
|
"step": 5610,
|
|
"valid_targets_mean": 16111.1,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 1.4985321590605818,
|
|
"grad_norm": 0.21535300035765537,
|
|
"learning_rate": 2.6969586700850753e-05,
|
|
"loss": 0.6003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6006828546524048,
|
|
"step": 5615,
|
|
"valid_targets_mean": 16464.0,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 1.4998665599145984,
|
|
"grad_norm": 0.1739006092855029,
|
|
"learning_rate": 2.6835550108085373e-05,
|
|
"loss": 0.5914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5925532579421997,
|
|
"step": 5620,
|
|
"valid_targets_mean": 15993.4,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.5012009607686148,
|
|
"grad_norm": 0.17931968669047224,
|
|
"learning_rate": 2.67017748102836e-05,
|
|
"loss": 0.5887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.575809895992279,
|
|
"step": 5625,
|
|
"valid_targets_mean": 16380.2,
|
|
"valid_targets_min": 120
|
|
},
|
|
{
|
|
"epoch": 1.5025353616226313,
|
|
"grad_norm": 0.23543154742608263,
|
|
"learning_rate": 2.6568261533184233e-05,
|
|
"loss": 0.6003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5761880874633789,
|
|
"step": 5630,
|
|
"valid_targets_mean": 16202.9,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 1.503869762476648,
|
|
"grad_norm": 0.2439908369834107,
|
|
"learning_rate": 2.643501100110463e-05,
|
|
"loss": 0.5896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5689970850944519,
|
|
"step": 5635,
|
|
"valid_targets_mean": 15896.8,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 1.5052041633306645,
|
|
"grad_norm": 0.17114966912354662,
|
|
"learning_rate": 2.6302023936936776e-05,
|
|
"loss": 0.597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5948618650436401,
|
|
"step": 5640,
|
|
"valid_targets_mean": 15256.4,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 1.5065385641846811,
|
|
"grad_norm": 0.19044671484603795,
|
|
"learning_rate": 2.616930106214323e-05,
|
|
"loss": 0.5937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.595179557800293,
|
|
"step": 5645,
|
|
"valid_targets_mean": 14453.9,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.5078729650386977,
|
|
"grad_norm": 0.19342269370897394,
|
|
"learning_rate": 2.6036843096753394e-05,
|
|
"loss": 0.5876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5870005488395691,
|
|
"step": 5650,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.5092073658927143,
|
|
"grad_norm": 0.17657912515294857,
|
|
"learning_rate": 2.5904650759359528e-05,
|
|
"loss": 0.602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5778599977493286,
|
|
"step": 5655,
|
|
"valid_targets_mean": 15060.4,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.5105417667467307,
|
|
"grad_norm": 0.20241815062961938,
|
|
"learning_rate": 2.5772724767112753e-05,
|
|
"loss": 0.5901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5876519083976746,
|
|
"step": 5660,
|
|
"valid_targets_mean": 14550.5,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 1.5118761676007473,
|
|
"grad_norm": 0.22708816876904175,
|
|
"learning_rate": 2.564106583571933e-05,
|
|
"loss": 0.6093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6123473048210144,
|
|
"step": 5665,
|
|
"valid_targets_mean": 14711.3,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 1.5132105684547636,
|
|
"grad_norm": 0.2055084291990726,
|
|
"learning_rate": 2.550967467943668e-05,
|
|
"loss": 0.5901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5626510381698608,
|
|
"step": 5670,
|
|
"valid_targets_mean": 16377.3,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 1.5145449693087802,
|
|
"grad_norm": 0.19756636706354835,
|
|
"learning_rate": 2.537855201106955e-05,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5962499380111694,
|
|
"step": 5675,
|
|
"valid_targets_mean": 17226.2,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.5158793701627968,
|
|
"grad_norm": 0.17030439598730115,
|
|
"learning_rate": 2.5247698541966066e-05,
|
|
"loss": 0.6044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.605013370513916,
|
|
"step": 5680,
|
|
"valid_targets_mean": 15812.6,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.5172137710168134,
|
|
"grad_norm": 0.1661861361650594,
|
|
"learning_rate": 2.511711498201397e-05,
|
|
"loss": 0.5948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6049402356147766,
|
|
"step": 5685,
|
|
"valid_targets_mean": 16516.9,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.51854817187083,
|
|
"grad_norm": 0.2125530503596033,
|
|
"learning_rate": 2.4986802039636773e-05,
|
|
"loss": 0.5948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5406869649887085,
|
|
"step": 5690,
|
|
"valid_targets_mean": 16043.2,
|
|
"valid_targets_min": 104
|
|
},
|
|
{
|
|
"epoch": 1.5198825727248466,
|
|
"grad_norm": 0.16553430129331403,
|
|
"learning_rate": 2.485676042178976e-05,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6006138920783997,
|
|
"step": 5695,
|
|
"valid_targets_mean": 15876.9,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 1.5212169735788632,
|
|
"grad_norm": 0.17905158814387356,
|
|
"learning_rate": 2.4726990833956363e-05,
|
|
"loss": 0.6006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5899641513824463,
|
|
"step": 5700,
|
|
"valid_targets_mean": 14660.8,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 1.5225513744328798,
|
|
"grad_norm": 0.17438147820585884,
|
|
"learning_rate": 2.45974939801442e-05,
|
|
"loss": 0.5886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5681999325752258,
|
|
"step": 5705,
|
|
"valid_targets_mean": 16089.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.5238857752868962,
|
|
"grad_norm": 0.16255470219703333,
|
|
"learning_rate": 2.446827056288131e-05,
|
|
"loss": 0.5832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6050971746444702,
|
|
"step": 5710,
|
|
"valid_targets_mean": 16768.1,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 1.5252201761409128,
|
|
"grad_norm": 0.18263292032857117,
|
|
"learning_rate": 2.4339321283212276e-05,
|
|
"loss": 0.5965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6079804301261902,
|
|
"step": 5715,
|
|
"valid_targets_mean": 15433.3,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 1.5265545769949291,
|
|
"grad_norm": 0.1532852778846096,
|
|
"learning_rate": 2.421064684069453e-05,
|
|
"loss": 0.6029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5876133441925049,
|
|
"step": 5720,
|
|
"valid_targets_mean": 16846.8,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 1.5278889778489457,
|
|
"grad_norm": 0.18064571046835637,
|
|
"learning_rate": 2.4082247933394414e-05,
|
|
"loss": 0.5996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6196247935295105,
|
|
"step": 5725,
|
|
"valid_targets_mean": 15432.3,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 1.5292233787029623,
|
|
"grad_norm": 0.16939114797997423,
|
|
"learning_rate": 2.3954125257883558e-05,
|
|
"loss": 0.5953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5634824633598328,
|
|
"step": 5730,
|
|
"valid_targets_mean": 16351.3,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 1.530557779556979,
|
|
"grad_norm": 0.1717461062559749,
|
|
"learning_rate": 2.382627950923501e-05,
|
|
"loss": 0.5951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6182409524917603,
|
|
"step": 5735,
|
|
"valid_targets_mean": 15957.7,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 1.5318921804109955,
|
|
"grad_norm": 0.1651763821054294,
|
|
"learning_rate": 2.3698711381019398e-05,
|
|
"loss": 0.5958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6126998662948608,
|
|
"step": 5740,
|
|
"valid_targets_mean": 15308.0,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 1.533226581265012,
|
|
"grad_norm": 0.18787738214160157,
|
|
"learning_rate": 2.3571421565301315e-05,
|
|
"loss": 0.5852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6002779006958008,
|
|
"step": 5745,
|
|
"valid_targets_mean": 16487.5,
|
|
"valid_targets_min": 141
|
|
},
|
|
{
|
|
"epoch": 1.5345609821190287,
|
|
"grad_norm": 0.15003464307738298,
|
|
"learning_rate": 2.3444410752635512e-05,
|
|
"loss": 0.6006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5795835852622986,
|
|
"step": 5750,
|
|
"valid_targets_mean": 15138.5,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.535895382973045,
|
|
"grad_norm": 0.1585655068584074,
|
|
"learning_rate": 2.331767963206302e-05,
|
|
"loss": 0.5877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5767025947570801,
|
|
"step": 5755,
|
|
"valid_targets_mean": 15502.4,
|
|
"valid_targets_min": 35
|
|
},
|
|
{
|
|
"epoch": 1.5372297838270617,
|
|
"grad_norm": 0.15371045283803136,
|
|
"learning_rate": 2.319122889110763e-05,
|
|
"loss": 0.592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5695284605026245,
|
|
"step": 5760,
|
|
"valid_targets_mean": 16759.5,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 1.5385641846810783,
|
|
"grad_norm": 0.17458428014420008,
|
|
"learning_rate": 2.3065059215772057e-05,
|
|
"loss": 0.5867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6019597053527832,
|
|
"step": 5765,
|
|
"valid_targets_mean": 16116.3,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 1.5398985855350946,
|
|
"grad_norm": 0.23360031742660345,
|
|
"learning_rate": 2.2939171290534127e-05,
|
|
"loss": 0.585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5718658566474915,
|
|
"step": 5770,
|
|
"valid_targets_mean": 15808.1,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.5412329863891112,
|
|
"grad_norm": 0.2098313727724575,
|
|
"learning_rate": 2.281356579834324e-05,
|
|
"loss": 0.5943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6123155355453491,
|
|
"step": 5775,
|
|
"valid_targets_mean": 16492.1,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 1.5425673872431278,
|
|
"grad_norm": 0.21325154876376626,
|
|
"learning_rate": 2.2688243420616573e-05,
|
|
"loss": 0.5857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6063023805618286,
|
|
"step": 5780,
|
|
"valid_targets_mean": 14032.6,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.5439017880971444,
|
|
"grad_norm": 0.18350668193667802,
|
|
"learning_rate": 2.2563204837235323e-05,
|
|
"loss": 0.6055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6285330653190613,
|
|
"step": 5785,
|
|
"valid_targets_mean": 16362.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.545236188951161,
|
|
"grad_norm": 0.22654982864260179,
|
|
"learning_rate": 2.243845072654115e-05,
|
|
"loss": 0.5776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5971053838729858,
|
|
"step": 5790,
|
|
"valid_targets_mean": 15919.5,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 1.5465705898051776,
|
|
"grad_norm": 0.18730563220805965,
|
|
"learning_rate": 2.2313981765332464e-05,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.592902660369873,
|
|
"step": 5795,
|
|
"valid_targets_mean": 15746.0,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.5479049906591942,
|
|
"grad_norm": 0.18051058434162362,
|
|
"learning_rate": 2.2189798628860604e-05,
|
|
"loss": 0.594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5940338969230652,
|
|
"step": 5800,
|
|
"valid_targets_mean": 15031.1,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 1.5492393915132106,
|
|
"grad_norm": 0.167045235586477,
|
|
"learning_rate": 2.206590199082642e-05,
|
|
"loss": 0.5893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5510430335998535,
|
|
"step": 5805,
|
|
"valid_targets_mean": 16077.8,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.5505737923672271,
|
|
"grad_norm": 0.1939197187907762,
|
|
"learning_rate": 2.194229252337639e-05,
|
|
"loss": 0.5924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5958045125007629,
|
|
"step": 5810,
|
|
"valid_targets_mean": 15662.2,
|
|
"valid_targets_min": 138
|
|
},
|
|
{
|
|
"epoch": 1.5519081932212435,
|
|
"grad_norm": 0.173011575574559,
|
|
"learning_rate": 2.181897089709913e-05,
|
|
"loss": 0.5864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6193079948425293,
|
|
"step": 5815,
|
|
"valid_targets_mean": 16200.1,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 1.55324259407526,
|
|
"grad_norm": 0.14864675429494573,
|
|
"learning_rate": 2.1695937781021736e-05,
|
|
"loss": 0.5812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5566189289093018,
|
|
"step": 5820,
|
|
"valid_targets_mean": 17615.3,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 1.5545769949292767,
|
|
"grad_norm": 0.17403013700185496,
|
|
"learning_rate": 2.1573193842606007e-05,
|
|
"loss": 0.5903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.575369119644165,
|
|
"step": 5825,
|
|
"valid_targets_mean": 15289.2,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 1.5559113957832933,
|
|
"grad_norm": 0.16472117596213087,
|
|
"learning_rate": 2.1450739747745034e-05,
|
|
"loss": 0.5912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6011247634887695,
|
|
"step": 5830,
|
|
"valid_targets_mean": 15707.9,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 1.5572457966373099,
|
|
"grad_norm": 0.15586589372849496,
|
|
"learning_rate": 2.1328576160759486e-05,
|
|
"loss": 0.5867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5761151313781738,
|
|
"step": 5835,
|
|
"valid_targets_mean": 15750.8,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 1.5585801974913265,
|
|
"grad_norm": 0.18984736960378085,
|
|
"learning_rate": 2.1206703744393936e-05,
|
|
"loss": 0.6189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6555172204971313,
|
|
"step": 5840,
|
|
"valid_targets_mean": 15906.1,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 1.559914598345343,
|
|
"grad_norm": 0.2024767844688485,
|
|
"learning_rate": 2.1085123159813398e-05,
|
|
"loss": 0.5924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5954935550689697,
|
|
"step": 5845,
|
|
"valid_targets_mean": 15512.7,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.5612489991993594,
|
|
"grad_norm": 0.14698357547390506,
|
|
"learning_rate": 2.0963835066599703e-05,
|
|
"loss": 0.5842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5737780332565308,
|
|
"step": 5850,
|
|
"valid_targets_mean": 16473.4,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.562583400053376,
|
|
"grad_norm": 0.20815349745584602,
|
|
"learning_rate": 2.084284012274781e-05,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6138194799423218,
|
|
"step": 5855,
|
|
"valid_targets_mean": 15586.1,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 1.5639178009073926,
|
|
"grad_norm": 0.1844791880337792,
|
|
"learning_rate": 2.0722138984662415e-05,
|
|
"loss": 0.5818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.563116192817688,
|
|
"step": 5860,
|
|
"valid_targets_mean": 15610.7,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 1.565252201761409,
|
|
"grad_norm": 0.16176616966994398,
|
|
"learning_rate": 2.0601732307154283e-05,
|
|
"loss": 0.5926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6010968685150146,
|
|
"step": 5865,
|
|
"valid_targets_mean": 16884.6,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 1.5665866026154256,
|
|
"grad_norm": 0.1515417804516231,
|
|
"learning_rate": 2.048162074343665e-05,
|
|
"loss": 0.5767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5779093503952026,
|
|
"step": 5870,
|
|
"valid_targets_mean": 15809.6,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.5679210034694422,
|
|
"grad_norm": 0.17709603738131607,
|
|
"learning_rate": 2.036180494512181e-05,
|
|
"loss": 0.5886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5752099752426147,
|
|
"step": 5875,
|
|
"valid_targets_mean": 15900.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 1.5692554043234588,
|
|
"grad_norm": 0.15363365071776586,
|
|
"learning_rate": 2.024228556221752e-05,
|
|
"loss": 0.6043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5751421451568604,
|
|
"step": 5880,
|
|
"valid_targets_mean": 16790.5,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.5705898051774754,
|
|
"grad_norm": 0.14514475181042058,
|
|
"learning_rate": 2.0123063243123395e-05,
|
|
"loss": 0.5994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5840705037117004,
|
|
"step": 5885,
|
|
"valid_targets_mean": 16916.9,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 1.571924206031492,
|
|
"grad_norm": 0.16845004524499335,
|
|
"learning_rate": 2.000413863462754e-05,
|
|
"loss": 0.5866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5833538770675659,
|
|
"step": 5890,
|
|
"valid_targets_mean": 17593.8,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.5732586068855086,
|
|
"grad_norm": 0.1780532499338099,
|
|
"learning_rate": 1.988551238190288e-05,
|
|
"loss": 0.5792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5539240837097168,
|
|
"step": 5895,
|
|
"valid_targets_mean": 15630.7,
|
|
"valid_targets_min": 164
|
|
},
|
|
{
|
|
"epoch": 1.574593007739525,
|
|
"grad_norm": 0.16652297951499662,
|
|
"learning_rate": 1.9767185128503817e-05,
|
|
"loss": 0.5951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6258705854415894,
|
|
"step": 5900,
|
|
"valid_targets_mean": 16201.0,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 1.5759274085935415,
|
|
"grad_norm": 0.16054757168609174,
|
|
"learning_rate": 1.9649157516362663e-05,
|
|
"loss": 0.5988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5688210725784302,
|
|
"step": 5905,
|
|
"valid_targets_mean": 17501.1,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.577261809447558,
|
|
"grad_norm": 0.17767923166761193,
|
|
"learning_rate": 1.953143018578607e-05,
|
|
"loss": 0.5936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6063442826271057,
|
|
"step": 5910,
|
|
"valid_targets_mean": 15196.0,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.5785962103015745,
|
|
"grad_norm": 0.16696992817675763,
|
|
"learning_rate": 1.9414003775451754e-05,
|
|
"loss": 0.5975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6127891540527344,
|
|
"step": 5915,
|
|
"valid_targets_mean": 15455.5,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 1.579930611155591,
|
|
"grad_norm": 0.16544949042824092,
|
|
"learning_rate": 1.9296878922404868e-05,
|
|
"loss": 0.5988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6133477091789246,
|
|
"step": 5920,
|
|
"valid_targets_mean": 15672.0,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.5812650120096077,
|
|
"grad_norm": 0.15281722728969543,
|
|
"learning_rate": 1.9180056262054575e-05,
|
|
"loss": 0.5962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5834280252456665,
|
|
"step": 5925,
|
|
"valid_targets_mean": 16627.5,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 1.5825994128636243,
|
|
"grad_norm": 0.17981677514443467,
|
|
"learning_rate": 1.9063536428170682e-05,
|
|
"loss": 0.5953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6035789251327515,
|
|
"step": 5930,
|
|
"valid_targets_mean": 15810.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.5839338137176409,
|
|
"grad_norm": 0.16230505592790417,
|
|
"learning_rate": 1.8947320052880106e-05,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6399399042129517,
|
|
"step": 5935,
|
|
"valid_targets_mean": 14876.2,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.5852682145716575,
|
|
"grad_norm": 0.18206963510342355,
|
|
"learning_rate": 1.8831407766663513e-05,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6215972304344177,
|
|
"step": 5940,
|
|
"valid_targets_mean": 16623.4,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 1.5866026154256738,
|
|
"grad_norm": 0.15644039490129202,
|
|
"learning_rate": 1.8715800198351824e-05,
|
|
"loss": 0.5972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5751008987426758,
|
|
"step": 5945,
|
|
"valid_targets_mean": 15878.5,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 1.5879370162796904,
|
|
"grad_norm": 0.13079245795628466,
|
|
"learning_rate": 1.8600497975122877e-05,
|
|
"loss": 0.5792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5741679668426514,
|
|
"step": 5950,
|
|
"valid_targets_mean": 15517.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.589271417133707,
|
|
"grad_norm": 0.15528732944901663,
|
|
"learning_rate": 1.8485501722498024e-05,
|
|
"loss": 0.5942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5887077450752258,
|
|
"step": 5955,
|
|
"valid_targets_mean": 15592.5,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.5906058179877234,
|
|
"grad_norm": 0.1635991793147494,
|
|
"learning_rate": 1.8370812064338624e-05,
|
|
"loss": 0.589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5944458246231079,
|
|
"step": 5960,
|
|
"valid_targets_mean": 17228.2,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 1.59194021884174,
|
|
"grad_norm": 0.17784795023411568,
|
|
"learning_rate": 1.8256429622842818e-05,
|
|
"loss": 0.5868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6193060874938965,
|
|
"step": 5965,
|
|
"valid_targets_mean": 16224.5,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 1.5932746196957566,
|
|
"grad_norm": 0.1636364281337556,
|
|
"learning_rate": 1.814235501854206e-05,
|
|
"loss": 0.5879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5867465734481812,
|
|
"step": 5970,
|
|
"valid_targets_mean": 16228.0,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.5946090205497732,
|
|
"grad_norm": 0.17496121191528463,
|
|
"learning_rate": 1.8028588870297774e-05,
|
|
"loss": 0.5983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5999577045440674,
|
|
"step": 5975,
|
|
"valid_targets_mean": 14875.4,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 1.5959434214037898,
|
|
"grad_norm": 0.1635867696428078,
|
|
"learning_rate": 1.7915131795297956e-05,
|
|
"loss": 0.5974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5883691310882568,
|
|
"step": 5980,
|
|
"valid_targets_mean": 17177.1,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.5972778222578063,
|
|
"grad_norm": 0.15255090872222463,
|
|
"learning_rate": 1.7801984409053897e-05,
|
|
"loss": 0.5891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5758295655250549,
|
|
"step": 5985,
|
|
"valid_targets_mean": 16917.6,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.598612223111823,
|
|
"grad_norm": 0.18049120683196274,
|
|
"learning_rate": 1.7689147325396822e-05,
|
|
"loss": 0.5892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6227512955665588,
|
|
"step": 5990,
|
|
"valid_targets_mean": 15918.9,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 1.5999466239658393,
|
|
"grad_norm": 0.44262822676640395,
|
|
"learning_rate": 1.757662115647448e-05,
|
|
"loss": 0.6015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6125069856643677,
|
|
"step": 5995,
|
|
"valid_targets_mean": 15745.6,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.601281024819856,
|
|
"grad_norm": 0.16722709656486248,
|
|
"learning_rate": 1.7464406512747964e-05,
|
|
"loss": 0.5953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5720467567443848,
|
|
"step": 6000,
|
|
"valid_targets_mean": 15853.1,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 1.6026154256738723,
|
|
"grad_norm": 0.17427989834524088,
|
|
"learning_rate": 1.7352504002988303e-05,
|
|
"loss": 0.5993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6251067519187927,
|
|
"step": 6005,
|
|
"valid_targets_mean": 14854.0,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 1.6039498265278889,
|
|
"grad_norm": 0.14081820776809867,
|
|
"learning_rate": 1.7240914234273126e-05,
|
|
"loss": 0.6101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.610417366027832,
|
|
"step": 6010,
|
|
"valid_targets_mean": 16326.3,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.6052842273819055,
|
|
"grad_norm": 0.16288453950282422,
|
|
"learning_rate": 1.7129637811983507e-05,
|
|
"loss": 0.5892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5866801738739014,
|
|
"step": 6015,
|
|
"valid_targets_mean": 15117.6,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 1.606618628235922,
|
|
"grad_norm": 0.1646206458231276,
|
|
"learning_rate": 1.7018675339800557e-05,
|
|
"loss": 0.5985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5739710927009583,
|
|
"step": 6020,
|
|
"valid_targets_mean": 16634.2,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 1.6079530290899386,
|
|
"grad_norm": 0.16128266803140606,
|
|
"learning_rate": 1.690802741970217e-05,
|
|
"loss": 0.5916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5576621294021606,
|
|
"step": 6025,
|
|
"valid_targets_mean": 16370.4,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.6092874299439552,
|
|
"grad_norm": 0.15110479462081494,
|
|
"learning_rate": 1.6797694651959806e-05,
|
|
"loss": 0.5843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6075755953788757,
|
|
"step": 6030,
|
|
"valid_targets_mean": 16231.2,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 1.6106218307979718,
|
|
"grad_norm": 0.15048779325112796,
|
|
"learning_rate": 1.6687677635135218e-05,
|
|
"loss": 0.6001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6099841594696045,
|
|
"step": 6035,
|
|
"valid_targets_mean": 14744.9,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 1.6119562316519882,
|
|
"grad_norm": 0.14317332216610718,
|
|
"learning_rate": 1.657797696607714e-05,
|
|
"loss": 0.6013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6132588386535645,
|
|
"step": 6040,
|
|
"valid_targets_mean": 15638.2,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.6132906325060048,
|
|
"grad_norm": 0.16829366412089236,
|
|
"learning_rate": 1.6468593239918136e-05,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6245346069335938,
|
|
"step": 6045,
|
|
"valid_targets_mean": 15996.1,
|
|
"valid_targets_min": 193
|
|
},
|
|
{
|
|
"epoch": 1.6146250333600214,
|
|
"grad_norm": 0.16505711959570887,
|
|
"learning_rate": 1.635952705007136e-05,
|
|
"loss": 0.5872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5829812288284302,
|
|
"step": 6050,
|
|
"valid_targets_mean": 17497.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.6159594342140378,
|
|
"grad_norm": 0.15102892214056948,
|
|
"learning_rate": 1.6250778988227248e-05,
|
|
"loss": 0.606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6144422292709351,
|
|
"step": 6055,
|
|
"valid_targets_mean": 16664.8,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.6172938350680544,
|
|
"grad_norm": 0.13778739201523155,
|
|
"learning_rate": 1.614234964435044e-05,
|
|
"loss": 0.5827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5897339582443237,
|
|
"step": 6060,
|
|
"valid_targets_mean": 17281.9,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 1.618628235922071,
|
|
"grad_norm": 0.1607951680128057,
|
|
"learning_rate": 1.603423960667645e-05,
|
|
"loss": 0.597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6235299706459045,
|
|
"step": 6065,
|
|
"valid_targets_mean": 16151.6,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.6199626367760875,
|
|
"grad_norm": 0.13975207089816102,
|
|
"learning_rate": 1.5926449461708577e-05,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6146085262298584,
|
|
"step": 6070,
|
|
"valid_targets_mean": 16454.9,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.6212970376301041,
|
|
"grad_norm": 0.13776347324920654,
|
|
"learning_rate": 1.581897979421471e-05,
|
|
"loss": 0.5997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6190211176872253,
|
|
"step": 6075,
|
|
"valid_targets_mean": 15920.2,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 1.6226314384841207,
|
|
"grad_norm": 0.14646313951209627,
|
|
"learning_rate": 1.571183118722405e-05,
|
|
"loss": 0.5958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5927650928497314,
|
|
"step": 6080,
|
|
"valid_targets_mean": 15447.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 1.6239658393381373,
|
|
"grad_norm": 0.14941326518295334,
|
|
"learning_rate": 1.5605004222024074e-05,
|
|
"loss": 0.6005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6348234415054321,
|
|
"step": 6085,
|
|
"valid_targets_mean": 15996.3,
|
|
"valid_targets_min": 114
|
|
},
|
|
{
|
|
"epoch": 1.6253002401921537,
|
|
"grad_norm": 0.1730533751354215,
|
|
"learning_rate": 1.549849947815737e-05,
|
|
"loss": 0.6004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5779517292976379,
|
|
"step": 6090,
|
|
"valid_targets_mean": 15313.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.6266346410461703,
|
|
"grad_norm": 0.1627881531753968,
|
|
"learning_rate": 1.5392317533418366e-05,
|
|
"loss": 0.6025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5933674573898315,
|
|
"step": 6095,
|
|
"valid_targets_mean": 16250.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 1.6279690419001867,
|
|
"grad_norm": 0.15670768313961111,
|
|
"learning_rate": 1.5286458963850363e-05,
|
|
"loss": 0.5856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5807504057884216,
|
|
"step": 6100,
|
|
"valid_targets_mean": 15859.4,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 1.6293034427542032,
|
|
"grad_norm": 0.13926802970619206,
|
|
"learning_rate": 1.5180924343742316e-05,
|
|
"loss": 0.5891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.569827675819397,
|
|
"step": 6105,
|
|
"valid_targets_mean": 16856.4,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 1.6306378436082198,
|
|
"grad_norm": 0.16598312188525904,
|
|
"learning_rate": 1.5075714245625689e-05,
|
|
"loss": 0.5782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6085491180419922,
|
|
"step": 6110,
|
|
"valid_targets_mean": 15201.6,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.6319722444622364,
|
|
"grad_norm": 0.1458342645262622,
|
|
"learning_rate": 1.4970829240271448e-05,
|
|
"loss": 0.5857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5849499702453613,
|
|
"step": 6115,
|
|
"valid_targets_mean": 16259.1,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.633306645316253,
|
|
"grad_norm": 0.1439106809413625,
|
|
"learning_rate": 1.4866269896686917e-05,
|
|
"loss": 0.623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6008425951004028,
|
|
"step": 6120,
|
|
"valid_targets_mean": 16053.7,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 1.6346410461702696,
|
|
"grad_norm": 0.15056600587009406,
|
|
"learning_rate": 1.4762036782112624e-05,
|
|
"loss": 0.5999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5983825922012329,
|
|
"step": 6125,
|
|
"valid_targets_mean": 16599.0,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.6359754470242862,
|
|
"grad_norm": 0.18809406813947385,
|
|
"learning_rate": 1.465813046201934e-05,
|
|
"loss": 0.5883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5702234506607056,
|
|
"step": 6130,
|
|
"valid_targets_mean": 16462.3,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 1.6373098478783028,
|
|
"grad_norm": 0.15691191797848125,
|
|
"learning_rate": 1.4554551500104971e-05,
|
|
"loss": 0.5915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6137266159057617,
|
|
"step": 6135,
|
|
"valid_targets_mean": 16040.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.6386442487323192,
|
|
"grad_norm": 0.14355841984033843,
|
|
"learning_rate": 1.4451300458291401e-05,
|
|
"loss": 0.5957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6074045300483704,
|
|
"step": 6140,
|
|
"valid_targets_mean": 16056.3,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.6399786495863358,
|
|
"grad_norm": 0.13679949081670345,
|
|
"learning_rate": 1.4348377896721635e-05,
|
|
"loss": 0.588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5748726725578308,
|
|
"step": 6145,
|
|
"valid_targets_mean": 16659.3,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 1.6413130504403521,
|
|
"grad_norm": 0.15897239188462342,
|
|
"learning_rate": 1.4245784373756566e-05,
|
|
"loss": 0.594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6073840856552124,
|
|
"step": 6150,
|
|
"valid_targets_mean": 14906.5,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 1.6426474512943687,
|
|
"grad_norm": 0.16815252513846143,
|
|
"learning_rate": 1.4143520445972078e-05,
|
|
"loss": 0.6001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6236668229103088,
|
|
"step": 6155,
|
|
"valid_targets_mean": 15759.6,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.6439818521483853,
|
|
"grad_norm": 0.143794484563453,
|
|
"learning_rate": 1.4041586668155989e-05,
|
|
"loss": 0.6001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5926347374916077,
|
|
"step": 6160,
|
|
"valid_targets_mean": 16193.2,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.645316253002402,
|
|
"grad_norm": 0.1454519692729659,
|
|
"learning_rate": 1.3939983593304992e-05,
|
|
"loss": 0.5753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5642212629318237,
|
|
"step": 6165,
|
|
"valid_targets_mean": 16872.0,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.6466506538564185,
|
|
"grad_norm": 0.1376101516577718,
|
|
"learning_rate": 1.3838711772621743e-05,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5688588619232178,
|
|
"step": 6170,
|
|
"valid_targets_mean": 15333.7,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 1.647985054710435,
|
|
"grad_norm": 0.16223202014166033,
|
|
"learning_rate": 1.3737771755511811e-05,
|
|
"loss": 0.5997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5690805912017822,
|
|
"step": 6175,
|
|
"valid_targets_mean": 15612.1,
|
|
"valid_targets_min": 23
|
|
},
|
|
{
|
|
"epoch": 1.6493194555644517,
|
|
"grad_norm": 0.1285767046281265,
|
|
"learning_rate": 1.3637164089580673e-05,
|
|
"loss": 0.5884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5544174313545227,
|
|
"step": 6180,
|
|
"valid_targets_mean": 17636.2,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 1.650653856418468,
|
|
"grad_norm": 0.148398065035962,
|
|
"learning_rate": 1.3536889320630841e-05,
|
|
"loss": 0.5965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6234698295593262,
|
|
"step": 6185,
|
|
"valid_targets_mean": 16127.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.6519882572724847,
|
|
"grad_norm": 0.14107759764387337,
|
|
"learning_rate": 1.3436947992658814e-05,
|
|
"loss": 0.58,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5890226364135742,
|
|
"step": 6190,
|
|
"valid_targets_mean": 16596.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.6533226581265013,
|
|
"grad_norm": 0.16180436639057758,
|
|
"learning_rate": 1.3337340647852135e-05,
|
|
"loss": 0.594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6033791899681091,
|
|
"step": 6195,
|
|
"valid_targets_mean": 15863.7,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.6546570589805176,
|
|
"grad_norm": 0.1518895405995563,
|
|
"learning_rate": 1.3238067826586491e-05,
|
|
"loss": 0.5991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961155891418457,
|
|
"step": 6200,
|
|
"valid_targets_mean": 15850.0,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 1.6559914598345342,
|
|
"grad_norm": 0.17032843421286292,
|
|
"learning_rate": 1.3139130067422792e-05,
|
|
"loss": 0.5967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5981688499450684,
|
|
"step": 6205,
|
|
"valid_targets_mean": 16174.9,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.6573258606885508,
|
|
"grad_norm": 0.15065060581950682,
|
|
"learning_rate": 1.3040527907104126e-05,
|
|
"loss": 0.6057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5765440464019775,
|
|
"step": 6210,
|
|
"valid_targets_mean": 17820.4,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.6586602615425674,
|
|
"grad_norm": 0.1597433167413591,
|
|
"learning_rate": 1.2942261880553012e-05,
|
|
"loss": 0.5899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5972633957862854,
|
|
"step": 6215,
|
|
"valid_targets_mean": 16479.0,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.659994662396584,
|
|
"grad_norm": 0.15031409007903818,
|
|
"learning_rate": 1.2844332520868433e-05,
|
|
"loss": 0.5864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5601651668548584,
|
|
"step": 6220,
|
|
"valid_targets_mean": 15959.4,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 1.6613290632506006,
|
|
"grad_norm": 0.14976374680290783,
|
|
"learning_rate": 1.2746740359322857e-05,
|
|
"loss": 0.5995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6134690046310425,
|
|
"step": 6225,
|
|
"valid_targets_mean": 13893.3,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 1.6626634641046172,
|
|
"grad_norm": 0.14880912909382793,
|
|
"learning_rate": 1.2649485925359514e-05,
|
|
"loss": 0.6006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5879743099212646,
|
|
"step": 6230,
|
|
"valid_targets_mean": 15951.7,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 1.6639978649586336,
|
|
"grad_norm": 0.162208271485178,
|
|
"learning_rate": 1.2552569746589386e-05,
|
|
"loss": 0.5825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6024885177612305,
|
|
"step": 6235,
|
|
"valid_targets_mean": 15375.4,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 1.6653322658126501,
|
|
"grad_norm": 0.13271115663807362,
|
|
"learning_rate": 1.245599234878846e-05,
|
|
"loss": 0.5871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5827089548110962,
|
|
"step": 6240,
|
|
"valid_targets_mean": 15835.0,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.12474651138273796,
|
|
"learning_rate": 1.2359754255894737e-05,
|
|
"loss": 0.5959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6084429025650024,
|
|
"step": 6245,
|
|
"valid_targets_mean": 16269.3,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 1.6680010675206831,
|
|
"grad_norm": 0.1445050377364166,
|
|
"learning_rate": 1.2263855990005527e-05,
|
|
"loss": 0.6018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961320400238037,
|
|
"step": 6250,
|
|
"valid_targets_mean": 16289.6,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.6693354683746997,
|
|
"grad_norm": 0.1482473852544307,
|
|
"learning_rate": 1.2168298071374543e-05,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5933790802955627,
|
|
"step": 6255,
|
|
"valid_targets_mean": 16351.3,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.6706698692287163,
|
|
"grad_norm": 0.13539046411548533,
|
|
"learning_rate": 1.2073081018409112e-05,
|
|
"loss": 0.5883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5713330507278442,
|
|
"step": 6260,
|
|
"valid_targets_mean": 16472.3,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 1.672004270082733,
|
|
"grad_norm": 0.14799011994133907,
|
|
"learning_rate": 1.1978205347667303e-05,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6045159101486206,
|
|
"step": 6265,
|
|
"valid_targets_mean": 16547.1,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.6733386709367495,
|
|
"grad_norm": 0.13725772965713526,
|
|
"learning_rate": 1.1883671573855186e-05,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5883108377456665,
|
|
"step": 6270,
|
|
"valid_targets_mean": 16727.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.674673071790766,
|
|
"grad_norm": 0.13623216265241142,
|
|
"learning_rate": 1.1789480209824064e-05,
|
|
"loss": 0.5835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5607722997665405,
|
|
"step": 6275,
|
|
"valid_targets_mean": 16476.7,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 1.6760074726447824,
|
|
"grad_norm": 0.13015789908144226,
|
|
"learning_rate": 1.1695631766567562e-05,
|
|
"loss": 0.593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5627079010009766,
|
|
"step": 6280,
|
|
"valid_targets_mean": 17736.7,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.677341873498799,
|
|
"grad_norm": 0.1286182356071317,
|
|
"learning_rate": 1.1602126753219005e-05,
|
|
"loss": 0.5815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5757423043251038,
|
|
"step": 6285,
|
|
"valid_targets_mean": 15710.1,
|
|
"valid_targets_min": 171
|
|
},
|
|
{
|
|
"epoch": 1.6786762743528156,
|
|
"grad_norm": 0.1499565174599278,
|
|
"learning_rate": 1.1508965677048585e-05,
|
|
"loss": 0.5976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5899896025657654,
|
|
"step": 6290,
|
|
"valid_targets_mean": 15419.5,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.680010675206832,
|
|
"grad_norm": 0.16982584818577737,
|
|
"learning_rate": 1.1416149043460562e-05,
|
|
"loss": 0.5845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6001005172729492,
|
|
"step": 6295,
|
|
"valid_targets_mean": 15842.3,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 1.6813450760608486,
|
|
"grad_norm": 0.14353702154030903,
|
|
"learning_rate": 1.132367735599066e-05,
|
|
"loss": 0.6126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6028214693069458,
|
|
"step": 6300,
|
|
"valid_targets_mean": 16275.0,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 1.6826794769148652,
|
|
"grad_norm": 0.1381931576513102,
|
|
"learning_rate": 1.1231551116303162e-05,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.599789023399353,
|
|
"step": 6305,
|
|
"valid_targets_mean": 15538.0,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 1.6840138777688818,
|
|
"grad_norm": 0.16697494527921555,
|
|
"learning_rate": 1.1139770824188334e-05,
|
|
"loss": 0.5883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6271265745162964,
|
|
"step": 6310,
|
|
"valid_targets_mean": 16457.3,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.6853482786228984,
|
|
"grad_norm": 0.1377080966130086,
|
|
"learning_rate": 1.1048336977559666e-05,
|
|
"loss": 0.5877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5899460315704346,
|
|
"step": 6315,
|
|
"valid_targets_mean": 16787.1,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.686682679476915,
|
|
"grad_norm": 0.16651128816051763,
|
|
"learning_rate": 1.0957250072451084e-05,
|
|
"loss": 0.5978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5856098532676697,
|
|
"step": 6320,
|
|
"valid_targets_mean": 15374.6,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 1.6880170803309316,
|
|
"grad_norm": 0.13609784431985525,
|
|
"learning_rate": 1.0866510603014411e-05,
|
|
"loss": 0.6023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6047021150588989,
|
|
"step": 6325,
|
|
"valid_targets_mean": 16535.2,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.689351481184948,
|
|
"grad_norm": 0.14592184066797317,
|
|
"learning_rate": 1.0776119061516613e-05,
|
|
"loss": 0.598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6073904037475586,
|
|
"step": 6330,
|
|
"valid_targets_mean": 16283.0,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 1.6906858820389645,
|
|
"grad_norm": 0.12591510091278948,
|
|
"learning_rate": 1.0686075938337055e-05,
|
|
"loss": 0.5717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5525071620941162,
|
|
"step": 6335,
|
|
"valid_targets_mean": 16847.4,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.692020282892981,
|
|
"grad_norm": 0.1484961336986223,
|
|
"learning_rate": 1.0596381721964984e-05,
|
|
"loss": 0.6042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6173697710037231,
|
|
"step": 6340,
|
|
"valid_targets_mean": 14540.3,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.6933546837469975,
|
|
"grad_norm": 0.1518980985701294,
|
|
"learning_rate": 1.0507036898996787e-05,
|
|
"loss": 0.5825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5898152589797974,
|
|
"step": 6345,
|
|
"valid_targets_mean": 16248.2,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 1.694689084601014,
|
|
"grad_norm": 0.129878707444232,
|
|
"learning_rate": 1.0418041954133346e-05,
|
|
"loss": 0.5914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5593838095664978,
|
|
"step": 6350,
|
|
"valid_targets_mean": 15633.2,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.6960234854550307,
|
|
"grad_norm": 0.132711070418758,
|
|
"learning_rate": 1.032939737017745e-05,
|
|
"loss": 0.5974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5812194347381592,
|
|
"step": 6355,
|
|
"valid_targets_mean": 15864.6,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.6973578863090473,
|
|
"grad_norm": 0.14825395712334374,
|
|
"learning_rate": 1.02411036280312e-05,
|
|
"loss": 0.6183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6262049078941345,
|
|
"step": 6360,
|
|
"valid_targets_mean": 15748.1,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.6986922871630639,
|
|
"grad_norm": 0.13486774328472748,
|
|
"learning_rate": 1.0153161206693269e-05,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5607184171676636,
|
|
"step": 6365,
|
|
"valid_targets_mean": 15879.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.7000266880170805,
|
|
"grad_norm": 0.1531399398087951,
|
|
"learning_rate": 1.0065570583256483e-05,
|
|
"loss": 0.5928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5941235423088074,
|
|
"step": 6370,
|
|
"valid_targets_mean": 16422.2,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.7013610888710968,
|
|
"grad_norm": 0.14345987977648245,
|
|
"learning_rate": 9.978332232905114e-06,
|
|
"loss": 0.5857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.571819543838501,
|
|
"step": 6375,
|
|
"valid_targets_mean": 16778.1,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.7026954897251134,
|
|
"grad_norm": 0.14740771989302237,
|
|
"learning_rate": 9.891446628912286e-06,
|
|
"loss": 0.5944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6348655223846436,
|
|
"step": 6380,
|
|
"valid_targets_mean": 14806.2,
|
|
"valid_targets_min": 91
|
|
},
|
|
{
|
|
"epoch": 1.70402989057913,
|
|
"grad_norm": 0.14720633715151338,
|
|
"learning_rate": 9.804914242637541e-06,
|
|
"loss": 0.5906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5854336023330688,
|
|
"step": 6385,
|
|
"valid_targets_mean": 16334.8,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 1.7053642914331464,
|
|
"grad_norm": 0.15414339297276425,
|
|
"learning_rate": 9.718735543524103e-06,
|
|
"loss": 0.5894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.585676908493042,
|
|
"step": 6390,
|
|
"valid_targets_mean": 16565.8,
|
|
"valid_targets_min": 25
|
|
},
|
|
{
|
|
"epoch": 1.706698692287163,
|
|
"grad_norm": 0.144630066463583,
|
|
"learning_rate": 9.632910999096486e-06,
|
|
"loss": 0.6003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5819835066795349,
|
|
"step": 6395,
|
|
"valid_targets_mean": 16601.6,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 1.7080330931411796,
|
|
"grad_norm": 0.14199837312391764,
|
|
"learning_rate": 9.547441074957884e-06,
|
|
"loss": 0.5922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5858323574066162,
|
|
"step": 6400,
|
|
"valid_targets_mean": 16673.0,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.7093674939951962,
|
|
"grad_norm": 0.12787592866458353,
|
|
"learning_rate": 9.462326234787621e-06,
|
|
"loss": 0.5889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5914207100868225,
|
|
"step": 6405,
|
|
"valid_targets_mean": 15893.9,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.7107018948492128,
|
|
"grad_norm": 0.12777324318682454,
|
|
"learning_rate": 9.377566940338712e-06,
|
|
"loss": 0.6031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.604408323764801,
|
|
"step": 6410,
|
|
"valid_targets_mean": 17530.7,
|
|
"valid_targets_min": 120
|
|
},
|
|
{
|
|
"epoch": 1.7120362957032293,
|
|
"grad_norm": 0.13973583954593194,
|
|
"learning_rate": 9.293163651435298e-06,
|
|
"loss": 0.5884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.582183837890625,
|
|
"step": 6415,
|
|
"valid_targets_mean": 16832.0,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 1.713370696557246,
|
|
"grad_norm": 0.18585246281645018,
|
|
"learning_rate": 9.20911682597015e-06,
|
|
"loss": 0.5808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6085513234138489,
|
|
"step": 6420,
|
|
"valid_targets_mean": 15203.5,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.7147050974112623,
|
|
"grad_norm": 0.12632781741232604,
|
|
"learning_rate": 9.125426919902231e-06,
|
|
"loss": 0.5697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.582617998123169,
|
|
"step": 6425,
|
|
"valid_targets_mean": 16540.3,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.716039498265279,
|
|
"grad_norm": 0.14588864430661452,
|
|
"learning_rate": 9.042094387254212e-06,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.588573694229126,
|
|
"step": 6430,
|
|
"valid_targets_mean": 14756.5,
|
|
"valid_targets_min": 124
|
|
},
|
|
{
|
|
"epoch": 1.7173738991192953,
|
|
"grad_norm": 0.14127921161289128,
|
|
"learning_rate": 8.959119680109918e-06,
|
|
"loss": 0.5859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6098560094833374,
|
|
"step": 6435,
|
|
"valid_targets_mean": 15344.9,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 1.7187082999733119,
|
|
"grad_norm": 0.12191637752010245,
|
|
"learning_rate": 8.876503248612036e-06,
|
|
"loss": 0.5943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.594386100769043,
|
|
"step": 6440,
|
|
"valid_targets_mean": 15512.2,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 1.7200427008273285,
|
|
"grad_norm": 0.13184433555587588,
|
|
"learning_rate": 8.794245540959546e-06,
|
|
"loss": 0.5937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5808385610580444,
|
|
"step": 6445,
|
|
"valid_targets_mean": 17211.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.721377101681345,
|
|
"grad_norm": 0.12236297502276318,
|
|
"learning_rate": 8.712347003405304e-06,
|
|
"loss": 0.5809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.579727828502655,
|
|
"step": 6450,
|
|
"valid_targets_mean": 15807.0,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 1.7227115025353616,
|
|
"grad_norm": 0.12733970081519047,
|
|
"learning_rate": 8.630808080253701e-06,
|
|
"loss": 0.5871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5996691584587097,
|
|
"step": 6455,
|
|
"valid_targets_mean": 16812.1,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 1.7240459033893782,
|
|
"grad_norm": 0.12927177044213958,
|
|
"learning_rate": 8.549629213858192e-06,
|
|
"loss": 0.591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6019003391265869,
|
|
"step": 6460,
|
|
"valid_targets_mean": 16805.8,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.7253803042433948,
|
|
"grad_norm": 0.12821859951783265,
|
|
"learning_rate": 8.468810844618842e-06,
|
|
"loss": 0.5991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5908550024032593,
|
|
"step": 6465,
|
|
"valid_targets_mean": 15914.5,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.7267147050974114,
|
|
"grad_norm": 0.12463354968323265,
|
|
"learning_rate": 8.388353410980075e-06,
|
|
"loss": 0.5931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6119598150253296,
|
|
"step": 6470,
|
|
"valid_targets_mean": 16807.3,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 1.7280491059514278,
|
|
"grad_norm": 0.1423546941527745,
|
|
"learning_rate": 8.308257349428154e-06,
|
|
"loss": 0.5873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5823371410369873,
|
|
"step": 6475,
|
|
"valid_targets_mean": 16515.9,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.7293835068054444,
|
|
"grad_norm": 0.13947593660909918,
|
|
"learning_rate": 8.228523094488928e-06,
|
|
"loss": 0.5853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6231012344360352,
|
|
"step": 6480,
|
|
"valid_targets_mean": 14861.1,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.7307179076594608,
|
|
"grad_norm": 0.13401251190748847,
|
|
"learning_rate": 8.149151078725416e-06,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6143298149108887,
|
|
"step": 6485,
|
|
"valid_targets_mean": 15608.4,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 1.7320523085134774,
|
|
"grad_norm": 0.12714425848238867,
|
|
"learning_rate": 8.070141732735424e-06,
|
|
"loss": 0.5879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5799736380577087,
|
|
"step": 6490,
|
|
"valid_targets_mean": 16586.3,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.733386709367494,
|
|
"grad_norm": 0.13113293232699752,
|
|
"learning_rate": 7.991495485149294e-06,
|
|
"loss": 0.5977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6002615690231323,
|
|
"step": 6495,
|
|
"valid_targets_mean": 15329.1,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 1.7347211102215105,
|
|
"grad_norm": 0.12430560028875079,
|
|
"learning_rate": 7.913212762627539e-06,
|
|
"loss": 0.5965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5728801488876343,
|
|
"step": 6500,
|
|
"valid_targets_mean": 16844.3,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 1.7360555110755271,
|
|
"grad_norm": 0.1385872249129435,
|
|
"learning_rate": 7.835293989858527e-06,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5763715505599976,
|
|
"step": 6505,
|
|
"valid_targets_mean": 16026.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.7373899119295437,
|
|
"grad_norm": 0.13575477026988048,
|
|
"learning_rate": 7.75773958955614e-06,
|
|
"loss": 0.5827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5958987474441528,
|
|
"step": 6510,
|
|
"valid_targets_mean": 15166.1,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.7387243127835603,
|
|
"grad_norm": 0.14375470702045678,
|
|
"learning_rate": 7.680549982457553e-06,
|
|
"loss": 0.5967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5804993510246277,
|
|
"step": 6515,
|
|
"valid_targets_mean": 15304.0,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.7400587136375767,
|
|
"grad_norm": 0.13251025136148764,
|
|
"learning_rate": 7.6037255873209165e-06,
|
|
"loss": 0.604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6260999441146851,
|
|
"step": 6520,
|
|
"valid_targets_mean": 17056.7,
|
|
"valid_targets_min": 42
|
|
},
|
|
{
|
|
"epoch": 1.7413931144915933,
|
|
"grad_norm": 0.13207149689690997,
|
|
"learning_rate": 7.527266820923089e-06,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5891574025154114,
|
|
"step": 6525,
|
|
"valid_targets_mean": 16467.2,
|
|
"valid_targets_min": 54
|
|
},
|
|
{
|
|
"epoch": 1.7427275153456097,
|
|
"grad_norm": 0.12270209253540865,
|
|
"learning_rate": 7.45117409805733e-06,
|
|
"loss": 0.5928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5965874195098877,
|
|
"step": 6530,
|
|
"valid_targets_mean": 15756.0,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 1.7440619161996262,
|
|
"grad_norm": 0.14656779384139537,
|
|
"learning_rate": 7.375447831531128e-06,
|
|
"loss": 0.5901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5620827674865723,
|
|
"step": 6535,
|
|
"valid_targets_mean": 15943.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 1.7453963170536428,
|
|
"grad_norm": 0.12908583929661363,
|
|
"learning_rate": 7.300088432163945e-06,
|
|
"loss": 0.5914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6227389574050903,
|
|
"step": 6540,
|
|
"valid_targets_mean": 16917.0,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.7467307179076594,
|
|
"grad_norm": 0.140681704180509,
|
|
"learning_rate": 7.2250963087849e-06,
|
|
"loss": 0.5722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.576507031917572,
|
|
"step": 6545,
|
|
"valid_targets_mean": 14793.1,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.748065118761676,
|
|
"grad_norm": 0.14019687191999916,
|
|
"learning_rate": 7.1504718682306754e-06,
|
|
"loss": 0.5961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5997872948646545,
|
|
"step": 6550,
|
|
"valid_targets_mean": 14530.4,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 1.7493995196156926,
|
|
"grad_norm": 0.12345291128910027,
|
|
"learning_rate": 7.076215515343256e-06,
|
|
"loss": 0.5846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5840612649917603,
|
|
"step": 6555,
|
|
"valid_targets_mean": 15716.3,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 1.7507339204697092,
|
|
"grad_norm": 0.1425058945838927,
|
|
"learning_rate": 7.0023276529676655e-06,
|
|
"loss": 0.6116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6450003385543823,
|
|
"step": 6560,
|
|
"valid_targets_mean": 15855.2,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 1.7520683213237258,
|
|
"grad_norm": 0.12452597354737815,
|
|
"learning_rate": 6.928808681949919e-06,
|
|
"loss": 0.5868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5846118330955505,
|
|
"step": 6565,
|
|
"valid_targets_mean": 15287.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.7534027221777422,
|
|
"grad_norm": 0.11444459062454918,
|
|
"learning_rate": 6.855659001134739e-06,
|
|
"loss": 0.6064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.579085111618042,
|
|
"step": 6570,
|
|
"valid_targets_mean": 15837.1,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 1.7547371230317588,
|
|
"grad_norm": 0.12798762634839495,
|
|
"learning_rate": 6.78287900736342e-06,
|
|
"loss": 0.5873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.578317403793335,
|
|
"step": 6575,
|
|
"valid_targets_mean": 16501.7,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.7560715238857751,
|
|
"grad_norm": 0.12134400921362967,
|
|
"learning_rate": 6.710469095471701e-06,
|
|
"loss": 0.583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5920226573944092,
|
|
"step": 6580,
|
|
"valid_targets_mean": 17444.7,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 1.7574059247397917,
|
|
"grad_norm": 0.12768102290297237,
|
|
"learning_rate": 6.638429658287603e-06,
|
|
"loss": 0.591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5544648170471191,
|
|
"step": 6585,
|
|
"valid_targets_mean": 15754.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.7587403255938083,
|
|
"grad_norm": 0.1143794766814526,
|
|
"learning_rate": 6.566761086629285e-06,
|
|
"loss": 0.5869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5953787565231323,
|
|
"step": 6590,
|
|
"valid_targets_mean": 17017.4,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 1.760074726447825,
|
|
"grad_norm": 0.13194510810045934,
|
|
"learning_rate": 6.495463769302952e-06,
|
|
"loss": 0.5977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6088513731956482,
|
|
"step": 6595,
|
|
"valid_targets_mean": 15849.3,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 1.7614091273018415,
|
|
"grad_norm": 0.13380248430204805,
|
|
"learning_rate": 6.424538093100745e-06,
|
|
"loss": 0.6004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5823229551315308,
|
|
"step": 6600,
|
|
"valid_targets_mean": 15976.0,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 1.762743528155858,
|
|
"grad_norm": 0.11524897333434836,
|
|
"learning_rate": 6.353984442798582e-06,
|
|
"loss": 0.5969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5522147417068481,
|
|
"step": 6605,
|
|
"valid_targets_mean": 17450.5,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 1.7640779290098747,
|
|
"grad_norm": 0.1315759288137918,
|
|
"learning_rate": 6.283803201154173e-06,
|
|
"loss": 0.5831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5800704956054688,
|
|
"step": 6610,
|
|
"valid_targets_mean": 17219.5,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 1.765412329863891,
|
|
"grad_norm": 0.13343874209116965,
|
|
"learning_rate": 6.213994748904866e-06,
|
|
"loss": 0.5882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5874321460723877,
|
|
"step": 6615,
|
|
"valid_targets_mean": 16498.7,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.7667467307179077,
|
|
"grad_norm": 0.13166214820508826,
|
|
"learning_rate": 6.144559464765605e-06,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5657899379730225,
|
|
"step": 6620,
|
|
"valid_targets_mean": 16268.9,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 1.7680811315719243,
|
|
"grad_norm": 0.14027113380235667,
|
|
"learning_rate": 6.075497725426862e-06,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6110091209411621,
|
|
"step": 6625,
|
|
"valid_targets_mean": 15386.6,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 1.7694155324259406,
|
|
"grad_norm": 0.13038351513833302,
|
|
"learning_rate": 6.0068099055526505e-06,
|
|
"loss": 0.6022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5834971070289612,
|
|
"step": 6630,
|
|
"valid_targets_mean": 16473.2,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 1.7707499332799572,
|
|
"grad_norm": 0.13341949521409505,
|
|
"learning_rate": 5.938496377778395e-06,
|
|
"loss": 0.5987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.589992880821228,
|
|
"step": 6635,
|
|
"valid_targets_mean": 15739.5,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.7720843341339738,
|
|
"grad_norm": 0.127563718799623,
|
|
"learning_rate": 5.870557512709001e-06,
|
|
"loss": 0.5945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5829722881317139,
|
|
"step": 6640,
|
|
"valid_targets_mean": 16187.3,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.7734187349879904,
|
|
"grad_norm": 0.13086379830367054,
|
|
"learning_rate": 5.802993678916773e-06,
|
|
"loss": 0.6031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5990883111953735,
|
|
"step": 6645,
|
|
"valid_targets_mean": 15799.8,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 1.774753135842007,
|
|
"grad_norm": 0.1339382142200281,
|
|
"learning_rate": 5.7358052429394785e-06,
|
|
"loss": 0.5958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5838741064071655,
|
|
"step": 6650,
|
|
"valid_targets_mean": 16166.3,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 1.7760875366960236,
|
|
"grad_norm": 0.12436641926688305,
|
|
"learning_rate": 5.668992569278347e-06,
|
|
"loss": 0.5946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6025828719139099,
|
|
"step": 6655,
|
|
"valid_targets_mean": 15163.1,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.7774219375500402,
|
|
"grad_norm": 0.12139389266546576,
|
|
"learning_rate": 5.602556020396004e-06,
|
|
"loss": 0.5753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5686994791030884,
|
|
"step": 6660,
|
|
"valid_targets_mean": 16358.0,
|
|
"valid_targets_min": 148
|
|
},
|
|
{
|
|
"epoch": 1.7787563384040566,
|
|
"grad_norm": 0.12719396458430962,
|
|
"learning_rate": 5.5364959567146514e-06,
|
|
"loss": 0.5938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5728775262832642,
|
|
"step": 6665,
|
|
"valid_targets_mean": 16011.3,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 1.7800907392580732,
|
|
"grad_norm": 0.13944967945573655,
|
|
"learning_rate": 5.470812736614014e-06,
|
|
"loss": 0.5976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6120748519897461,
|
|
"step": 6670,
|
|
"valid_targets_mean": 14323.0,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.7814251401120895,
|
|
"grad_norm": 0.12706901606619092,
|
|
"learning_rate": 5.405506716429378e-06,
|
|
"loss": 0.5907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6091560125350952,
|
|
"step": 6675,
|
|
"valid_targets_mean": 16226.2,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 1.7827595409661061,
|
|
"grad_norm": 0.11505706377936203,
|
|
"learning_rate": 5.340578250449742e-06,
|
|
"loss": 0.5768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.558120846748352,
|
|
"step": 6680,
|
|
"valid_targets_mean": 16645.7,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 1.7840939418201227,
|
|
"grad_norm": 0.12119249283018675,
|
|
"learning_rate": 5.276027690915868e-06,
|
|
"loss": 0.604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6369675397872925,
|
|
"step": 6685,
|
|
"valid_targets_mean": 16399.1,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 1.7854283426741393,
|
|
"grad_norm": 0.11977714576766528,
|
|
"learning_rate": 5.211855388018282e-06,
|
|
"loss": 0.6026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6144067049026489,
|
|
"step": 6690,
|
|
"valid_targets_mean": 16009.9,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.786762743528156,
|
|
"grad_norm": 0.12002456110716048,
|
|
"learning_rate": 5.148061689895519e-06,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5942480564117432,
|
|
"step": 6695,
|
|
"valid_targets_mean": 15441.4,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 1.7880971443821725,
|
|
"grad_norm": 0.12419839234916387,
|
|
"learning_rate": 5.084646942632123e-06,
|
|
"loss": 0.5845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5599845051765442,
|
|
"step": 6700,
|
|
"valid_targets_mean": 16584.7,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.789431545236189,
|
|
"grad_norm": 0.12276142930868597,
|
|
"learning_rate": 5.0216114902567995e-06,
|
|
"loss": 0.5873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6189547777175903,
|
|
"step": 6705,
|
|
"valid_targets_mean": 14926.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 1.7907659460902055,
|
|
"grad_norm": 0.11850993922110012,
|
|
"learning_rate": 4.9589556747406e-06,
|
|
"loss": 0.5838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5783436298370361,
|
|
"step": 6710,
|
|
"valid_targets_mean": 16365.0,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 1.792100346944222,
|
|
"grad_norm": 0.12343086819584895,
|
|
"learning_rate": 4.896679835994965e-06,
|
|
"loss": 0.579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5465564131736755,
|
|
"step": 6715,
|
|
"valid_targets_mean": 16514.3,
|
|
"valid_targets_min": 169
|
|
},
|
|
{
|
|
"epoch": 1.7934347477982386,
|
|
"grad_norm": 0.11718639877344501,
|
|
"learning_rate": 4.834784311869985e-06,
|
|
"loss": 0.577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5630457401275635,
|
|
"step": 6720,
|
|
"valid_targets_mean": 16447.2,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 1.794769148652255,
|
|
"grad_norm": 0.12437660439211354,
|
|
"learning_rate": 4.773269438152516e-06,
|
|
"loss": 0.6021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6006584167480469,
|
|
"step": 6725,
|
|
"valid_targets_mean": 14929.2,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 1.7961035495062716,
|
|
"grad_norm": 0.11840169382440213,
|
|
"learning_rate": 4.712135548564333e-06,
|
|
"loss": 0.5739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5946887135505676,
|
|
"step": 6730,
|
|
"valid_targets_mean": 16411.4,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 1.7974379503602882,
|
|
"grad_norm": 0.11591503727537256,
|
|
"learning_rate": 4.651382974760382e-06,
|
|
"loss": 0.5972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6221188306808472,
|
|
"step": 6735,
|
|
"valid_targets_mean": 16008.2,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 1.7987723512143048,
|
|
"grad_norm": 0.12495184922741498,
|
|
"learning_rate": 4.591012046326944e-06,
|
|
"loss": 0.5844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6207176446914673,
|
|
"step": 6740,
|
|
"valid_targets_mean": 15785.4,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.8001067520683214,
|
|
"grad_norm": 0.11777234439275203,
|
|
"learning_rate": 4.5310230907798285e-06,
|
|
"loss": 0.5912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5886247158050537,
|
|
"step": 6745,
|
|
"valid_targets_mean": 15777.8,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.801441152922338,
|
|
"grad_norm": 0.13310005646330186,
|
|
"learning_rate": 4.471416433562638e-06,
|
|
"loss": 0.5864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6195192337036133,
|
|
"step": 6750,
|
|
"valid_targets_mean": 16042.3,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 1.8027755537763546,
|
|
"grad_norm": 0.3573687288701633,
|
|
"learning_rate": 4.412192398044997e-06,
|
|
"loss": 0.5825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.547378420829773,
|
|
"step": 6755,
|
|
"valid_targets_mean": 16194.8,
|
|
"valid_targets_min": 22
|
|
},
|
|
{
|
|
"epoch": 1.804109954630371,
|
|
"grad_norm": 0.12228667808737118,
|
|
"learning_rate": 4.353351305520747e-06,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.615507185459137,
|
|
"step": 6760,
|
|
"valid_targets_mean": 17472.8,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 1.8054443554843875,
|
|
"grad_norm": 0.12032619569895683,
|
|
"learning_rate": 4.2948934752062655e-06,
|
|
"loss": 0.6047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5853080749511719,
|
|
"step": 6765,
|
|
"valid_targets_mean": 17070.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.806778756338404,
|
|
"grad_norm": 0.12726124020104732,
|
|
"learning_rate": 4.2368192242387355e-06,
|
|
"loss": 0.5978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6125775575637817,
|
|
"step": 6770,
|
|
"valid_targets_mean": 14640.3,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.8081131571924205,
|
|
"grad_norm": 0.1268406765012243,
|
|
"learning_rate": 4.179128867674348e-06,
|
|
"loss": 0.5728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5960764288902283,
|
|
"step": 6775,
|
|
"valid_targets_mean": 17594.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.809447558046437,
|
|
"grad_norm": 0.13020025506897828,
|
|
"learning_rate": 4.121822718486664e-06,
|
|
"loss": 0.5937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5955917835235596,
|
|
"step": 6780,
|
|
"valid_targets_mean": 15496.2,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 1.8107819589004537,
|
|
"grad_norm": 0.13462347960359008,
|
|
"learning_rate": 4.064901087564918e-06,
|
|
"loss": 0.6062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6180999279022217,
|
|
"step": 6785,
|
|
"valid_targets_mean": 14829.8,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 1.8121163597544703,
|
|
"grad_norm": 0.12520707920034008,
|
|
"learning_rate": 4.008364283712298e-06,
|
|
"loss": 0.5764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6032578945159912,
|
|
"step": 6790,
|
|
"valid_targets_mean": 15743.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 1.8134507606084869,
|
|
"grad_norm": 0.12153868786431792,
|
|
"learning_rate": 3.9522126136442515e-06,
|
|
"loss": 0.5842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5904735326766968,
|
|
"step": 6795,
|
|
"valid_targets_mean": 17683.8,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 1.8147851614625035,
|
|
"grad_norm": 0.13380932238576912,
|
|
"learning_rate": 3.896446381986901e-06,
|
|
"loss": 0.5886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5878794193267822,
|
|
"step": 6800,
|
|
"valid_targets_mean": 15747.5,
|
|
"valid_targets_min": 24
|
|
},
|
|
{
|
|
"epoch": 1.8161195623165198,
|
|
"grad_norm": 0.11363090802399065,
|
|
"learning_rate": 3.841065891275328e-06,
|
|
"loss": 0.5814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5766637921333313,
|
|
"step": 6805,
|
|
"valid_targets_mean": 16026.2,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 1.8174539631705364,
|
|
"grad_norm": 0.11903433960083935,
|
|
"learning_rate": 3.786071441951918e-06,
|
|
"loss": 0.5715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5726655125617981,
|
|
"step": 6810,
|
|
"valid_targets_mean": 14966.9,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 1.818788364024553,
|
|
"grad_norm": 0.10999529624986326,
|
|
"learning_rate": 3.7314633323647952e-06,
|
|
"loss": 0.599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5864944458007812,
|
|
"step": 6815,
|
|
"valid_targets_mean": 15981.9,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 1.8201227648785694,
|
|
"grad_norm": 0.11513842609934516,
|
|
"learning_rate": 3.6772418587661474e-06,
|
|
"loss": 0.5895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6018288135528564,
|
|
"step": 6820,
|
|
"valid_targets_mean": 16233.4,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.821457165732586,
|
|
"grad_norm": 0.126039895611612,
|
|
"learning_rate": 3.623407315310667e-06,
|
|
"loss": 0.6095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6270237565040588,
|
|
"step": 6825,
|
|
"valid_targets_mean": 15310.5,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.8227915665866026,
|
|
"grad_norm": 0.1215457476544879,
|
|
"learning_rate": 3.5699599940538836e-06,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5928213596343994,
|
|
"step": 6830,
|
|
"valid_targets_mean": 15371.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 1.8241259674406192,
|
|
"grad_norm": 0.12623056542560115,
|
|
"learning_rate": 3.5169001849506496e-06,
|
|
"loss": 0.6144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6307641863822937,
|
|
"step": 6835,
|
|
"valid_targets_mean": 15279.9,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 1.8254603682946358,
|
|
"grad_norm": 0.11249761749010688,
|
|
"learning_rate": 3.4642281758535645e-06,
|
|
"loss": 0.5942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5592288970947266,
|
|
"step": 6840,
|
|
"valid_targets_mean": 16210.7,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 1.8267947691486524,
|
|
"grad_norm": 0.1098951823272564,
|
|
"learning_rate": 3.4119442525113283e-06,
|
|
"loss": 0.5938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5647261738777161,
|
|
"step": 6845,
|
|
"valid_targets_mean": 15996.9,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 1.828129170002669,
|
|
"grad_norm": 0.12384781447849601,
|
|
"learning_rate": 3.3600486985673163e-06,
|
|
"loss": 0.5848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6426936388015747,
|
|
"step": 6850,
|
|
"valid_targets_mean": 14943.6,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 1.8294635708566853,
|
|
"grad_norm": 0.13195995027485155,
|
|
"learning_rate": 3.308541795557948e-06,
|
|
"loss": 0.5941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6188579201698303,
|
|
"step": 6855,
|
|
"valid_targets_mean": 14683.0,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 1.830797971710702,
|
|
"grad_norm": 0.125628185782131,
|
|
"learning_rate": 3.2574238229111704e-06,
|
|
"loss": 0.5902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5590197443962097,
|
|
"step": 6860,
|
|
"valid_targets_mean": 15653.1,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.8321323725647183,
|
|
"grad_norm": 0.12472682756940785,
|
|
"learning_rate": 3.2066950579450024e-06,
|
|
"loss": 0.582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5600212812423706,
|
|
"step": 6865,
|
|
"valid_targets_mean": 16062.4,
|
|
"valid_targets_min": 61
|
|
},
|
|
{
|
|
"epoch": 1.8334667734187349,
|
|
"grad_norm": 0.12207900265083974,
|
|
"learning_rate": 3.156355775865968e-06,
|
|
"loss": 0.5792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5914990305900574,
|
|
"step": 6870,
|
|
"valid_targets_mean": 15910.1,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 1.8348011742727515,
|
|
"grad_norm": 0.13258139999974625,
|
|
"learning_rate": 3.106406249767607e-06,
|
|
"loss": 0.5816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.591293215751648,
|
|
"step": 6875,
|
|
"valid_targets_mean": 15699.3,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.836135575126768,
|
|
"grad_norm": 0.1160461192943085,
|
|
"learning_rate": 3.056846750629041e-06,
|
|
"loss": 0.604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5780788660049438,
|
|
"step": 6880,
|
|
"valid_targets_mean": 16653.1,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.8374699759807847,
|
|
"grad_norm": 0.11635703270079688,
|
|
"learning_rate": 3.007677547313436e-06,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5911270380020142,
|
|
"step": 6885,
|
|
"valid_targets_mean": 16261.6,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 1.8388043768348012,
|
|
"grad_norm": 0.10977172189530393,
|
|
"learning_rate": 2.958898906566626e-06,
|
|
"loss": 0.5962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5859546065330505,
|
|
"step": 6890,
|
|
"valid_targets_mean": 16357.7,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 1.8401387776888178,
|
|
"grad_norm": 0.11719525454631909,
|
|
"learning_rate": 2.910511093015588e-06,
|
|
"loss": 0.5975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6086652278900146,
|
|
"step": 6895,
|
|
"valid_targets_mean": 15757.9,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.8414731785428344,
|
|
"grad_norm": 0.12906155929183688,
|
|
"learning_rate": 2.8625143691670404e-06,
|
|
"loss": 0.5879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6210035085678101,
|
|
"step": 6900,
|
|
"valid_targets_mean": 14412.2,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.8428075793968508,
|
|
"grad_norm": 0.12002678379642992,
|
|
"learning_rate": 2.8149089954060287e-06,
|
|
"loss": 0.5852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5683364868164062,
|
|
"step": 6905,
|
|
"valid_targets_mean": 15993.1,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 1.8441419802508674,
|
|
"grad_norm": 0.11656843705780746,
|
|
"learning_rate": 2.767695229994507e-06,
|
|
"loss": 0.6108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6220520734786987,
|
|
"step": 6910,
|
|
"valid_targets_mean": 17196.1,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 1.8454763811048838,
|
|
"grad_norm": 0.10599874066792718,
|
|
"learning_rate": 2.720873329069895e-06,
|
|
"loss": 0.5894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5496461391448975,
|
|
"step": 6915,
|
|
"valid_targets_mean": 17237.9,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.8468107819589004,
|
|
"grad_norm": 0.11867440275148777,
|
|
"learning_rate": 2.6744435466437535e-06,
|
|
"loss": 0.5904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6073455214500427,
|
|
"step": 6920,
|
|
"valid_targets_mean": 14592.5,
|
|
"valid_targets_min": 104
|
|
},
|
|
{
|
|
"epoch": 1.848145182812917,
|
|
"grad_norm": 0.10484802668554907,
|
|
"learning_rate": 2.6284061346004055e-06,
|
|
"loss": 0.5878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5743545889854431,
|
|
"step": 6925,
|
|
"valid_targets_mean": 16563.4,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 1.8494795836669335,
|
|
"grad_norm": 0.11758213812901547,
|
|
"learning_rate": 2.5827613426954664e-06,
|
|
"loss": 0.5966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6080979108810425,
|
|
"step": 6930,
|
|
"valid_targets_mean": 14695.3,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.8508139845209501,
|
|
"grad_norm": 0.1232609498682966,
|
|
"learning_rate": 2.537509418554631e-06,
|
|
"loss": 0.5922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5918781757354736,
|
|
"step": 6935,
|
|
"valid_targets_mean": 15298.3,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 1.8521483853749667,
|
|
"grad_norm": 0.15512351459385357,
|
|
"learning_rate": 2.4926506076722417e-06,
|
|
"loss": 0.599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5890061259269714,
|
|
"step": 6940,
|
|
"valid_targets_mean": 15713.8,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.8534827862289833,
|
|
"grad_norm": 0.11156936934072108,
|
|
"learning_rate": 2.4481851534099707e-06,
|
|
"loss": 0.5852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5654975175857544,
|
|
"step": 6945,
|
|
"valid_targets_mean": 15386.7,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.8548171870829997,
|
|
"grad_norm": 0.12304603218916753,
|
|
"learning_rate": 2.404113296995505e-06,
|
|
"loss": 0.6024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5728027820587158,
|
|
"step": 6950,
|
|
"valid_targets_mean": 15505.8,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 1.8561515879370163,
|
|
"grad_norm": 0.11534915665069846,
|
|
"learning_rate": 2.360435277521275e-06,
|
|
"loss": 0.59,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5883013606071472,
|
|
"step": 6955,
|
|
"valid_targets_mean": 16474.5,
|
|
"valid_targets_min": 122
|
|
},
|
|
{
|
|
"epoch": 1.8574859887910327,
|
|
"grad_norm": 0.12014463570615995,
|
|
"learning_rate": 2.3171513319430596e-06,
|
|
"loss": 0.6038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6192582845687866,
|
|
"step": 6960,
|
|
"valid_targets_mean": 14878.8,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 1.8588203896450493,
|
|
"grad_norm": 0.10774896921661002,
|
|
"learning_rate": 2.274261695078841e-06,
|
|
"loss": 0.5971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5748175382614136,
|
|
"step": 6965,
|
|
"valid_targets_mean": 17166.3,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 1.8601547904990658,
|
|
"grad_norm": 0.1078641120068596,
|
|
"learning_rate": 2.231766599607371e-06,
|
|
"loss": 0.5697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5333196520805359,
|
|
"step": 6970,
|
|
"valid_targets_mean": 15537.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 1.8614891913530824,
|
|
"grad_norm": 0.11864787077881608,
|
|
"learning_rate": 2.1896662760670618e-06,
|
|
"loss": 0.591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6141413450241089,
|
|
"step": 6975,
|
|
"valid_targets_mean": 16036.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.862823592207099,
|
|
"grad_norm": 0.11136294506461766,
|
|
"learning_rate": 2.1479609528546328e-06,
|
|
"loss": 0.5952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5849490165710449,
|
|
"step": 6980,
|
|
"valid_targets_mean": 16131.3,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.8641579930611156,
|
|
"grad_norm": 0.11410952923157061,
|
|
"learning_rate": 2.106650856223899e-06,
|
|
"loss": 0.5975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5867966413497925,
|
|
"step": 6985,
|
|
"valid_targets_mean": 16985.9,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 1.8654923939151322,
|
|
"grad_norm": 0.1128290098107921,
|
|
"learning_rate": 2.0657362102845576e-06,
|
|
"loss": 0.5847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5945344567298889,
|
|
"step": 6990,
|
|
"valid_targets_mean": 14839.8,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 1.8668267947691488,
|
|
"grad_norm": 0.11045443610042519,
|
|
"learning_rate": 2.0252172370009646e-06,
|
|
"loss": 0.5717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5728679895401001,
|
|
"step": 6995,
|
|
"valid_targets_mean": 16231.9,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.8681611956231652,
|
|
"grad_norm": 0.11519466042156558,
|
|
"learning_rate": 1.985094156190925e-06,
|
|
"loss": 0.6034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.58236163854599,
|
|
"step": 7000,
|
|
"valid_targets_mean": 16598.3,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.8694955964771818,
|
|
"grad_norm": 0.12345773340208867,
|
|
"learning_rate": 1.9453671855244963e-06,
|
|
"loss": 0.5913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5991541147232056,
|
|
"step": 7005,
|
|
"valid_targets_mean": 16407.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 1.8708299973311981,
|
|
"grad_norm": 0.11577989709027653,
|
|
"learning_rate": 1.906036540522829e-06,
|
|
"loss": 0.5972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6317030787467957,
|
|
"step": 7010,
|
|
"valid_targets_mean": 15361.4,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.8721643981852147,
|
|
"grad_norm": 0.11308467546031899,
|
|
"learning_rate": 1.8671024345569773e-06,
|
|
"loss": 0.5984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6096479296684265,
|
|
"step": 7015,
|
|
"valid_targets_mean": 16363.6,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 1.8734987990392313,
|
|
"grad_norm": 0.10830315239475957,
|
|
"learning_rate": 1.8285650788467415e-06,
|
|
"loss": 0.5821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5622441172599792,
|
|
"step": 7020,
|
|
"valid_targets_mean": 16065.3,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.874833199893248,
|
|
"grad_norm": 0.11724510320410304,
|
|
"learning_rate": 1.7904246824595514e-06,
|
|
"loss": 0.5909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5891350507736206,
|
|
"step": 7025,
|
|
"valid_targets_mean": 16113.2,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 1.8761676007472645,
|
|
"grad_norm": 0.1148792581953953,
|
|
"learning_rate": 1.7526814523092763e-06,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6323190927505493,
|
|
"step": 7030,
|
|
"valid_targets_mean": 14901.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.8775020016012811,
|
|
"grad_norm": 0.11797792259985213,
|
|
"learning_rate": 1.7153355931551592e-06,
|
|
"loss": 0.6158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6224967241287231,
|
|
"step": 7035,
|
|
"valid_targets_mean": 15057.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.8788364024552977,
|
|
"grad_norm": 0.1260175985764918,
|
|
"learning_rate": 1.678387307600676e-06,
|
|
"loss": 0.6037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5865051746368408,
|
|
"step": 7040,
|
|
"valid_targets_mean": 15720.0,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.880170803309314,
|
|
"grad_norm": 0.1067135636134446,
|
|
"learning_rate": 1.6418367960924271e-06,
|
|
"loss": 0.5846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.580804705619812,
|
|
"step": 7045,
|
|
"valid_targets_mean": 15797.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.8815052041633307,
|
|
"grad_norm": 0.10174351633184524,
|
|
"learning_rate": 1.6056842569190987e-06,
|
|
"loss": 0.5923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5535533428192139,
|
|
"step": 7050,
|
|
"valid_targets_mean": 16996.6,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 1.8828396050173473,
|
|
"grad_norm": 0.11244985512618108,
|
|
"learning_rate": 1.5699298862103276e-06,
|
|
"loss": 0.5891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5968772172927856,
|
|
"step": 7055,
|
|
"valid_targets_mean": 16006.3,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 1.8841740058713636,
|
|
"grad_norm": 0.11948717366431685,
|
|
"learning_rate": 1.5345738779356714e-06,
|
|
"loss": 0.6002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6243698596954346,
|
|
"step": 7060,
|
|
"valid_targets_mean": 15559.3,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.8855084067253802,
|
|
"grad_norm": 0.11580752047329682,
|
|
"learning_rate": 1.4996164239035408e-06,
|
|
"loss": 0.5748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5642666816711426,
|
|
"step": 7065,
|
|
"valid_targets_mean": 15481.2,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.8868428075793968,
|
|
"grad_norm": 0.10485005764097419,
|
|
"learning_rate": 1.4650577137601843e-06,
|
|
"loss": 0.5957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5874780416488647,
|
|
"step": 7070,
|
|
"valid_targets_mean": 15717.2,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 1.8881772084334134,
|
|
"grad_norm": 0.10417353191360103,
|
|
"learning_rate": 1.4308979349886146e-06,
|
|
"loss": 0.5924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6178035736083984,
|
|
"step": 7075,
|
|
"valid_targets_mean": 17092.7,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 1.88951160928743,
|
|
"grad_norm": 0.1058447762736142,
|
|
"learning_rate": 1.3971372729076503e-06,
|
|
"loss": 0.6024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6247731447219849,
|
|
"step": 7080,
|
|
"valid_targets_mean": 15420.1,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 1.8908460101414466,
|
|
"grad_norm": 0.10652874154039586,
|
|
"learning_rate": 1.3637759106708501e-06,
|
|
"loss": 0.5972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5917098522186279,
|
|
"step": 7085,
|
|
"valid_targets_mean": 16516.3,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.8921804109954632,
|
|
"grad_norm": 0.10799915454444874,
|
|
"learning_rate": 1.3308140292655645e-06,
|
|
"loss": 0.58,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5906647443771362,
|
|
"step": 7090,
|
|
"valid_targets_mean": 16697.9,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 1.8935148118494796,
|
|
"grad_norm": 0.11981808776170194,
|
|
"learning_rate": 1.2982518075119352e-06,
|
|
"loss": 0.5763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6170154809951782,
|
|
"step": 7095,
|
|
"valid_targets_mean": 15495.0,
|
|
"valid_targets_min": 146
|
|
},
|
|
{
|
|
"epoch": 1.8948492127034962,
|
|
"grad_norm": 0.11486360410426957,
|
|
"learning_rate": 1.2660894220619139e-06,
|
|
"loss": 0.595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6035982370376587,
|
|
"step": 7100,
|
|
"valid_targets_mean": 16467.9,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 1.8961836135575125,
|
|
"grad_norm": 0.10499519888203847,
|
|
"learning_rate": 1.2343270473983286e-06,
|
|
"loss": 0.597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5793255567550659,
|
|
"step": 7105,
|
|
"valid_targets_mean": 16619.4,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 1.8975180144115291,
|
|
"grad_norm": 0.12478283596005901,
|
|
"learning_rate": 1.202964855833935e-06,
|
|
"loss": 0.5831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5687292814254761,
|
|
"step": 7110,
|
|
"valid_targets_mean": 15931.3,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.8988524152655457,
|
|
"grad_norm": 0.10846084952146565,
|
|
"learning_rate": 1.1720030175104506e-06,
|
|
"loss": 0.5972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5962158441543579,
|
|
"step": 7115,
|
|
"valid_targets_mean": 16200.3,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.9001868161195623,
|
|
"grad_norm": 0.11149955520672268,
|
|
"learning_rate": 1.1414417003976634e-06,
|
|
"loss": 0.5787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.565889835357666,
|
|
"step": 7120,
|
|
"valid_targets_mean": 16230.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.901521216973579,
|
|
"grad_norm": 0.09727284712006973,
|
|
"learning_rate": 1.1112810702925163e-06,
|
|
"loss": 0.5852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5637081265449524,
|
|
"step": 7125,
|
|
"valid_targets_mean": 17315.3,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 1.9028556178275955,
|
|
"grad_norm": 0.11391246609885147,
|
|
"learning_rate": 1.0815212908181825e-06,
|
|
"loss": 0.584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5784386396408081,
|
|
"step": 7130,
|
|
"valid_targets_mean": 16232.3,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 1.904190018681612,
|
|
"grad_norm": 0.10947631191296657,
|
|
"learning_rate": 1.0521625234232333e-06,
|
|
"loss": 0.5959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6112264394760132,
|
|
"step": 7135,
|
|
"valid_targets_mean": 15190.9,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.9055244195356285,
|
|
"grad_norm": 0.11228511943247846,
|
|
"learning_rate": 1.023204927380672e-06,
|
|
"loss": 0.6016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5891322493553162,
|
|
"step": 7140,
|
|
"valid_targets_mean": 16496.0,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 1.906858820389645,
|
|
"grad_norm": 0.10335632580031771,
|
|
"learning_rate": 9.946486597871672e-07,
|
|
"loss": 0.5931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5750100612640381,
|
|
"step": 7145,
|
|
"valid_targets_mean": 16288.6,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 1.9081932212436616,
|
|
"grad_norm": 0.10643300666622367,
|
|
"learning_rate": 9.664938755621632e-07,
|
|
"loss": 0.5861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6035852432250977,
|
|
"step": 7150,
|
|
"valid_targets_mean": 15821.4,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 1.909527622097678,
|
|
"grad_norm": 0.10814021288570172,
|
|
"learning_rate": 9.387407274469793e-07,
|
|
"loss": 0.5903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5945829153060913,
|
|
"step": 7155,
|
|
"valid_targets_mean": 16528.9,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 1.9108620229516946,
|
|
"grad_norm": 0.10771079524688575,
|
|
"learning_rate": 9.113893660041033e-07,
|
|
"loss": 0.5876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5860378742218018,
|
|
"step": 7160,
|
|
"valid_targets_mean": 15851.9,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.9121964238057112,
|
|
"grad_norm": 0.10426780320770314,
|
|
"learning_rate": 8.844399396162577e-07,
|
|
"loss": 0.5817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5573742389678955,
|
|
"step": 7165,
|
|
"valid_targets_mean": 16035.7,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 1.9135308246597278,
|
|
"grad_norm": 0.11271184825740115,
|
|
"learning_rate": 8.578925944856596e-07,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.607288122177124,
|
|
"step": 7170,
|
|
"valid_targets_mean": 16248.8,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 1.9148652255137444,
|
|
"grad_norm": 0.10539783629888545,
|
|
"learning_rate": 8.317474746332126e-07,
|
|
"loss": 0.5763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5683166980743408,
|
|
"step": 7175,
|
|
"valid_targets_mean": 16696.4,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 1.916199626367761,
|
|
"grad_norm": 0.11251688734146195,
|
|
"learning_rate": 8.060047218977323e-07,
|
|
"loss": 0.5944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5919359922409058,
|
|
"step": 7180,
|
|
"valid_targets_mean": 15508.2,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.9175340272217776,
|
|
"grad_norm": 0.11077719358664008,
|
|
"learning_rate": 7.806644759351471e-07,
|
|
"loss": 0.5917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6042367219924927,
|
|
"step": 7185,
|
|
"valid_targets_mean": 16469.2,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.918868428075794,
|
|
"grad_norm": 0.10978081665948485,
|
|
"learning_rate": 7.557268742177908e-07,
|
|
"loss": 0.5953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5959744453430176,
|
|
"step": 7190,
|
|
"valid_targets_mean": 15302.7,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 1.9202028289298105,
|
|
"grad_norm": 0.11271598718073285,
|
|
"learning_rate": 7.311920520336106e-07,
|
|
"loss": 0.5895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6039493083953857,
|
|
"step": 7195,
|
|
"valid_targets_mean": 15806.0,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.921537229783827,
|
|
"grad_norm": 0.09979538157727764,
|
|
"learning_rate": 7.070601424854522e-07,
|
|
"loss": 0.5909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5759025812149048,
|
|
"step": 7200,
|
|
"valid_targets_mean": 17470.4,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.9228716306378435,
|
|
"grad_norm": 0.11197812099612356,
|
|
"learning_rate": 6.833312764903343e-07,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6100215911865234,
|
|
"step": 7205,
|
|
"valid_targets_mean": 16637.7,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.92420603149186,
|
|
"grad_norm": 0.11349120819192293,
|
|
"learning_rate": 6.600055827787581e-07,
|
|
"loss": 0.5888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5917244553565979,
|
|
"step": 7210,
|
|
"valid_targets_mean": 15618.0,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.9255404323458767,
|
|
"grad_norm": 0.10606001203762529,
|
|
"learning_rate": 6.370831878939747e-07,
|
|
"loss": 0.6123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6029739379882812,
|
|
"step": 7215,
|
|
"valid_targets_mean": 16360.1,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.9268748331998933,
|
|
"grad_norm": 0.11101483061117472,
|
|
"learning_rate": 6.1456421619131e-07,
|
|
"loss": 0.5789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.590622067451477,
|
|
"step": 7220,
|
|
"valid_targets_mean": 14537.4,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 1.9282092340539099,
|
|
"grad_norm": 0.10746309454661468,
|
|
"learning_rate": 5.924487898375158e-07,
|
|
"loss": 0.5894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.610184907913208,
|
|
"step": 7225,
|
|
"valid_targets_mean": 15415.4,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 1.9295436349079265,
|
|
"grad_norm": 0.10496606126519877,
|
|
"learning_rate": 5.707370288100782e-07,
|
|
"loss": 0.6039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6027356386184692,
|
|
"step": 7230,
|
|
"valid_targets_mean": 16612.8,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.9308780357619428,
|
|
"grad_norm": 0.12280401202824656,
|
|
"learning_rate": 5.494290508965605e-07,
|
|
"loss": 0.6047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6241533756256104,
|
|
"step": 7235,
|
|
"valid_targets_mean": 14704.9,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 1.9322124366159594,
|
|
"grad_norm": 0.10418094491281422,
|
|
"learning_rate": 5.285249716940026e-07,
|
|
"loss": 0.5834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5590264201164246,
|
|
"step": 7240,
|
|
"valid_targets_mean": 17070.4,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.933546837469976,
|
|
"grad_norm": 0.10925989299981413,
|
|
"learning_rate": 5.080249046082563e-07,
|
|
"loss": 0.5967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6061210036277771,
|
|
"step": 7245,
|
|
"valid_targets_mean": 15973.7,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 1.9348812383239924,
|
|
"grad_norm": 0.1038167631296208,
|
|
"learning_rate": 4.879289608533926e-07,
|
|
"loss": 0.6052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5867657661437988,
|
|
"step": 7250,
|
|
"valid_targets_mean": 16210.0,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 1.936215639178009,
|
|
"grad_norm": 0.10564140231065966,
|
|
"learning_rate": 4.6823724945107e-07,
|
|
"loss": 0.5865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5992836952209473,
|
|
"step": 7255,
|
|
"valid_targets_mean": 15896.8,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.9375500400320256,
|
|
"grad_norm": 0.10121772563289091,
|
|
"learning_rate": 4.489498772299843e-07,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5765215158462524,
|
|
"step": 7260,
|
|
"valid_targets_mean": 16066.0,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 1.9388844408860422,
|
|
"grad_norm": 0.10685783716605916,
|
|
"learning_rate": 4.3006694882526947e-07,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5946753025054932,
|
|
"step": 7265,
|
|
"valid_targets_mean": 16010.7,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.9402188417400588,
|
|
"grad_norm": 0.11537038648261387,
|
|
"learning_rate": 4.115885666779062e-07,
|
|
"loss": 0.5921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6231012344360352,
|
|
"step": 7270,
|
|
"valid_targets_mean": 15111.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.9415532425940754,
|
|
"grad_norm": 0.10414761108882142,
|
|
"learning_rate": 3.9351483103420566e-07,
|
|
"loss": 0.5803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5548113584518433,
|
|
"step": 7275,
|
|
"valid_targets_mean": 17432.2,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 1.942887643448092,
|
|
"grad_norm": 0.10200057372451578,
|
|
"learning_rate": 3.758458399452519e-07,
|
|
"loss": 0.5856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5601938962936401,
|
|
"step": 7280,
|
|
"valid_targets_mean": 16905.9,
|
|
"valid_targets_min": 193
|
|
},
|
|
{
|
|
"epoch": 1.9442220443021083,
|
|
"grad_norm": 0.10194804058111612,
|
|
"learning_rate": 3.585816892663351e-07,
|
|
"loss": 0.5865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5662258863449097,
|
|
"step": 7285,
|
|
"valid_targets_mean": 16372.1,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 1.945556445156125,
|
|
"grad_norm": 0.10425651405329209,
|
|
"learning_rate": 3.4172247265650267e-07,
|
|
"loss": 0.5944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5972768664360046,
|
|
"step": 7290,
|
|
"valid_targets_mean": 15505.5,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 1.9468908460101413,
|
|
"grad_norm": 0.10652966361544959,
|
|
"learning_rate": 3.252682815779922e-07,
|
|
"loss": 0.5971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5889468789100647,
|
|
"step": 7295,
|
|
"valid_targets_mean": 15443.7,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 1.9482252468641579,
|
|
"grad_norm": 0.10225399193367106,
|
|
"learning_rate": 3.0921920529574096e-07,
|
|
"loss": 0.5976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6094781756401062,
|
|
"step": 7300,
|
|
"valid_targets_mean": 16137.0,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 1.9495596477181745,
|
|
"grad_norm": 0.1058290729172914,
|
|
"learning_rate": 2.9357533087694397e-07,
|
|
"loss": 0.59,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6034204959869385,
|
|
"step": 7305,
|
|
"valid_targets_mean": 15746.1,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 1.950894048572191,
|
|
"grad_norm": 0.09901387339508871,
|
|
"learning_rate": 2.7833674319052977e-07,
|
|
"loss": 0.5876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5842059254646301,
|
|
"step": 7310,
|
|
"valid_targets_mean": 16711.1,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 1.9522284494262077,
|
|
"grad_norm": 0.1028355506347445,
|
|
"learning_rate": 2.6350352490672746e-07,
|
|
"loss": 0.5919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6164946556091309,
|
|
"step": 7315,
|
|
"valid_targets_mean": 15863.6,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 1.9535628502802243,
|
|
"grad_norm": 0.11315707367960545,
|
|
"learning_rate": 2.49075756496625e-07,
|
|
"loss": 0.5899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5809638500213623,
|
|
"step": 7320,
|
|
"valid_targets_mean": 16541.0,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.9548972511342408,
|
|
"grad_norm": 0.10266507868918702,
|
|
"learning_rate": 2.3505351623170353e-07,
|
|
"loss": 0.6066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5731981992721558,
|
|
"step": 7325,
|
|
"valid_targets_mean": 15640.2,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 1.9562316519882574,
|
|
"grad_norm": 0.10345989214210093,
|
|
"learning_rate": 2.2143688018343707e-07,
|
|
"loss": 0.6077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5999371409416199,
|
|
"step": 7330,
|
|
"valid_targets_mean": 15955.4,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.9575660528422738,
|
|
"grad_norm": 0.10090461630292039,
|
|
"learning_rate": 2.0822592222287659e-07,
|
|
"loss": 0.5969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6222025156021118,
|
|
"step": 7335,
|
|
"valid_targets_mean": 16458.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.9589004536962904,
|
|
"grad_norm": 0.11171063134597106,
|
|
"learning_rate": 1.9542071402024185e-07,
|
|
"loss": 0.5785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6225034594535828,
|
|
"step": 7340,
|
|
"valid_targets_mean": 14275.8,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 1.9602348545503068,
|
|
"grad_norm": 0.10472137138520929,
|
|
"learning_rate": 1.830213250445467e-07,
|
|
"loss": 0.5777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5679185390472412,
|
|
"step": 7345,
|
|
"valid_targets_mean": 15203.7,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 1.9615692554043234,
|
|
"grad_norm": 0.10556708209565631,
|
|
"learning_rate": 1.7102782256319115e-07,
|
|
"loss": 0.6075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6184024810791016,
|
|
"step": 7350,
|
|
"valid_targets_mean": 15826.0,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.96290365625834,
|
|
"grad_norm": 0.10908761627910832,
|
|
"learning_rate": 1.5944027164163652e-07,
|
|
"loss": 0.6058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6222631931304932,
|
|
"step": 7355,
|
|
"valid_targets_mean": 15221.2,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 1.9642380571123566,
|
|
"grad_norm": 0.1010896453599452,
|
|
"learning_rate": 1.4825873514302257e-07,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961298942565918,
|
|
"step": 7360,
|
|
"valid_targets_mean": 15682.6,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.9655724579663731,
|
|
"grad_norm": 0.10025610273231542,
|
|
"learning_rate": 1.3748327372784252e-07,
|
|
"loss": 0.5972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6105427145957947,
|
|
"step": 7365,
|
|
"valid_targets_mean": 16404.8,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.9669068588203897,
|
|
"grad_norm": 0.10228371428791674,
|
|
"learning_rate": 1.271139458536019e-07,
|
|
"loss": 0.5934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5717061161994934,
|
|
"step": 7370,
|
|
"valid_targets_mean": 16421.3,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 1.9682412596744063,
|
|
"grad_norm": 0.10475497189588388,
|
|
"learning_rate": 1.1715080777451868e-07,
|
|
"loss": 0.5912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.59098219871521,
|
|
"step": 7375,
|
|
"valid_targets_mean": 16878.4,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.9695756605284227,
|
|
"grad_norm": 0.10015423663677037,
|
|
"learning_rate": 1.0759391354119017e-07,
|
|
"loss": 0.5878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5767727494239807,
|
|
"step": 7380,
|
|
"valid_targets_mean": 15934.6,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 1.9709100613824393,
|
|
"grad_norm": 0.10156387805197431,
|
|
"learning_rate": 9.844331500034331e-08,
|
|
"loss": 0.5962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6096939444541931,
|
|
"step": 7385,
|
|
"valid_targets_mean": 16366.0,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.9722444622364559,
|
|
"grad_norm": 0.10719608709343924,
|
|
"learning_rate": 8.969906179449316e-08,
|
|
"loss": 0.6091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6203577518463135,
|
|
"step": 7390,
|
|
"valid_targets_mean": 15040.2,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 1.9735788630904723,
|
|
"grad_norm": 0.11017698139830989,
|
|
"learning_rate": 8.136120136174318e-08,
|
|
"loss": 0.6054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5938665270805359,
|
|
"step": 7395,
|
|
"valid_targets_mean": 16084.2,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 1.9749132639444889,
|
|
"grad_norm": 0.10566871444745696,
|
|
"learning_rate": 7.342977893546875e-08,
|
|
"loss": 0.6083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6060525178909302,
|
|
"step": 7400,
|
|
"valid_targets_mean": 16333.7,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 1.9762476647985054,
|
|
"grad_norm": 0.09749390670046561,
|
|
"learning_rate": 6.590483754409237e-08,
|
|
"loss": 0.5744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5636769533157349,
|
|
"step": 7405,
|
|
"valid_targets_mean": 16822.6,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 1.977582065652522,
|
|
"grad_norm": 0.11092173648538958,
|
|
"learning_rate": 5.878641801087547e-08,
|
|
"loss": 0.589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5932129621505737,
|
|
"step": 7410,
|
|
"valid_targets_mean": 15366.3,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.9789164665065386,
|
|
"grad_norm": 0.10269954867675517,
|
|
"learning_rate": 5.207455895365198e-08,
|
|
"loss": 0.5908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5902327299118042,
|
|
"step": 7415,
|
|
"valid_targets_mean": 15743.9,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 1.9802508673605552,
|
|
"grad_norm": 0.10327250901637106,
|
|
"learning_rate": 4.5769296784653463e-08,
|
|
"loss": 0.5963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.57696533203125,
|
|
"step": 7420,
|
|
"valid_targets_mean": 15775.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 1.9815852682145718,
|
|
"grad_norm": 0.10474078714099273,
|
|
"learning_rate": 3.9870665710300954e-08,
|
|
"loss": 0.5982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6041386127471924,
|
|
"step": 7425,
|
|
"valid_targets_mean": 15682.6,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 1.9829196690685882,
|
|
"grad_norm": 0.11011438975518267,
|
|
"learning_rate": 3.437869773101343e-08,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6006224155426025,
|
|
"step": 7430,
|
|
"valid_targets_mean": 14573.4,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.9842540699226048,
|
|
"grad_norm": 0.10785042466983051,
|
|
"learning_rate": 2.929342264103296e-08,
|
|
"loss": 0.5782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6067730784416199,
|
|
"step": 7435,
|
|
"valid_targets_mean": 17377.5,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 1.9855884707766212,
|
|
"grad_norm": 0.10847529204402115,
|
|
"learning_rate": 2.4614868028274837e-08,
|
|
"loss": 0.5853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5615088939666748,
|
|
"step": 7440,
|
|
"valid_targets_mean": 15786.1,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.9869228716306377,
|
|
"grad_norm": 0.10017211729266047,
|
|
"learning_rate": 2.034305927416935e-08,
|
|
"loss": 0.5842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5783184766769409,
|
|
"step": 7445,
|
|
"valid_targets_mean": 17086.3,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.9882572724846543,
|
|
"grad_norm": 0.10239017393007949,
|
|
"learning_rate": 1.647801955354522e-08,
|
|
"loss": 0.5771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.565893292427063,
|
|
"step": 7450,
|
|
"valid_targets_mean": 15870.3,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.989591673338671,
|
|
"grad_norm": 0.10269734101477318,
|
|
"learning_rate": 1.301976983445474e-08,
|
|
"loss": 0.5851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5717909336090088,
|
|
"step": 7455,
|
|
"valid_targets_mean": 16363.2,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 1.9909260741926875,
|
|
"grad_norm": 0.10335049595234001,
|
|
"learning_rate": 9.968328878115495e-09,
|
|
"loss": 0.6131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6074311137199402,
|
|
"step": 7460,
|
|
"valid_targets_mean": 15293.2,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 1.9922604750467041,
|
|
"grad_norm": 0.10779248661193865,
|
|
"learning_rate": 7.3237132387604646e-09,
|
|
"loss": 0.607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6135603189468384,
|
|
"step": 7465,
|
|
"valid_targets_mean": 15164.7,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.9935948759007207,
|
|
"grad_norm": 0.10479306145925003,
|
|
"learning_rate": 5.0859372635964065e-09,
|
|
"loss": 0.5819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5816622972488403,
|
|
"step": 7470,
|
|
"valid_targets_mean": 15709.1,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 1.994929276754737,
|
|
"grad_norm": 0.1040250522239671,
|
|
"learning_rate": 3.2550130926789487e-09,
|
|
"loss": 0.5919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6114310026168823,
|
|
"step": 7475,
|
|
"valid_targets_mean": 16027.9,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 1.9962636776087537,
|
|
"grad_norm": 0.1063982033117486,
|
|
"learning_rate": 1.8309506588959356e-09,
|
|
"loss": 0.5921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6096254587173462,
|
|
"step": 7480,
|
|
"valid_targets_mean": 16148.9,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.9975980784627703,
|
|
"grad_norm": 0.10167680252386486,
|
|
"learning_rate": 8.137576878508578e-10,
|
|
"loss": 0.6098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5815737247467041,
|
|
"step": 7485,
|
|
"valid_targets_mean": 15848.2,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 1.9989324793167866,
|
|
"grad_norm": 0.10102538478634103,
|
|
"learning_rate": 2.0343969787950387e-10,
|
|
"loss": 0.5871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5756806135177612,
|
|
"step": 7490,
|
|
"valid_targets_mean": 16549.2,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.598960816860199,
|
|
"step": 7494,
|
|
"total_flos": 2.3824840797454336e+16,
|
|
"train_loss": 0.11019459065355108,
|
|
"train_runtime": 14786.2521,
|
|
"train_samples_per_second": 129.744,
|
|
"train_steps_per_second": 0.507,
|
|
"valid_targets_mean": 15636.4,
|
|
"valid_targets_min": 509
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 7494,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 2,
|
|
"save_steps": 100,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.3824840797454336e+16,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|