Model: laion/exp-uns-tezos-128unique_glm_4_7_traces_jupiter_cleaned Source: Original Platform
9750 lines
271 KiB
JSON
9750 lines
271 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4410,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007942811755361398,
|
|
"grad_norm": 15.88839536943387,
|
|
"learning_rate": 3.6281179138322e-07,
|
|
"loss": 0.9424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4239775240421295,
|
|
"step": 5,
|
|
"valid_targets_mean": 3129.0,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 0.015885623510722795,
|
|
"grad_norm": 18.031331492852573,
|
|
"learning_rate": 8.163265306122449e-07,
|
|
"loss": 0.9247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4598994851112366,
|
|
"step": 10,
|
|
"valid_targets_mean": 3977.2,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 0.023828435266084195,
|
|
"grad_norm": 19.058217701860677,
|
|
"learning_rate": 1.26984126984127e-06,
|
|
"loss": 0.8698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48775196075439453,
|
|
"step": 15,
|
|
"valid_targets_mean": 5410.9,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 0.03177124702144559,
|
|
"grad_norm": 12.208471388372054,
|
|
"learning_rate": 1.723356009070295e-06,
|
|
"loss": 0.8659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3456057906150818,
|
|
"step": 20,
|
|
"valid_targets_mean": 2584.9,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 0.03971405877680699,
|
|
"grad_norm": 6.601801693684324,
|
|
"learning_rate": 2.17687074829932e-06,
|
|
"loss": 0.7898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33370092511177063,
|
|
"step": 25,
|
|
"valid_targets_mean": 4835.8,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 0.04765687053216839,
|
|
"grad_norm": 4.878347883548458,
|
|
"learning_rate": 2.6303854875283447e-06,
|
|
"loss": 0.8092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4149326980113983,
|
|
"step": 30,
|
|
"valid_targets_mean": 4071.9,
|
|
"valid_targets_min": 1731
|
|
},
|
|
{
|
|
"epoch": 0.05559968228752978,
|
|
"grad_norm": 2.4038946995328665,
|
|
"learning_rate": 3.08390022675737e-06,
|
|
"loss": 0.7858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3171205222606659,
|
|
"step": 35,
|
|
"valid_targets_mean": 4289.4,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 0.06354249404289118,
|
|
"grad_norm": 1.4519792639897695,
|
|
"learning_rate": 3.537414965986395e-06,
|
|
"loss": 0.6642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2357153296470642,
|
|
"step": 40,
|
|
"valid_targets_mean": 4063.1,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 0.07148530579825259,
|
|
"grad_norm": 1.5091068187851147,
|
|
"learning_rate": 3.99092970521542e-06,
|
|
"loss": 0.6961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.510128378868103,
|
|
"step": 45,
|
|
"valid_targets_mean": 6111.4,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 0.07942811755361398,
|
|
"grad_norm": 1.4453259285988391,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.6755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4404022693634033,
|
|
"step": 50,
|
|
"valid_targets_mean": 3902.5,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 0.08737092930897537,
|
|
"grad_norm": 1.0175913637193847,
|
|
"learning_rate": 4.897959183673469e-06,
|
|
"loss": 0.6565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2261810451745987,
|
|
"step": 55,
|
|
"valid_targets_mean": 2665.4,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 0.09531374106433678,
|
|
"grad_norm": 0.9398288742227603,
|
|
"learning_rate": 5.3514739229024945e-06,
|
|
"loss": 0.6363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38304466009140015,
|
|
"step": 60,
|
|
"valid_targets_mean": 4811.5,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 0.10325655281969817,
|
|
"grad_norm": 0.8769037213232134,
|
|
"learning_rate": 5.80498866213152e-06,
|
|
"loss": 0.6527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212600350379944,
|
|
"step": 65,
|
|
"valid_targets_mean": 3743.1,
|
|
"valid_targets_min": 1857
|
|
},
|
|
{
|
|
"epoch": 0.11119936457505956,
|
|
"grad_norm": 0.8683053551792943,
|
|
"learning_rate": 6.258503401360545e-06,
|
|
"loss": 0.5966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2907598614692688,
|
|
"step": 70,
|
|
"valid_targets_mean": 3064.0,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 0.11914217633042097,
|
|
"grad_norm": 0.801902034863832,
|
|
"learning_rate": 6.71201814058957e-06,
|
|
"loss": 0.5969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111416697502136,
|
|
"step": 75,
|
|
"valid_targets_mean": 3569.8,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 0.12708498808578236,
|
|
"grad_norm": 0.6874693490877023,
|
|
"learning_rate": 7.165532879818595e-06,
|
|
"loss": 0.5707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32024240493774414,
|
|
"step": 80,
|
|
"valid_targets_mean": 4723.2,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 0.13502779984114377,
|
|
"grad_norm": 0.8165161754258365,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 0.6225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32171016931533813,
|
|
"step": 85,
|
|
"valid_targets_mean": 2914.5,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 0.14297061159650518,
|
|
"grad_norm": 0.5719857671906913,
|
|
"learning_rate": 8.072562358276645e-06,
|
|
"loss": 0.5233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3289507329463959,
|
|
"step": 90,
|
|
"valid_targets_mean": 6027.5,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 0.15091342335186655,
|
|
"grad_norm": 0.5852370700473665,
|
|
"learning_rate": 8.52607709750567e-06,
|
|
"loss": 0.5401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717599868774414,
|
|
"step": 95,
|
|
"valid_targets_mean": 6088.8,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 0.15885623510722796,
|
|
"grad_norm": 0.726523588288475,
|
|
"learning_rate": 8.979591836734695e-06,
|
|
"loss": 0.5459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2042022943496704,
|
|
"step": 100,
|
|
"valid_targets_mean": 3331.1,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 0.16679904686258937,
|
|
"grad_norm": 0.6930023073890275,
|
|
"learning_rate": 9.43310657596372e-06,
|
|
"loss": 0.5354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30130016803741455,
|
|
"step": 105,
|
|
"valid_targets_mean": 4307.9,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 0.17474185861795075,
|
|
"grad_norm": 0.723302571074593,
|
|
"learning_rate": 9.886621315192746e-06,
|
|
"loss": 0.5352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22850549221038818,
|
|
"step": 110,
|
|
"valid_targets_mean": 2945.8,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 0.18268467037331215,
|
|
"grad_norm": 0.8881282876639128,
|
|
"learning_rate": 1.034013605442177e-05,
|
|
"loss": 0.5412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877466678619385,
|
|
"step": 115,
|
|
"valid_targets_mean": 3769.9,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 0.19062748212867356,
|
|
"grad_norm": 0.7689021021624995,
|
|
"learning_rate": 1.0793650793650794e-05,
|
|
"loss": 0.5387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30142125487327576,
|
|
"step": 120,
|
|
"valid_targets_mean": 3052.6,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 0.19857029388403494,
|
|
"grad_norm": 0.8210613336610383,
|
|
"learning_rate": 1.124716553287982e-05,
|
|
"loss": 0.5463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3588235378265381,
|
|
"step": 125,
|
|
"valid_targets_mean": 6682.8,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 0.20651310563939634,
|
|
"grad_norm": 0.7487467890065992,
|
|
"learning_rate": 1.1700680272108845e-05,
|
|
"loss": 0.5379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263919472694397,
|
|
"step": 130,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 0.21445591739475775,
|
|
"grad_norm": 0.7848493674732989,
|
|
"learning_rate": 1.215419501133787e-05,
|
|
"loss": 0.4798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28911158442497253,
|
|
"step": 135,
|
|
"valid_targets_mean": 4039.6,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 0.22239872915011913,
|
|
"grad_norm": 0.7581094907119672,
|
|
"learning_rate": 1.2607709750566895e-05,
|
|
"loss": 0.5336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21103256940841675,
|
|
"step": 140,
|
|
"valid_targets_mean": 3157.1,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 0.23034154090548054,
|
|
"grad_norm": 0.7500399116196574,
|
|
"learning_rate": 1.3061224489795918e-05,
|
|
"loss": 0.5153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27222275733947754,
|
|
"step": 145,
|
|
"valid_targets_mean": 3168.5,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 0.23828435266084194,
|
|
"grad_norm": 0.7181997386866019,
|
|
"learning_rate": 1.3514739229024945e-05,
|
|
"loss": 0.5223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24166783690452576,
|
|
"step": 150,
|
|
"valid_targets_mean": 3206.4,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 0.24622716441620335,
|
|
"grad_norm": 0.7678163847969501,
|
|
"learning_rate": 1.3968253968253968e-05,
|
|
"loss": 0.5212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3517448902130127,
|
|
"step": 155,
|
|
"valid_targets_mean": 4127.5,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 0.2541699761715647,
|
|
"grad_norm": 0.7942317681083627,
|
|
"learning_rate": 1.4421768707482994e-05,
|
|
"loss": 0.5005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3304540514945984,
|
|
"step": 160,
|
|
"valid_targets_mean": 4254.9,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 0.2621127879269261,
|
|
"grad_norm": 0.7939106591873948,
|
|
"learning_rate": 1.4875283446712018e-05,
|
|
"loss": 0.5201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26098117232322693,
|
|
"step": 165,
|
|
"valid_targets_mean": 3207.9,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 0.27005559968228754,
|
|
"grad_norm": 0.7108227548721311,
|
|
"learning_rate": 1.5328798185941044e-05,
|
|
"loss": 0.4941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33575257658958435,
|
|
"step": 170,
|
|
"valid_targets_mean": 5431.5,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 0.2779984114376489,
|
|
"grad_norm": 0.6543797078634814,
|
|
"learning_rate": 1.578231292517007e-05,
|
|
"loss": 0.4971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17337322235107422,
|
|
"step": 175,
|
|
"valid_targets_mean": 2671.1,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 0.28594122319301035,
|
|
"grad_norm": 0.5931833585139151,
|
|
"learning_rate": 1.6235827664399097e-05,
|
|
"loss": 0.4916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17989198863506317,
|
|
"step": 180,
|
|
"valid_targets_mean": 4090.4,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 0.29388403494837173,
|
|
"grad_norm": 0.645404061454077,
|
|
"learning_rate": 1.668934240362812e-05,
|
|
"loss": 0.4803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629571259021759,
|
|
"step": 185,
|
|
"valid_targets_mean": 4718.5,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 0.3018268467037331,
|
|
"grad_norm": 0.7560422766343962,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.4906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20798392593860626,
|
|
"step": 190,
|
|
"valid_targets_mean": 3290.1,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 0.30976965845909454,
|
|
"grad_norm": 0.8198246938935282,
|
|
"learning_rate": 1.759637188208617e-05,
|
|
"loss": 0.5118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21638375520706177,
|
|
"step": 195,
|
|
"valid_targets_mean": 2587.9,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 0.3177124702144559,
|
|
"grad_norm": 0.595446050621745,
|
|
"learning_rate": 1.8049886621315194e-05,
|
|
"loss": 0.4844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2102392166852951,
|
|
"step": 200,
|
|
"valid_targets_mean": 4375.9,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 0.3256552819698173,
|
|
"grad_norm": 0.6224904991446684,
|
|
"learning_rate": 1.8503401360544218e-05,
|
|
"loss": 0.4957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1138230413198471,
|
|
"step": 205,
|
|
"valid_targets_mean": 2274.1,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 0.33359809372517873,
|
|
"grad_norm": 0.7317111137256921,
|
|
"learning_rate": 1.8956916099773243e-05,
|
|
"loss": 0.5305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26143375039100647,
|
|
"step": 210,
|
|
"valid_targets_mean": 3561.1,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 0.3415409054805401,
|
|
"grad_norm": 0.6320013005327049,
|
|
"learning_rate": 1.941043083900227e-05,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822107970714569,
|
|
"step": 215,
|
|
"valid_targets_mean": 5738.0,
|
|
"valid_targets_min": 2781
|
|
},
|
|
{
|
|
"epoch": 0.3494837172359015,
|
|
"grad_norm": 0.7098401032777851,
|
|
"learning_rate": 1.9863945578231295e-05,
|
|
"loss": 0.4481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2187938094139099,
|
|
"step": 220,
|
|
"valid_targets_mean": 2924.2,
|
|
"valid_targets_min": 1858
|
|
},
|
|
{
|
|
"epoch": 0.3574265289912629,
|
|
"grad_norm": 0.8747764062086805,
|
|
"learning_rate": 2.031746031746032e-05,
|
|
"loss": 0.4921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866303622722626,
|
|
"step": 225,
|
|
"valid_targets_mean": 3105.2,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 0.3653693407466243,
|
|
"grad_norm": 0.6618968210978577,
|
|
"learning_rate": 2.0770975056689343e-05,
|
|
"loss": 0.4545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715775966644287,
|
|
"step": 230,
|
|
"valid_targets_mean": 4876.9,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 0.3733121525019857,
|
|
"grad_norm": 0.8745539591377555,
|
|
"learning_rate": 2.122448979591837e-05,
|
|
"loss": 0.4937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24022126197814941,
|
|
"step": 235,
|
|
"valid_targets_mean": 2440.0,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 0.3812549642573471,
|
|
"grad_norm": 1.0464331555936317,
|
|
"learning_rate": 2.1678004535147395e-05,
|
|
"loss": 0.4819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28444114327430725,
|
|
"step": 240,
|
|
"valid_targets_mean": 3890.6,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 0.3891977760127085,
|
|
"grad_norm": 0.5495845787018652,
|
|
"learning_rate": 2.213151927437642e-05,
|
|
"loss": 0.4663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19772498309612274,
|
|
"step": 245,
|
|
"valid_targets_mean": 4502.8,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 0.3971405877680699,
|
|
"grad_norm": 0.7089695661005441,
|
|
"learning_rate": 2.2585034013605444e-05,
|
|
"loss": 0.4223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19527561962604523,
|
|
"step": 250,
|
|
"valid_targets_mean": 4025.4,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 0.4050833995234313,
|
|
"grad_norm": 0.8748381704458628,
|
|
"learning_rate": 2.3038548752834472e-05,
|
|
"loss": 0.4683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667570114135742,
|
|
"step": 255,
|
|
"valid_targets_mean": 2651.0,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 0.4130262112787927,
|
|
"grad_norm": 0.7838591820755033,
|
|
"learning_rate": 2.3492063492063496e-05,
|
|
"loss": 0.4549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19007517397403717,
|
|
"step": 260,
|
|
"valid_targets_mean": 3150.8,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 0.42096902303415407,
|
|
"grad_norm": 0.6652588645693622,
|
|
"learning_rate": 2.394557823129252e-05,
|
|
"loss": 0.4802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24852830171585083,
|
|
"step": 265,
|
|
"valid_targets_mean": 3741.9,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 0.4289118347895155,
|
|
"grad_norm": 0.6178216162818065,
|
|
"learning_rate": 2.439909297052154e-05,
|
|
"loss": 0.4765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32896026968955994,
|
|
"step": 270,
|
|
"valid_targets_mean": 6348.9,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 0.4368546465448769,
|
|
"grad_norm": 0.773659433180416,
|
|
"learning_rate": 2.4852607709750566e-05,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31554922461509705,
|
|
"step": 275,
|
|
"valid_targets_mean": 3922.5,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 0.44479745830023826,
|
|
"grad_norm": 0.7714114491405641,
|
|
"learning_rate": 2.5306122448979597e-05,
|
|
"loss": 0.4515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24538660049438477,
|
|
"step": 280,
|
|
"valid_targets_mean": 3182.1,
|
|
"valid_targets_min": 1534
|
|
},
|
|
{
|
|
"epoch": 0.4527402700555997,
|
|
"grad_norm": 0.628268693011033,
|
|
"learning_rate": 2.5759637188208618e-05,
|
|
"loss": 0.4377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1170404776930809,
|
|
"step": 285,
|
|
"valid_targets_mean": 2256.4,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 0.46068308181096107,
|
|
"grad_norm": 0.6928897873984537,
|
|
"learning_rate": 2.6213151927437642e-05,
|
|
"loss": 0.4451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18939167261123657,
|
|
"step": 290,
|
|
"valid_targets_mean": 3506.6,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 0.4686258935663225,
|
|
"grad_norm": 0.6456168670095845,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.4433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719925045967102,
|
|
"step": 295,
|
|
"valid_targets_mean": 5328.2,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 0.4765687053216839,
|
|
"grad_norm": 0.5885023635798841,
|
|
"learning_rate": 2.7120181405895694e-05,
|
|
"loss": 0.4492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16384145617485046,
|
|
"step": 300,
|
|
"valid_targets_mean": 4114.1,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 0.48451151707704526,
|
|
"grad_norm": 0.6946223462717845,
|
|
"learning_rate": 2.757369614512472e-05,
|
|
"loss": 0.4586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1989206075668335,
|
|
"step": 305,
|
|
"valid_targets_mean": 3628.1,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 0.4924543288324067,
|
|
"grad_norm": 0.7924439314597271,
|
|
"learning_rate": 2.8027210884353743e-05,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24060654640197754,
|
|
"step": 310,
|
|
"valid_targets_mean": 3894.8,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 0.5003971405877681,
|
|
"grad_norm": 0.6894130204288108,
|
|
"learning_rate": 2.8480725623582767e-05,
|
|
"loss": 0.4523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19084176421165466,
|
|
"step": 315,
|
|
"valid_targets_mean": 3388.4,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 0.5083399523431295,
|
|
"grad_norm": 0.6182330058174214,
|
|
"learning_rate": 2.893424036281179e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1790224015712738,
|
|
"step": 320,
|
|
"valid_targets_mean": 3838.2,
|
|
"valid_targets_min": 1952
|
|
},
|
|
{
|
|
"epoch": 0.5162827640984908,
|
|
"grad_norm": 0.6692685644592893,
|
|
"learning_rate": 2.938775510204082e-05,
|
|
"loss": 0.453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815963625907898,
|
|
"step": 325,
|
|
"valid_targets_mean": 4856.5,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 0.5242255758538522,
|
|
"grad_norm": 0.7281808220702274,
|
|
"learning_rate": 2.9841269841269844e-05,
|
|
"loss": 0.446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2099233865737915,
|
|
"step": 330,
|
|
"valid_targets_mean": 2939.2,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 0.5321683876092137,
|
|
"grad_norm": 0.7307523992662687,
|
|
"learning_rate": 3.0294784580498868e-05,
|
|
"loss": 0.4166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21184232831001282,
|
|
"step": 335,
|
|
"valid_targets_mean": 3696.9,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 0.5401111993645751,
|
|
"grad_norm": 0.6324496910956222,
|
|
"learning_rate": 3.074829931972789e-05,
|
|
"loss": 0.4511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1693590134382248,
|
|
"step": 340,
|
|
"valid_targets_mean": 4246.6,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 0.5480540111199365,
|
|
"grad_norm": 0.7276746606270877,
|
|
"learning_rate": 3.1201814058956924e-05,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3968878984451294,
|
|
"step": 345,
|
|
"valid_targets_mean": 7364.2,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 0.5559968228752978,
|
|
"grad_norm": 0.7167678694766842,
|
|
"learning_rate": 3.1655328798185945e-05,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1543784886598587,
|
|
"step": 350,
|
|
"valid_targets_mean": 3295.4,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 0.5639396346306592,
|
|
"grad_norm": 0.9162178049144889,
|
|
"learning_rate": 3.2108843537414965e-05,
|
|
"loss": 0.4609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23491612076759338,
|
|
"step": 355,
|
|
"valid_targets_mean": 3100.6,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 0.5718824463860207,
|
|
"grad_norm": 0.9104142287756266,
|
|
"learning_rate": 3.256235827664399e-05,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2722020149230957,
|
|
"step": 360,
|
|
"valid_targets_mean": 2748.6,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 0.5798252581413821,
|
|
"grad_norm": 0.7039183501027523,
|
|
"learning_rate": 3.3015873015873014e-05,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23884740471839905,
|
|
"step": 365,
|
|
"valid_targets_mean": 5024.6,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 0.5877680698967435,
|
|
"grad_norm": 0.6157781834064768,
|
|
"learning_rate": 3.346938775510204e-05,
|
|
"loss": 0.4309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2479819357395172,
|
|
"step": 370,
|
|
"valid_targets_mean": 5908.1,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 0.5957108816521048,
|
|
"grad_norm": 0.6767049300821247,
|
|
"learning_rate": 3.392290249433107e-05,
|
|
"loss": 0.4582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17496052384376526,
|
|
"step": 375,
|
|
"valid_targets_mean": 3295.9,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.6036536934074662,
|
|
"grad_norm": 0.7110437170706237,
|
|
"learning_rate": 3.437641723356009e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2034318745136261,
|
|
"step": 380,
|
|
"valid_targets_mean": 5350.4,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 0.6115965051628276,
|
|
"grad_norm": 0.6599676328490429,
|
|
"learning_rate": 3.482993197278912e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17227444052696228,
|
|
"step": 385,
|
|
"valid_targets_mean": 3946.8,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 0.6195393169181891,
|
|
"grad_norm": 0.6208145903676437,
|
|
"learning_rate": 3.5283446712018146e-05,
|
|
"loss": 0.4249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2087390273809433,
|
|
"step": 390,
|
|
"valid_targets_mean": 4787.0,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 0.6274821286735505,
|
|
"grad_norm": 0.793276196872295,
|
|
"learning_rate": 3.573696145124717e-05,
|
|
"loss": 0.4339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24527285993099213,
|
|
"step": 395,
|
|
"valid_targets_mean": 3381.0,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 0.6354249404289118,
|
|
"grad_norm": 0.6917657653660046,
|
|
"learning_rate": 3.6190476190476195e-05,
|
|
"loss": 0.4382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2250712662935257,
|
|
"step": 400,
|
|
"valid_targets_mean": 4008.0,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 0.6433677521842732,
|
|
"grad_norm": 0.6906712995315898,
|
|
"learning_rate": 3.6643990929705216e-05,
|
|
"loss": 0.4003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.196695938706398,
|
|
"step": 405,
|
|
"valid_targets_mean": 4711.5,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 0.6513105639396346,
|
|
"grad_norm": 0.8390576949488375,
|
|
"learning_rate": 3.7097505668934243e-05,
|
|
"loss": 0.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19600464403629303,
|
|
"step": 410,
|
|
"valid_targets_mean": 2610.9,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 0.659253375694996,
|
|
"grad_norm": 0.780045630069556,
|
|
"learning_rate": 3.755102040816327e-05,
|
|
"loss": 0.4333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23739062249660492,
|
|
"step": 415,
|
|
"valid_targets_mean": 3535.1,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.6671961874503575,
|
|
"grad_norm": 0.6778938227483191,
|
|
"learning_rate": 3.800453514739229e-05,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1786542534828186,
|
|
"step": 420,
|
|
"valid_targets_mean": 3915.8,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 0.6751389992057188,
|
|
"grad_norm": 0.7225825124296799,
|
|
"learning_rate": 3.845804988662132e-05,
|
|
"loss": 0.4218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29683467745780945,
|
|
"step": 425,
|
|
"valid_targets_mean": 5180.6,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 0.6830818109610802,
|
|
"grad_norm": 0.6915873876780335,
|
|
"learning_rate": 3.891156462585034e-05,
|
|
"loss": 0.4243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2797970771789551,
|
|
"step": 430,
|
|
"valid_targets_mean": 4947.5,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 0.6910246227164416,
|
|
"grad_norm": 0.7850742574648016,
|
|
"learning_rate": 3.936507936507937e-05,
|
|
"loss": 0.4152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25987759232521057,
|
|
"step": 435,
|
|
"valid_targets_mean": 3789.6,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 0.698967434471803,
|
|
"grad_norm": 0.884759489174661,
|
|
"learning_rate": 3.9818594104308396e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22449225187301636,
|
|
"step": 440,
|
|
"valid_targets_mean": 2535.1,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 0.7069102462271644,
|
|
"grad_norm": 0.6537546595806286,
|
|
"learning_rate": 3.999994361288785e-05,
|
|
"loss": 0.4495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553768754005432,
|
|
"step": 445,
|
|
"valid_targets_mean": 4750.8,
|
|
"valid_targets_min": 2086
|
|
},
|
|
{
|
|
"epoch": 0.7148530579825259,
|
|
"grad_norm": 0.7249177545913884,
|
|
"learning_rate": 3.9999599026131644e-05,
|
|
"loss": 0.4181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21963393688201904,
|
|
"step": 450,
|
|
"valid_targets_mean": 3955.5,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 0.7227958697378872,
|
|
"grad_norm": 0.7433142335065659,
|
|
"learning_rate": 3.999894118418342e-05,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1622614562511444,
|
|
"step": 455,
|
|
"valid_targets_mean": 2351.6,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 0.7307386814932486,
|
|
"grad_norm": 0.7965403375033488,
|
|
"learning_rate": 3.999797009734697e-05,
|
|
"loss": 0.4358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20540842413902283,
|
|
"step": 460,
|
|
"valid_targets_mean": 2787.9,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 0.73868149324861,
|
|
"grad_norm": 0.7048577120871824,
|
|
"learning_rate": 3.999668578083253e-05,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20939108729362488,
|
|
"step": 465,
|
|
"valid_targets_mean": 3625.5,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 0.7466243050039714,
|
|
"grad_norm": 0.7910627464488521,
|
|
"learning_rate": 3.9995088254756434e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17419838905334473,
|
|
"step": 470,
|
|
"valid_targets_mean": 2084.9,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 0.7545671167593329,
|
|
"grad_norm": 0.8048863918776025,
|
|
"learning_rate": 3.999317754414084e-05,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868063449859619,
|
|
"step": 475,
|
|
"valid_targets_mean": 3651.4,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 0.7625099285146942,
|
|
"grad_norm": 0.6148380050135807,
|
|
"learning_rate": 3.999095367891337e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14060398936271667,
|
|
"step": 480,
|
|
"valid_targets_mean": 3484.0,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 0.7704527402700556,
|
|
"grad_norm": 0.9061015074633573,
|
|
"learning_rate": 3.9988416693906563e-05,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20532460510730743,
|
|
"step": 485,
|
|
"valid_targets_mean": 3235.5,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 0.778395552025417,
|
|
"grad_norm": 0.5907642425231314,
|
|
"learning_rate": 3.9985566628857425e-05,
|
|
"loss": 0.4156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18049964308738708,
|
|
"step": 490,
|
|
"valid_targets_mean": 4417.5,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 0.7863383637807784,
|
|
"grad_norm": 0.7203217268866515,
|
|
"learning_rate": 3.998240352840672e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723429203033447,
|
|
"step": 495,
|
|
"valid_targets_mean": 5176.4,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 0.7942811755361397,
|
|
"grad_norm": 0.6673647510177279,
|
|
"learning_rate": 3.997892744209833e-05,
|
|
"loss": 0.4108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1939367651939392,
|
|
"step": 500,
|
|
"valid_targets_mean": 3500.8,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 0.8022239872915012,
|
|
"grad_norm": 0.7978326895047658,
|
|
"learning_rate": 3.997513842437845e-05,
|
|
"loss": 0.4185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20836496353149414,
|
|
"step": 505,
|
|
"valid_targets_mean": 3163.5,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 0.8101667990468626,
|
|
"grad_norm": 0.8456731767521818,
|
|
"learning_rate": 3.997103653459475e-05,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.146197110414505,
|
|
"step": 510,
|
|
"valid_targets_mean": 2262.0,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 0.818109610802224,
|
|
"grad_norm": 0.5940517142504953,
|
|
"learning_rate": 3.996662183699541e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21038401126861572,
|
|
"step": 515,
|
|
"valid_targets_mean": 4221.8,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 0.8260524225575854,
|
|
"grad_norm": 0.9219548662045634,
|
|
"learning_rate": 3.996189440072818e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20819224417209625,
|
|
"step": 520,
|
|
"valid_targets_mean": 5295.8,
|
|
"valid_targets_min": 1792
|
|
},
|
|
{
|
|
"epoch": 0.8339952343129468,
|
|
"grad_norm": 0.7211513007371394,
|
|
"learning_rate": 3.9956854299839246e-05,
|
|
"loss": 0.4193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18311676383018494,
|
|
"step": 525,
|
|
"valid_targets_mean": 3641.9,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 0.8419380460683081,
|
|
"grad_norm": 0.8075739583715514,
|
|
"learning_rate": 3.9951501613272076e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18437093496322632,
|
|
"step": 530,
|
|
"valid_targets_mean": 2647.4,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.8498808578236696,
|
|
"grad_norm": 0.6427987357888786,
|
|
"learning_rate": 3.994583642486618e-05,
|
|
"loss": 0.4062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27264687418937683,
|
|
"step": 535,
|
|
"valid_targets_mean": 6443.0,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 0.857823669579031,
|
|
"grad_norm": 0.6657942579672185,
|
|
"learning_rate": 3.993985882335584e-05,
|
|
"loss": 0.4155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13556276261806488,
|
|
"step": 540,
|
|
"valid_targets_mean": 2404.2,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.8657664813343924,
|
|
"grad_norm": 0.6199819553320746,
|
|
"learning_rate": 3.993356890236866e-05,
|
|
"loss": 0.4138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20995840430259705,
|
|
"step": 545,
|
|
"valid_targets_mean": 5180.8,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 0.8737092930897538,
|
|
"grad_norm": 0.6929690004455854,
|
|
"learning_rate": 3.992696676042414e-05,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1625194251537323,
|
|
"step": 550,
|
|
"valid_targets_mean": 3864.9,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 0.8816521048451151,
|
|
"grad_norm": 0.6389312494133765,
|
|
"learning_rate": 3.992005250093211e-05,
|
|
"loss": 0.4406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20096394419670105,
|
|
"step": 555,
|
|
"valid_targets_mean": 3627.8,
|
|
"valid_targets_min": 1970
|
|
},
|
|
{
|
|
"epoch": 0.8895949166004765,
|
|
"grad_norm": 0.7091610816015569,
|
|
"learning_rate": 3.991282623219113e-05,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17697596549987793,
|
|
"step": 560,
|
|
"valid_targets_mean": 3188.5,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 0.897537728355838,
|
|
"grad_norm": 0.6965415102600724,
|
|
"learning_rate": 3.9905288067386776e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19609087705612183,
|
|
"step": 565,
|
|
"valid_targets_mean": 3570.1,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 0.9054805401111994,
|
|
"grad_norm": 0.7019238303008924,
|
|
"learning_rate": 3.989743812458987e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10176820307970047,
|
|
"step": 570,
|
|
"valid_targets_mean": 2146.9,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 0.9134233518665608,
|
|
"grad_norm": 0.8014883925615424,
|
|
"learning_rate": 3.9889276526754664e-05,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539423406124115,
|
|
"step": 575,
|
|
"valid_targets_mean": 4461.6,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 0.9213661636219221,
|
|
"grad_norm": 0.7655786217732675,
|
|
"learning_rate": 3.988080340171685e-05,
|
|
"loss": 0.3974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2201818823814392,
|
|
"step": 580,
|
|
"valid_targets_mean": 3568.0,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 0.9293089753772835,
|
|
"grad_norm": 0.6341471064662558,
|
|
"learning_rate": 3.987201888219161e-05,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1557033658027649,
|
|
"step": 585,
|
|
"valid_targets_mean": 3253.9,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 0.937251787132645,
|
|
"grad_norm": 0.8146148993169845,
|
|
"learning_rate": 3.986292310577153e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21185220777988434,
|
|
"step": 590,
|
|
"valid_targets_mean": 3847.4,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 0.9451945988880064,
|
|
"grad_norm": 0.9311031392483279,
|
|
"learning_rate": 3.9853516214924416e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22197425365447998,
|
|
"step": 595,
|
|
"valid_targets_mean": 2172.2,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 0.9531374106433678,
|
|
"grad_norm": 0.8165640419219957,
|
|
"learning_rate": 3.9843798356991096e-05,
|
|
"loss": 0.3753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19587013125419617,
|
|
"step": 600,
|
|
"valid_targets_mean": 2491.4,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 0.9610802223987291,
|
|
"grad_norm": 0.5298679809571485,
|
|
"learning_rate": 3.9833769684183104e-05,
|
|
"loss": 0.3873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17645713686943054,
|
|
"step": 605,
|
|
"valid_targets_mean": 5622.4,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 0.9690230341540905,
|
|
"grad_norm": 0.620968742897941,
|
|
"learning_rate": 3.982343035358026e-05,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23084129393100739,
|
|
"step": 610,
|
|
"valid_targets_mean": 3864.9,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 0.9769658459094519,
|
|
"grad_norm": 0.6247139533864813,
|
|
"learning_rate": 3.981278052712827e-05,
|
|
"loss": 0.3834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18587160110473633,
|
|
"step": 615,
|
|
"valid_targets_mean": 4687.6,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 0.9849086576648134,
|
|
"grad_norm": 0.6897680671596705,
|
|
"learning_rate": 3.9801820371636157e-05,
|
|
"loss": 0.414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21354234218597412,
|
|
"step": 620,
|
|
"valid_targets_mean": 3890.6,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 0.9928514694201748,
|
|
"grad_norm": 0.6421989042946505,
|
|
"learning_rate": 3.979055005877364e-05,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2025032639503479,
|
|
"step": 625,
|
|
"valid_targets_mean": 4244.0,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.8477847731928947,
|
|
"learning_rate": 3.977896976506845e-05,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41692882776260376,
|
|
"step": 630,
|
|
"valid_targets_mean": 5076.0,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 1.0079428117553615,
|
|
"grad_norm": 0.661566518673142,
|
|
"learning_rate": 3.976707967190358e-05,
|
|
"loss": 0.407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12952452898025513,
|
|
"step": 635,
|
|
"valid_targets_mean": 3413.2,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 1.0158856235107228,
|
|
"grad_norm": 0.6929809383011546,
|
|
"learning_rate": 3.9754879965514456e-05,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17370137572288513,
|
|
"step": 640,
|
|
"valid_targets_mean": 3278.9,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 1.0238284352660842,
|
|
"grad_norm": 1.2311295600372945,
|
|
"learning_rate": 3.9742370836985956e-05,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2153688669204712,
|
|
"step": 645,
|
|
"valid_targets_mean": 3091.6,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 1.0317712470214455,
|
|
"grad_norm": 0.6895474030337402,
|
|
"learning_rate": 3.972955248224949e-05,
|
|
"loss": 0.3873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.139690101146698,
|
|
"step": 650,
|
|
"valid_targets_mean": 2687.9,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 1.039714058776807,
|
|
"grad_norm": 0.6544190828238339,
|
|
"learning_rate": 3.971642510207989e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18974576890468597,
|
|
"step": 655,
|
|
"valid_targets_mean": 4095.8,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 1.0476568705321685,
|
|
"grad_norm": 0.7096380131585008,
|
|
"learning_rate": 3.9702988902092274e-05,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11100424081087112,
|
|
"step": 660,
|
|
"valid_targets_mean": 1966.2,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.0555996822875298,
|
|
"grad_norm": 0.5698890491251554,
|
|
"learning_rate": 3.968924409273884e-05,
|
|
"loss": 0.3573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16953882575035095,
|
|
"step": 665,
|
|
"valid_targets_mean": 5492.6,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 1.0635424940428913,
|
|
"grad_norm": 0.6707789718624412,
|
|
"learning_rate": 3.9675190889305545e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19646087288856506,
|
|
"step": 670,
|
|
"valid_targets_mean": 4259.0,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 1.0714853057982525,
|
|
"grad_norm": 0.7110397264879814,
|
|
"learning_rate": 3.966082951190874e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20094986259937286,
|
|
"step": 675,
|
|
"valid_targets_mean": 3985.1,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 1.079428117553614,
|
|
"grad_norm": 0.7107252626008872,
|
|
"learning_rate": 3.9646160185491756e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20733362436294556,
|
|
"step": 680,
|
|
"valid_targets_mean": 3251.6,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 1.0873709293089753,
|
|
"grad_norm": 0.670634696547269,
|
|
"learning_rate": 3.963118313982131e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15481668710708618,
|
|
"step": 685,
|
|
"valid_targets_mean": 3278.0,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.0953137410643368,
|
|
"grad_norm": 0.7026033504571114,
|
|
"learning_rate": 3.961589860948399e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713404893875122,
|
|
"step": 690,
|
|
"valid_targets_mean": 4801.2,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 1.1032565528196983,
|
|
"grad_norm": 0.7272977847233454,
|
|
"learning_rate": 3.960030683388251e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17259646952152252,
|
|
"step": 695,
|
|
"valid_targets_mean": 3893.1,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 1.1111993645750595,
|
|
"grad_norm": 0.7198365284196062,
|
|
"learning_rate": 3.9584408057232e-05,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23098380863666534,
|
|
"step": 700,
|
|
"valid_targets_mean": 3467.6,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 1.119142176330421,
|
|
"grad_norm": 0.8708510713008433,
|
|
"learning_rate": 3.956820252855618e-05,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17111672461032867,
|
|
"step": 705,
|
|
"valid_targets_mean": 2625.8,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 1.1270849880857823,
|
|
"grad_norm": 0.6506593622245677,
|
|
"learning_rate": 3.955169050168343e-05,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15990178287029266,
|
|
"step": 710,
|
|
"valid_targets_mean": 2555.2,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 1.1350277998411438,
|
|
"grad_norm": 0.6052209088151864,
|
|
"learning_rate": 3.953487223524283e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16166146099567413,
|
|
"step": 715,
|
|
"valid_targets_mean": 4034.1,
|
|
"valid_targets_min": 2053
|
|
},
|
|
{
|
|
"epoch": 1.1429706115965053,
|
|
"grad_norm": 0.6115704828138339,
|
|
"learning_rate": 3.951774799266014e-05,
|
|
"loss": 0.3644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15948382019996643,
|
|
"step": 720,
|
|
"valid_targets_mean": 3279.2,
|
|
"valid_targets_min": 1667
|
|
},
|
|
{
|
|
"epoch": 1.1509134233518665,
|
|
"grad_norm": 0.8000394429323502,
|
|
"learning_rate": 3.950031804215364e-05,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21206721663475037,
|
|
"step": 725,
|
|
"valid_targets_mean": 2899.6,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 1.158856235107228,
|
|
"grad_norm": 0.8369079105766967,
|
|
"learning_rate": 3.948258265672991e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1932494044303894,
|
|
"step": 730,
|
|
"valid_targets_mean": 2779.5,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 1.1667990468625893,
|
|
"grad_norm": 0.741632603009136,
|
|
"learning_rate": 3.946454211417961e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1464959681034088,
|
|
"step": 735,
|
|
"valid_targets_mean": 2544.9,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.1747418586179508,
|
|
"grad_norm": 0.5706507428301105,
|
|
"learning_rate": 3.944619669707309e-05,
|
|
"loss": 0.3556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19921299815177917,
|
|
"step": 740,
|
|
"valid_targets_mean": 5769.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 1.182684670373312,
|
|
"grad_norm": 0.6200662319363982,
|
|
"learning_rate": 3.9427546692755946e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15817837417125702,
|
|
"step": 745,
|
|
"valid_targets_mean": 3948.1,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 1.1906274821286735,
|
|
"grad_norm": 0.6918703594746533,
|
|
"learning_rate": 3.9408592393344596e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2157401293516159,
|
|
"step": 750,
|
|
"valid_targets_mean": 5014.1,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 1.198570293884035,
|
|
"grad_norm": 0.9309096987109412,
|
|
"learning_rate": 3.9389334095721606e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19136428833007812,
|
|
"step": 755,
|
|
"valid_targets_mean": 3785.0,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 1.2065131056393963,
|
|
"grad_norm": 0.7027586894167684,
|
|
"learning_rate": 3.936977210153113e-05,
|
|
"loss": 0.3662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638143301010132,
|
|
"step": 760,
|
|
"valid_targets_mean": 3838.4,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 1.2144559173947578,
|
|
"grad_norm": 0.8377585311746043,
|
|
"learning_rate": 3.93499067171741e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21581588685512543,
|
|
"step": 765,
|
|
"valid_targets_mean": 3522.2,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 1.222398729150119,
|
|
"grad_norm": 0.6326294484578919,
|
|
"learning_rate": 3.932973825380351e-05,
|
|
"loss": 0.3886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22675026953220367,
|
|
"step": 770,
|
|
"valid_targets_mean": 5868.8,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 1.2303415409054805,
|
|
"grad_norm": 0.6226504876004453,
|
|
"learning_rate": 3.9309267027319485e-05,
|
|
"loss": 0.3482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893569827079773,
|
|
"step": 775,
|
|
"valid_targets_mean": 4600.5,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 1.238284352660842,
|
|
"grad_norm": 0.6235394462340826,
|
|
"learning_rate": 3.928849335836435e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1813928782939911,
|
|
"step": 780,
|
|
"valid_targets_mean": 3877.6,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 1.2462271644162033,
|
|
"grad_norm": 0.7535096803836269,
|
|
"learning_rate": 3.926741757231761e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2331167757511139,
|
|
"step": 785,
|
|
"valid_targets_mean": 5097.6,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 1.2541699761715648,
|
|
"grad_norm": 0.662731552542052,
|
|
"learning_rate": 3.924603999929086e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19351565837860107,
|
|
"step": 790,
|
|
"valid_targets_mean": 4239.5,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 1.262112787926926,
|
|
"grad_norm": 0.8346393835940104,
|
|
"learning_rate": 3.9224360974122584e-05,
|
|
"loss": 0.361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19002623856067657,
|
|
"step": 795,
|
|
"valid_targets_mean": 2821.1,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 1.2700555996822875,
|
|
"grad_norm": 0.4618864405784487,
|
|
"learning_rate": 3.920238083637297e-05,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11394622921943665,
|
|
"step": 800,
|
|
"valid_targets_mean": 2866.1,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 1.277998411437649,
|
|
"grad_norm": 0.6557746360255144,
|
|
"learning_rate": 3.9180099930318524e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23234955966472626,
|
|
"step": 805,
|
|
"valid_targets_mean": 4667.0,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 1.2859412231930103,
|
|
"grad_norm": 0.650086062678356,
|
|
"learning_rate": 3.915751860494672e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20403829216957092,
|
|
"step": 810,
|
|
"valid_targets_mean": 3960.8,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 1.2938840349483718,
|
|
"grad_norm": 0.7212977427474128,
|
|
"learning_rate": 3.913463721395051e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20112933218479156,
|
|
"step": 815,
|
|
"valid_targets_mean": 4157.2,
|
|
"valid_targets_min": 1815
|
|
},
|
|
{
|
|
"epoch": 1.301826846703733,
|
|
"grad_norm": 0.5450911212539763,
|
|
"learning_rate": 3.911145611572282e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22640587389469147,
|
|
"step": 820,
|
|
"valid_targets_mean": 6111.2,
|
|
"valid_targets_min": 2251
|
|
},
|
|
{
|
|
"epoch": 1.3097696584590945,
|
|
"grad_norm": 0.540082784651201,
|
|
"learning_rate": 3.908797567335089e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13462777435779572,
|
|
"step": 825,
|
|
"valid_targets_mean": 3390.0,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 1.317712470214456,
|
|
"grad_norm": 0.6894580916306856,
|
|
"learning_rate": 3.906419625461062e-05,
|
|
"loss": 0.3739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19432513415813446,
|
|
"step": 830,
|
|
"valid_targets_mean": 4012.8,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 1.3256552819698173,
|
|
"grad_norm": 0.553001244973504,
|
|
"learning_rate": 3.90401182319608e-05,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16354134678840637,
|
|
"step": 835,
|
|
"valid_targets_mean": 3907.6,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 1.3335980937251788,
|
|
"grad_norm": 0.7965707131002338,
|
|
"learning_rate": 3.9015741982537265e-05,
|
|
"loss": 0.3637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24085572361946106,
|
|
"step": 840,
|
|
"valid_targets_mean": 4185.0,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 1.34154090548054,
|
|
"grad_norm": 0.5080372288254641,
|
|
"learning_rate": 3.899106788814701e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14088109135627747,
|
|
"step": 845,
|
|
"valid_targets_mean": 4589.4,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 1.3494837172359015,
|
|
"grad_norm": 0.7515033763751663,
|
|
"learning_rate": 3.896609633526219e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15434691309928894,
|
|
"step": 850,
|
|
"valid_targets_mean": 2515.4,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 1.357426528991263,
|
|
"grad_norm": 0.739817765020064,
|
|
"learning_rate": 3.894082771501407e-05,
|
|
"loss": 0.3724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20573550462722778,
|
|
"step": 855,
|
|
"valid_targets_mean": 3992.4,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 1.3653693407466243,
|
|
"grad_norm": 0.5675976625718987,
|
|
"learning_rate": 3.891526242318692e-05,
|
|
"loss": 0.3945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665615677833557,
|
|
"step": 860,
|
|
"valid_targets_mean": 6979.1,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 1.3733121525019856,
|
|
"grad_norm": 0.6431088696489751,
|
|
"learning_rate": 3.8889400860211785e-05,
|
|
"loss": 0.3551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28015702962875366,
|
|
"step": 865,
|
|
"valid_targets_mean": 6430.5,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 1.381254964257347,
|
|
"grad_norm": 0.6477689386332175,
|
|
"learning_rate": 3.886324343116023e-05,
|
|
"loss": 0.3679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20045560598373413,
|
|
"step": 870,
|
|
"valid_targets_mean": 4748.0,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 1.3891977760127086,
|
|
"grad_norm": 0.6311273033433231,
|
|
"learning_rate": 3.883679054573799e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13051976263523102,
|
|
"step": 875,
|
|
"valid_targets_mean": 2956.1,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 1.3971405877680698,
|
|
"grad_norm": 0.5331577978247257,
|
|
"learning_rate": 3.881004261827856e-05,
|
|
"loss": 0.3556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19640755653381348,
|
|
"step": 880,
|
|
"valid_targets_mean": 5918.6,
|
|
"valid_targets_min": 2562
|
|
},
|
|
{
|
|
"epoch": 1.4050833995234313,
|
|
"grad_norm": 0.5923390291087418,
|
|
"learning_rate": 3.878300006773669e-05,
|
|
"loss": 0.3846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11862044781446457,
|
|
"step": 885,
|
|
"valid_targets_mean": 2625.4,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.4130262112787926,
|
|
"grad_norm": 0.6898951235844368,
|
|
"learning_rate": 3.875566331768184e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18505030870437622,
|
|
"step": 890,
|
|
"valid_targets_mean": 3439.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 1.420969023034154,
|
|
"grad_norm": 0.5915321808074748,
|
|
"learning_rate": 3.872803279629155e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515580415725708,
|
|
"step": 895,
|
|
"valid_targets_mean": 4368.2,
|
|
"valid_targets_min": 1797
|
|
},
|
|
{
|
|
"epoch": 1.4289118347895156,
|
|
"grad_norm": 0.7440593698106558,
|
|
"learning_rate": 3.8700108936344705e-05,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23959174752235413,
|
|
"step": 900,
|
|
"valid_targets_mean": 3901.9,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 1.4368546465448768,
|
|
"grad_norm": 0.6132836133761006,
|
|
"learning_rate": 3.867189217521477e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17579707503318787,
|
|
"step": 905,
|
|
"valid_targets_mean": 3806.8,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 1.4447974583002383,
|
|
"grad_norm": 0.6886869247649108,
|
|
"learning_rate": 3.864338295486297e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16529691219329834,
|
|
"step": 910,
|
|
"valid_targets_mean": 3045.6,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 1.4527402700555996,
|
|
"grad_norm": 0.6803926511989434,
|
|
"learning_rate": 3.8614581721831316e-05,
|
|
"loss": 0.4077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1687852442264557,
|
|
"step": 915,
|
|
"valid_targets_mean": 2925.0,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 1.460683081810961,
|
|
"grad_norm": 1.158602258161799,
|
|
"learning_rate": 3.858548892723563e-05,
|
|
"loss": 0.3802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2452494502067566,
|
|
"step": 920,
|
|
"valid_targets_mean": 6068.4,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 1.4686258935663226,
|
|
"grad_norm": 2.919967001523072,
|
|
"learning_rate": 3.855610502675851e-05,
|
|
"loss": 0.4077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17100700736045837,
|
|
"step": 925,
|
|
"valid_targets_mean": 3988.8,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 1.4765687053216838,
|
|
"grad_norm": 0.6390192023170168,
|
|
"learning_rate": 3.852643048064215e-05,
|
|
"loss": 0.3592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1362331360578537,
|
|
"step": 930,
|
|
"valid_targets_mean": 3353.8,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 1.4845115170770453,
|
|
"grad_norm": 0.5716598826583745,
|
|
"learning_rate": 3.8496465753681145e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13138733804225922,
|
|
"step": 935,
|
|
"valid_targets_mean": 3740.8,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 1.4924543288324066,
|
|
"grad_norm": 0.5581871731302529,
|
|
"learning_rate": 3.846621131521522e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17948713898658752,
|
|
"step": 940,
|
|
"valid_targets_mean": 4578.6,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 1.500397140587768,
|
|
"grad_norm": 0.7393836905336594,
|
|
"learning_rate": 3.843566763912187e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17798201739788055,
|
|
"step": 945,
|
|
"valid_targets_mean": 2874.9,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 1.5083399523431296,
|
|
"grad_norm": 0.680020838927224,
|
|
"learning_rate": 3.840483520380896e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14968064427375793,
|
|
"step": 950,
|
|
"valid_targets_mean": 3160.2,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 1.5162827640984908,
|
|
"grad_norm": 0.648431270218946,
|
|
"learning_rate": 3.837371449220717e-05,
|
|
"loss": 0.3539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12272893637418747,
|
|
"step": 955,
|
|
"valid_targets_mean": 2917.0,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 1.524225575853852,
|
|
"grad_norm": 0.5605076493624305,
|
|
"learning_rate": 3.834230599176251e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15232917666435242,
|
|
"step": 960,
|
|
"valid_targets_mean": 2880.4,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 1.5321683876092136,
|
|
"grad_norm": 0.6347040648856374,
|
|
"learning_rate": 3.831061019442864e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705571413040161,
|
|
"step": 965,
|
|
"valid_targets_mean": 3844.8,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 1.540111199364575,
|
|
"grad_norm": 0.6096423014978278,
|
|
"learning_rate": 3.827862759665916e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21905672550201416,
|
|
"step": 970,
|
|
"valid_targets_mean": 4792.5,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 1.5480540111199366,
|
|
"grad_norm": 0.6341636619306559,
|
|
"learning_rate": 3.8246358699399853e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12396818399429321,
|
|
"step": 975,
|
|
"valid_targets_mean": 2724.4,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 1.5559968228752978,
|
|
"grad_norm": 0.5522608331931583,
|
|
"learning_rate": 3.8213804008080824e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1615956723690033,
|
|
"step": 980,
|
|
"valid_targets_mean": 5614.4,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 1.563939634630659,
|
|
"grad_norm": 0.6407359302352256,
|
|
"learning_rate": 3.818096403260862e-05,
|
|
"loss": 0.3678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1821938455104828,
|
|
"step": 985,
|
|
"valid_targets_mean": 4047.9,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 1.5718824463860206,
|
|
"grad_norm": 0.5789412073100989,
|
|
"learning_rate": 3.8147839287358185e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15816397964954376,
|
|
"step": 990,
|
|
"valid_targets_mean": 3731.5,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 1.579825258141382,
|
|
"grad_norm": 0.9085252446540194,
|
|
"learning_rate": 3.8114430291164836e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25151893496513367,
|
|
"step": 995,
|
|
"valid_targets_mean": 3759.4,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 1.5877680698967436,
|
|
"grad_norm": 0.9317060549576407,
|
|
"learning_rate": 3.808073756731615e-05,
|
|
"loss": 0.3576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16280008852481842,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3193.0,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 1.5957108816521048,
|
|
"grad_norm": 0.6761272369501097,
|
|
"learning_rate": 3.8046761643543734e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16579510271549225,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3134.9,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.603653693407466,
|
|
"grad_norm": 0.6365063161347707,
|
|
"learning_rate": 3.8012503052014996e-05,
|
|
"loss": 0.3801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1891859471797943,
|
|
"step": 1010,
|
|
"valid_targets_mean": 4158.6,
|
|
"valid_targets_min": 2344
|
|
},
|
|
{
|
|
"epoch": 1.6115965051628276,
|
|
"grad_norm": 0.6997283303001371,
|
|
"learning_rate": 3.797796232932476e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16245466470718384,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3201.6,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 1.619539316918189,
|
|
"grad_norm": 0.5461594870768208,
|
|
"learning_rate": 3.794314001648692e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19310882687568665,
|
|
"step": 1020,
|
|
"valid_targets_mean": 4475.5,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 1.6274821286735506,
|
|
"grad_norm": 0.663616114820501,
|
|
"learning_rate": 3.7908036658925926e-05,
|
|
"loss": 0.3655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14360517263412476,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2849.1,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 1.6354249404289118,
|
|
"grad_norm": 0.5319933304825046,
|
|
"learning_rate": 3.787265280646825e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18699121475219727,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4654.5,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 1.6433677521842731,
|
|
"grad_norm": 0.5952870075655246,
|
|
"learning_rate": 3.7836989013333776e-05,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19125506281852722,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4620.6,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 1.6513105639396346,
|
|
"grad_norm": 0.5942520435903411,
|
|
"learning_rate": 3.780104583812712e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19023838639259338,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3851.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.659253375694996,
|
|
"grad_norm": 0.6361418681997557,
|
|
"learning_rate": 3.7764823843828883e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1665622889995575,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3129.5,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 1.6671961874503576,
|
|
"grad_norm": 0.8002941661948504,
|
|
"learning_rate": 3.7728323597786834e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2122931033372879,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3373.6,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 1.6751389992057188,
|
|
"grad_norm": 0.9393550061714956,
|
|
"learning_rate": 3.7691545671707007e-05,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2068556547164917,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2595.6,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 1.6830818109610801,
|
|
"grad_norm": 0.7217009132537323,
|
|
"learning_rate": 3.765449064164477e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19811221957206726,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3672.9,
|
|
"valid_targets_min": 2932
|
|
},
|
|
{
|
|
"epoch": 1.6910246227164416,
|
|
"grad_norm": 0.6696837854968951,
|
|
"learning_rate": 3.7617159087995784e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1839665174484253,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3158.8,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 1.698967434471803,
|
|
"grad_norm": 0.618514808698761,
|
|
"learning_rate": 3.757955159548693e-05,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533153295516968,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5633.4,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 1.7069102462271644,
|
|
"grad_norm": 0.7735552520559144,
|
|
"learning_rate": 3.754166875316713e-05,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342828869819641,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3160.0,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 1.7148530579825259,
|
|
"grad_norm": 0.6651224141870751,
|
|
"learning_rate": 3.750351115439812e-05,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2238263040781021,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4436.6,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 1.7227958697378871,
|
|
"grad_norm": 0.6383806885333645,
|
|
"learning_rate": 3.746507939684519e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13717526197433472,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2467.2,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 1.7307386814932486,
|
|
"grad_norm": 0.7423000074159443,
|
|
"learning_rate": 3.742637408246779e-05,
|
|
"loss": 0.3693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13697458803653717,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3113.4,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 1.73868149324861,
|
|
"grad_norm": 0.824768923833467,
|
|
"learning_rate": 3.73873958175101e-05,
|
|
"loss": 0.3642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21649792790412903,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4265.0,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 1.7466243050039714,
|
|
"grad_norm": 0.7329628986130192,
|
|
"learning_rate": 3.734814521249156e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17463067173957825,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3486.0,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 1.7545671167593329,
|
|
"grad_norm": 0.62433406373444,
|
|
"learning_rate": 3.7308622882197294e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1860063672065735,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4182.5,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 1.7625099285146941,
|
|
"grad_norm": 0.7041194455223349,
|
|
"learning_rate": 3.7268829445668456e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13077063858509064,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2230.1,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 1.7704527402700556,
|
|
"grad_norm": 1.1288674950328432,
|
|
"learning_rate": 3.722876552619257e-05,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20401421189308167,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3293.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.778395552025417,
|
|
"grad_norm": 0.6313542526155985,
|
|
"learning_rate": 3.718843175129378e-05,
|
|
"loss": 0.3527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17658226191997528,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4324.4,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 1.7863383637807784,
|
|
"grad_norm": 0.6474309707492921,
|
|
"learning_rate": 3.7147828752722944e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16822749376296997,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3621.0,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 1.7942811755361396,
|
|
"grad_norm": 0.5919043354004183,
|
|
"learning_rate": 3.7106957166447834e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18284419178962708,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4889.5,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.8022239872915011,
|
|
"grad_norm": 0.7574958449044487,
|
|
"learning_rate": 3.7065817632643115e-05,
|
|
"loss": 0.3475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17157089710235596,
|
|
"step": 1135,
|
|
"valid_targets_mean": 2429.4,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 1.8101667990468626,
|
|
"grad_norm": 0.5997701676238865,
|
|
"learning_rate": 3.7024410795680326e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13526824116706848,
|
|
"step": 1140,
|
|
"valid_targets_mean": 2804.8,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 1.818109610802224,
|
|
"grad_norm": 0.6157182006506525,
|
|
"learning_rate": 3.698273730411782e-05,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21906962990760803,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4311.5,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 1.8260524225575854,
|
|
"grad_norm": 0.5347218330560266,
|
|
"learning_rate": 3.694079781069053e-05,
|
|
"loss": 0.3546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19278281927108765,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4562.5,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 1.8339952343129466,
|
|
"grad_norm": 0.6631762061866002,
|
|
"learning_rate": 3.6898592972299875e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2089191973209381,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3584.5,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 1.8419380460683081,
|
|
"grad_norm": 1.076136130707019,
|
|
"learning_rate": 3.6856123450003306e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1652892678976059,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4394.6,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 1.8498808578236696,
|
|
"grad_norm": 0.6241191437864102,
|
|
"learning_rate": 3.68133899090041e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12641064822673798,
|
|
"step": 1165,
|
|
"valid_targets_mean": 2539.2,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 1.8578236695790311,
|
|
"grad_norm": 0.674120258751231,
|
|
"learning_rate": 3.677039301864085e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23707684874534607,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4709.9,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 1.8657664813343924,
|
|
"grad_norm": 0.5978030381296966,
|
|
"learning_rate": 3.672713345237701e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23114074766635895,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4435.8,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 1.8737092930897536,
|
|
"grad_norm": 0.6863621960567521,
|
|
"learning_rate": 3.6683611887790356e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21143671870231628,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3733.0,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 1.8816521048451151,
|
|
"grad_norm": 0.6099005303971567,
|
|
"learning_rate": 3.663982900656236e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15922856330871582,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3633.9,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 1.8895949166004766,
|
|
"grad_norm": 0.6805323305815317,
|
|
"learning_rate": 3.6595785494467516e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15649083256721497,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3937.8,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 1.8975377283558381,
|
|
"grad_norm": 0.7192698496397726,
|
|
"learning_rate": 3.655148204136259e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2191399782896042,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3151.8,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 1.9054805401111994,
|
|
"grad_norm": 0.6668509849584682,
|
|
"learning_rate": 3.650691934117584e-05,
|
|
"loss": 0.3889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20382438600063324,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3486.8,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 1.9134233518665607,
|
|
"grad_norm": 0.8035617729492049,
|
|
"learning_rate": 3.646209809189611e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19142624735832214,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3432.2,
|
|
"valid_targets_min": 1846
|
|
},
|
|
{
|
|
"epoch": 1.9213661636219221,
|
|
"grad_norm": 1.2963513606083084,
|
|
"learning_rate": 3.641701899556192e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15828022360801697,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4063.4,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 1.9293089753772836,
|
|
"grad_norm": 0.5432603608113251,
|
|
"learning_rate": 3.63716827582505e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22869016230106354,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5632.2,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 1.9372517871326451,
|
|
"grad_norm": 0.5147185789798251,
|
|
"learning_rate": 3.632609009006665e-05,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21668154001235962,
|
|
"step": 1220,
|
|
"valid_targets_mean": 7057.2,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 1.9451945988880064,
|
|
"grad_norm": 0.7260582316391612,
|
|
"learning_rate": 3.62802417051317e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2201724350452423,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2750.9,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 1.9531374106433677,
|
|
"grad_norm": 0.6056677253880359,
|
|
"learning_rate": 3.6234138321572274e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14943622052669525,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2949.8,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 1.9610802223987291,
|
|
"grad_norm": 0.6127123738269298,
|
|
"learning_rate": 3.6187780661509074e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21178683638572693,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4950.9,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 1.9690230341540906,
|
|
"grad_norm": 0.6413483259644231,
|
|
"learning_rate": 3.6141169451045526e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21938207745552063,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4870.8,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 1.976965845909452,
|
|
"grad_norm": 0.5787481197923067,
|
|
"learning_rate": 3.609430542025646e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2136591076850891,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5270.0,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 1.9849086576648134,
|
|
"grad_norm": 0.5033505798532041,
|
|
"learning_rate": 3.604718930317664e-05,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19724534451961517,
|
|
"step": 1250,
|
|
"valid_targets_mean": 5154.5,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 1.9928514694201747,
|
|
"grad_norm": 0.5579253530612401,
|
|
"learning_rate": 3.5999821837789275e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23530352115631104,
|
|
"step": 1255,
|
|
"valid_targets_mean": 5154.9,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.8640078084495102,
|
|
"learning_rate": 3.595220376601447e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3643714487552643,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3635.0,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 2.0079428117553615,
|
|
"grad_norm": 0.6883897039519221,
|
|
"learning_rate": 3.590433583369758e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18529482185840607,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3742.1,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 2.015885623510723,
|
|
"grad_norm": 0.6916129686312249,
|
|
"learning_rate": 3.5856218790597554e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2372957170009613,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4251.2,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 2.023828435266084,
|
|
"grad_norm": 0.6894142413805101,
|
|
"learning_rate": 3.580785339037519e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11776354908943176,
|
|
"step": 1275,
|
|
"valid_targets_mean": 2528.1,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 2.0317712470214455,
|
|
"grad_norm": 0.6821805947548037,
|
|
"learning_rate": 3.57592403905813e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19787834584712982,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3713.6,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 2.039714058776807,
|
|
"grad_norm": 0.7361836467394816,
|
|
"learning_rate": 3.571038055264489e-05,
|
|
"loss": 0.3401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15118679404258728,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2429.4,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 2.0476568705321685,
|
|
"grad_norm": 0.7154261404582988,
|
|
"learning_rate": 3.566127464186119e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14867375791072845,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3092.2,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 2.05559968228753,
|
|
"grad_norm": 0.5727860716587474,
|
|
"learning_rate": 3.56119234273797e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18642055988311768,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5919.6,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 2.063542494042891,
|
|
"grad_norm": 0.7209469969836777,
|
|
"learning_rate": 3.5562327682192134e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358249932527542,
|
|
"step": 1300,
|
|
"valid_targets_mean": 2797.1,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.0714853057982525,
|
|
"grad_norm": 0.6424180936658248,
|
|
"learning_rate": 3.5512488183120286e-05,
|
|
"loss": 0.3537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1923716813325882,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3561.5,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 2.079428117553614,
|
|
"grad_norm": 0.5928719487107835,
|
|
"learning_rate": 3.54624057108039e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1337837278842926,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3300.4,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 2.0873709293089755,
|
|
"grad_norm": 0.5396413350231086,
|
|
"learning_rate": 3.5412081049688444e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10441135615110397,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3128.4,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 2.095313741064337,
|
|
"grad_norm": 0.5206720510158308,
|
|
"learning_rate": 3.5361514988012774e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17735737562179565,
|
|
"step": 1320,
|
|
"valid_targets_mean": 6169.2,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 2.103256552819698,
|
|
"grad_norm": 0.5779075598316524,
|
|
"learning_rate": 3.5310708317796844e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18341679871082306,
|
|
"step": 1325,
|
|
"valid_targets_mean": 5148.0,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 2.1111993645750595,
|
|
"grad_norm": 0.6935016981243504,
|
|
"learning_rate": 3.5259661834829266e-05,
|
|
"loss": 0.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.178381085395813,
|
|
"step": 1330,
|
|
"valid_targets_mean": 2967.8,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 2.119142176330421,
|
|
"grad_norm": 0.6083731048539996,
|
|
"learning_rate": 3.5208376338654866e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16906628012657166,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4543.4,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 2.1270849880857825,
|
|
"grad_norm": 0.690739546961002,
|
|
"learning_rate": 3.515685263256214e-05,
|
|
"loss": 0.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14242608845233917,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3002.5,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 2.1350277998411435,
|
|
"grad_norm": 0.48496692106385997,
|
|
"learning_rate": 3.51050915235707e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0746113657951355,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3492.1,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 2.142970611596505,
|
|
"grad_norm": 0.6400446320871603,
|
|
"learning_rate": 3.5053093822418596e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15652357041835785,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3430.2,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 2.1509134233518665,
|
|
"grad_norm": 0.6217161941762326,
|
|
"learning_rate": 3.500086034354966e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19390447437763214,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3531.0,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 2.158856235107228,
|
|
"grad_norm": 0.5817356312208215,
|
|
"learning_rate": 3.494839190510071e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18005506694316864,
|
|
"step": 1360,
|
|
"valid_targets_mean": 5271.1,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 2.1667990468625895,
|
|
"grad_norm": 0.5888340815304541,
|
|
"learning_rate": 3.489568932888877e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21646028757095337,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4386.9,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 2.1747418586179506,
|
|
"grad_norm": 0.6769591199936031,
|
|
"learning_rate": 3.484275344039815e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15648269653320312,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3075.2,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 2.182684670373312,
|
|
"grad_norm": 0.8348161514962841,
|
|
"learning_rate": 3.478958506876759e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1615811139345169,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2233.8,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 2.1906274821286735,
|
|
"grad_norm": 0.6837205971987226,
|
|
"learning_rate": 3.47361850467772e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12098369002342224,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3434.2,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 2.198570293884035,
|
|
"grad_norm": 0.6330296618696781,
|
|
"learning_rate": 3.468255421083546e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2086292952299118,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4254.2,
|
|
"valid_targets_min": 1975
|
|
},
|
|
{
|
|
"epoch": 2.2065131056393965,
|
|
"grad_norm": 0.5838619289523211,
|
|
"learning_rate": 3.46286934009661e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15831007063388824,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4396.8,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 2.2144559173947576,
|
|
"grad_norm": 0.47711541633575705,
|
|
"learning_rate": 3.457460346079495e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13950583338737488,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4265.6,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.222398729150119,
|
|
"grad_norm": 0.6573095798617108,
|
|
"learning_rate": 3.452028523753673e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16848234832286835,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3131.1,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 2.2303415409054805,
|
|
"grad_norm": 0.7173595596267622,
|
|
"learning_rate": 3.446573958198176e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18041779100894928,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2864.4,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 2.238284352660842,
|
|
"grad_norm": 0.6930245010167919,
|
|
"learning_rate": 3.4410967348482666e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14827093482017517,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2936.5,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.2462271644162035,
|
|
"grad_norm": 0.7973680592200161,
|
|
"learning_rate": 3.435596939494098e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14461973309516907,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2543.2,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 2.2541699761715646,
|
|
"grad_norm": 0.684569952237263,
|
|
"learning_rate": 3.430074658279369e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20371250808238983,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3003.2,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 2.262112787926926,
|
|
"grad_norm": 0.5723749209639714,
|
|
"learning_rate": 3.424529977699977e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1421903669834137,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4014.0,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 2.2700555996822875,
|
|
"grad_norm": 0.6135490580466698,
|
|
"learning_rate": 3.418962984602661e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14674530923366547,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2968.1,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 2.277998411437649,
|
|
"grad_norm": 0.6889006474626673,
|
|
"learning_rate": 3.413373766183646e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26962199807167053,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5678.6,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 2.2859412231930105,
|
|
"grad_norm": 0.8002559180936438,
|
|
"learning_rate": 3.40776240998727e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18627554178237915,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2480.4,
|
|
"valid_targets_min": 1529
|
|
},
|
|
{
|
|
"epoch": 2.2938840349483716,
|
|
"grad_norm": 0.6491008607378753,
|
|
"learning_rate": 3.4021290039046184e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1679387092590332,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3282.9,
|
|
"valid_targets_min": 2175
|
|
},
|
|
{
|
|
"epoch": 2.301826846703733,
|
|
"grad_norm": 0.5999044882859321,
|
|
"learning_rate": 3.396473636172146e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13808989524841309,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3742.4,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 2.3097696584590945,
|
|
"grad_norm": 0.7135892843300226,
|
|
"learning_rate": 3.390796395370294e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18330369889736176,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3123.2,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 2.317712470214456,
|
|
"grad_norm": 0.5078943847012218,
|
|
"learning_rate": 3.385097370422102e-05,
|
|
"loss": 0.3535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11011771857738495,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2963.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.3256552819698175,
|
|
"grad_norm": 0.5521887014200857,
|
|
"learning_rate": 3.3793766505918185e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14288313686847687,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4384.4,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 2.3335980937251786,
|
|
"grad_norm": 0.5914207614596828,
|
|
"learning_rate": 3.3736343254834994e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308402419090271,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3888.4,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 2.34154090548054,
|
|
"grad_norm": 0.6217136869424513,
|
|
"learning_rate": 3.3678704850396045e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12883442640304565,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2748.2,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 2.3494837172359015,
|
|
"grad_norm": 1.4830052997173284,
|
|
"learning_rate": 3.362085219539592e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17358991503715515,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3021.0,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 2.357426528991263,
|
|
"grad_norm": 0.6093523306393117,
|
|
"learning_rate": 3.3562786195985025e-05,
|
|
"loss": 0.3527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21074923872947693,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4772.8,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 2.365369340746624,
|
|
"grad_norm": 0.5125380037140108,
|
|
"learning_rate": 3.350450776165535e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12818077206611633,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4097.9,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 2.3733121525019856,
|
|
"grad_norm": 0.6176217509148066,
|
|
"learning_rate": 3.344601780522634e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2087707817554474,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4995.2,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 2.381254964257347,
|
|
"grad_norm": 0.6836640162353634,
|
|
"learning_rate": 3.3387317242830466e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19987516105175018,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3887.8,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 2.3891977760127086,
|
|
"grad_norm": 0.5467606166200022,
|
|
"learning_rate": 3.332840699389897e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252206563949585,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3900.5,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 2.39714058776807,
|
|
"grad_norm": 0.6101414442594505,
|
|
"learning_rate": 3.32692879811474e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14912310242652893,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2982.1,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 2.4050833995234315,
|
|
"grad_norm": 0.6189085389374146,
|
|
"learning_rate": 3.320996113056123e-05,
|
|
"loss": 0.3309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15729518234729767,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3697.1,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 2.4130262112787926,
|
|
"grad_norm": 0.559639941267365,
|
|
"learning_rate": 3.315042737138128e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10560771077871323,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2938.1,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 2.420969023034154,
|
|
"grad_norm": 0.5083163452880828,
|
|
"learning_rate": 3.309068763608919e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15285441279411316,
|
|
"step": 1525,
|
|
"valid_targets_mean": 5561.5,
|
|
"valid_targets_min": 1818
|
|
},
|
|
{
|
|
"epoch": 2.4289118347895156,
|
|
"grad_norm": 0.6723502623093427,
|
|
"learning_rate": 3.303074286039285e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1948283612728119,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3516.6,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.436854646544877,
|
|
"grad_norm": 0.49886186023969564,
|
|
"learning_rate": 3.2970593983211694e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14688044786453247,
|
|
"step": 1535,
|
|
"valid_targets_mean": 6552.9,
|
|
"valid_targets_min": 1594
|
|
},
|
|
{
|
|
"epoch": 2.444797458300238,
|
|
"grad_norm": 0.5948485561388479,
|
|
"learning_rate": 3.2910241946661993e-05,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14875787496566772,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4147.0,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 2.4527402700555996,
|
|
"grad_norm": 0.5128126816507781,
|
|
"learning_rate": 3.2849687696042165e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505034863948822,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4538.0,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 2.460683081810961,
|
|
"grad_norm": 0.5741792324797496,
|
|
"learning_rate": 3.2788932179817886e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1744004338979721,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4510.9,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 2.4686258935663226,
|
|
"grad_norm": 0.6222958758247095,
|
|
"learning_rate": 3.2727976349607276e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13966131210327148,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3072.0,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 2.476568705321684,
|
|
"grad_norm": 0.6649143406023839,
|
|
"learning_rate": 3.266682116016599e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16786207258701324,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3400.6,
|
|
"valid_targets_min": 1857
|
|
},
|
|
{
|
|
"epoch": 2.484511517077045,
|
|
"grad_norm": 0.5313287038544193,
|
|
"learning_rate": 3.260546756937227e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15318438410758972,
|
|
"step": 1565,
|
|
"valid_targets_mean": 4922.4,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 2.4924543288324066,
|
|
"grad_norm": 0.6201672890812036,
|
|
"learning_rate": 3.254391653821192e-05,
|
|
"loss": 0.3497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13001108169555664,
|
|
"step": 1570,
|
|
"valid_targets_mean": 2395.8,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 2.500397140587768,
|
|
"grad_norm": 0.6387024082583965,
|
|
"learning_rate": 3.248216903076328e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18185295164585114,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3048.2,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 2.5083399523431296,
|
|
"grad_norm": 0.600184983096639,
|
|
"learning_rate": 3.24202260141821e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22399401664733887,
|
|
"step": 1580,
|
|
"valid_targets_mean": 5785.1,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 2.5162827640984906,
|
|
"grad_norm": 0.7305237609527007,
|
|
"learning_rate": 3.235808845868641e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1235419511795044,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2077.8,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 2.524225575853852,
|
|
"grad_norm": 0.5784390965427145,
|
|
"learning_rate": 3.229575733754132e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14943808317184448,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3744.4,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 2.5321683876092136,
|
|
"grad_norm": 0.5112172069274676,
|
|
"learning_rate": 3.2233233627043765e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09919888526201248,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3577.8,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 2.540111199364575,
|
|
"grad_norm": 0.6926590405822822,
|
|
"learning_rate": 3.217051830650722e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1612141728401184,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4581.0,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 2.5480540111199366,
|
|
"grad_norm": 0.6546947409468704,
|
|
"learning_rate": 3.210761235824639e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17820803821086884,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4696.9,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 2.555996822875298,
|
|
"grad_norm": 0.7244041106645208,
|
|
"learning_rate": 3.204451676756175e-05,
|
|
"loss": 0.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862376272678375,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3994.5,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 2.563939634630659,
|
|
"grad_norm": 0.6288151952253788,
|
|
"learning_rate": 3.198123252272419e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11625784635543823,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2790.0,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 2.5718824463860206,
|
|
"grad_norm": 0.7070644970401541,
|
|
"learning_rate": 3.1917760614959505e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1929509937763214,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3893.2,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 2.579825258141382,
|
|
"grad_norm": 0.5752943165375942,
|
|
"learning_rate": 3.1854102038432856e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1538190245628357,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4710.6,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 2.5877680698967436,
|
|
"grad_norm": 0.6123539801881518,
|
|
"learning_rate": 3.17902577902332e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20326970517635345,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5094.5,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 2.5957108816521046,
|
|
"grad_norm": 0.7336291381461233,
|
|
"learning_rate": 3.172622887035771e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15672045946121216,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2564.5,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 2.603653693407466,
|
|
"grad_norm": 0.5771276926524828,
|
|
"learning_rate": 3.1662016281696073e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17378583550453186,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5557.1,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 2.6115965051628276,
|
|
"grad_norm": 0.5643345166416681,
|
|
"learning_rate": 3.15976210300148e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14159995317459106,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3652.1,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 2.619539316918189,
|
|
"grad_norm": 0.5665842207633304,
|
|
"learning_rate": 3.153304412394143e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1777251809835434,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5056.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.6274821286735506,
|
|
"grad_norm": 0.6276509401627544,
|
|
"learning_rate": 3.146828657494883e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14502498507499695,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3652.8,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 2.635424940428912,
|
|
"grad_norm": 0.6422943007980536,
|
|
"learning_rate": 3.140334939733924e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564156413078308,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3408.9,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 2.643367752184273,
|
|
"grad_norm": 0.5765139120855705,
|
|
"learning_rate": 3.1338233608228455e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12053699046373367,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3112.1,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 2.6513105639396346,
|
|
"grad_norm": 0.5667344440262309,
|
|
"learning_rate": 3.127294022752988e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13345958292484283,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3199.0,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 2.659253375694996,
|
|
"grad_norm": 0.5551005204822776,
|
|
"learning_rate": 3.120747027793854e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14009833335876465,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4249.0,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 2.6671961874503576,
|
|
"grad_norm": 0.7510508984204671,
|
|
"learning_rate": 3.114182478491509e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12906675040721893,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2554.1,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 2.6751389992057186,
|
|
"grad_norm": 0.7130667793690249,
|
|
"learning_rate": 3.107600477666969e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22158196568489075,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3311.2,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 2.68308181096108,
|
|
"grad_norm": 0.5839056028608296,
|
|
"learning_rate": 3.1010011284146004e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10266700387001038,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2095.2,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 2.6910246227164416,
|
|
"grad_norm": 0.6632377678035932,
|
|
"learning_rate": 3.0943845341004944e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18976278603076935,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3568.4,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 2.698967434471803,
|
|
"grad_norm": 0.6392845758385937,
|
|
"learning_rate": 3.087750798360856e-05,
|
|
"loss": 0.367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956574559211731,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4520.0,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 2.7069102462271646,
|
|
"grad_norm": 0.5175390333185903,
|
|
"learning_rate": 3.0811000251003774e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141355499625206,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4782.6,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 2.714853057982526,
|
|
"grad_norm": 0.5762891207384547,
|
|
"learning_rate": 3.074432318490608e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11263174563646317,
|
|
"step": 1710,
|
|
"valid_targets_mean": 2563.4,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 2.722795869737887,
|
|
"grad_norm": 0.5905930251883714,
|
|
"learning_rate": 3.067747782968328e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17377814650535583,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4768.9,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 2.7307386814932486,
|
|
"grad_norm": 0.7215362158463677,
|
|
"learning_rate": 3.0610465232339096e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.207193523645401,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3284.9,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 2.73868149324861,
|
|
"grad_norm": 0.5889277067969804,
|
|
"learning_rate": 3.054328644249677e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21541063487529755,
|
|
"step": 1725,
|
|
"valid_targets_mean": 6107.4,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 2.746624305003971,
|
|
"grad_norm": 0.486828126628298,
|
|
"learning_rate": 3.047594251238265e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15856409072875977,
|
|
"step": 1730,
|
|
"valid_targets_mean": 6418.0,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 2.7545671167593326,
|
|
"grad_norm": 0.5821136191752717,
|
|
"learning_rate": 3.0408434496809643e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20481270551681519,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4174.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 2.762509928514694,
|
|
"grad_norm": 0.5160818722729761,
|
|
"learning_rate": 3.034076345316079e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12224921584129333,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4027.4,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 2.7704527402700556,
|
|
"grad_norm": 0.5669799867831029,
|
|
"learning_rate": 3.0272930441372628e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18682673573493958,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4827.0,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 2.778395552025417,
|
|
"grad_norm": 0.7285417861032627,
|
|
"learning_rate": 3.02049365239186e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14413371682167053,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2237.5,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 2.7863383637807786,
|
|
"grad_norm": 0.6912846003130035,
|
|
"learning_rate": 3.0136782765792455e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14892491698265076,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2729.5,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 2.7942811755361396,
|
|
"grad_norm": 0.6442405597460399,
|
|
"learning_rate": 3.0068470234491517e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1731141209602356,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3135.4,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 2.802223987291501,
|
|
"grad_norm": 0.601151446350948,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1069628894329071,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2210.1,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 2.8101667990468626,
|
|
"grad_norm": 0.5317740486436834,
|
|
"learning_rate": 2.993137313477223e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319223791360855,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4668.9,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 2.818109610802224,
|
|
"grad_norm": 0.6432287245032964,
|
|
"learning_rate": 2.9862590713715837e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15356799960136414,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3416.8,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 2.826052422557585,
|
|
"grad_norm": 0.5676699295249068,
|
|
"learning_rate": 2.9793653814174957e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12742754817008972,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3529.1,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 2.8339952343129466,
|
|
"grad_norm": 0.578102058244052,
|
|
"learning_rate": 2.9724563515913317e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18010340631008148,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5393.8,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 2.841938046068308,
|
|
"grad_norm": 0.5939921792375766,
|
|
"learning_rate": 2.9655320901097348e-05,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14800448715686798,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3184.6,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 2.8498808578236696,
|
|
"grad_norm": 0.6265721135094762,
|
|
"learning_rate": 2.9585927054279224e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20304659008979797,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4807.9,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 2.857823669579031,
|
|
"grad_norm": 0.7088664995461751,
|
|
"learning_rate": 2.951638306237988e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17646431922912598,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3002.9,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 2.8657664813343926,
|
|
"grad_norm": 0.5161194976778836,
|
|
"learning_rate": 2.9446690014671976e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292266696691513,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4327.6,
|
|
"valid_targets_min": 2043
|
|
},
|
|
{
|
|
"epoch": 2.8737092930897536,
|
|
"grad_norm": 0.6411131280615942,
|
|
"learning_rate": 2.937684900276285e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14806044101715088,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3220.0,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 2.881652104845115,
|
|
"grad_norm": 0.6708028066615901,
|
|
"learning_rate": 2.9306861120577416e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16317380964756012,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3222.0,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 2.8895949166004766,
|
|
"grad_norm": 0.7685915285002095,
|
|
"learning_rate": 2.923672746434103e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15771472454071045,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2313.8,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 2.897537728355838,
|
|
"grad_norm": 0.68941732098669,
|
|
"learning_rate": 2.9166449132562303e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1985657662153244,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3244.8,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.905480540111199,
|
|
"grad_norm": 0.5838357433965427,
|
|
"learning_rate": 2.9096027226015927e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10654276609420776,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3140.8,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 2.9134233518665607,
|
|
"grad_norm": 0.6192446359845323,
|
|
"learning_rate": 2.9025462847725405e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10276107490062714,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2366.5,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 2.921366163621922,
|
|
"grad_norm": 0.8393717995724054,
|
|
"learning_rate": 2.8954757102945798e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16333425045013428,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3055.2,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 2.9293089753772836,
|
|
"grad_norm": 0.5620989776402179,
|
|
"learning_rate": 2.888391109914638e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18653210997581482,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5592.2,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 2.937251787132645,
|
|
"grad_norm": 0.6095093542036041,
|
|
"learning_rate": 2.8812925945993333e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22141271829605103,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4033.2,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 2.9451945988880066,
|
|
"grad_norm": 0.7089995163334321,
|
|
"learning_rate": 2.8741802755332332e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21728792786598206,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4480.9,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 2.9531374106433677,
|
|
"grad_norm": 0.573109703179896,
|
|
"learning_rate": 2.8670542641171155e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256847381591797,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3280.8,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 2.961080222398729,
|
|
"grad_norm": 0.566696520882888,
|
|
"learning_rate": 2.859914671966221e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17556504905223846,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4416.1,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 2.9690230341540906,
|
|
"grad_norm": 0.7143365014736737,
|
|
"learning_rate": 2.8527616109085082e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15887391567230225,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3609.6,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 2.9769658459094517,
|
|
"grad_norm": 0.625235923551544,
|
|
"learning_rate": 2.8455951929828977e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17307358980178833,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3388.9,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 2.984908657664813,
|
|
"grad_norm": 0.48183836909167393,
|
|
"learning_rate": 2.8384155304375223e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12500984966754913,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4632.8,
|
|
"valid_targets_min": 2517
|
|
},
|
|
{
|
|
"epoch": 2.9928514694201747,
|
|
"grad_norm": 0.521156147657319,
|
|
"learning_rate": 2.8312227357279646e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16076016426086426,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4642.1,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.7987496381165351,
|
|
"learning_rate": 2.8240169215154977e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33519262075424194,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3866.1,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 3.0079428117553615,
|
|
"grad_norm": 0.6315789983480494,
|
|
"learning_rate": 2.8167982006653196e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132240891456604,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2964.6,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 3.015885623510723,
|
|
"grad_norm": 0.5984668070369723,
|
|
"learning_rate": 2.8095666862447876e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14100432395935059,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3778.8,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 3.023828435266084,
|
|
"grad_norm": 0.5685891085404295,
|
|
"learning_rate": 2.8023224915216442e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20584864914417267,
|
|
"step": 1905,
|
|
"valid_targets_mean": 6293.9,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 3.0317712470214455,
|
|
"grad_norm": 0.6304207690618385,
|
|
"learning_rate": 2.795065729962244e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15217378735542297,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3906.9,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 3.039714058776807,
|
|
"grad_norm": 0.5687653825016662,
|
|
"learning_rate": 2.7877965152297785e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515602469444275,
|
|
"step": 1915,
|
|
"valid_targets_mean": 5531.2,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 3.0476568705321685,
|
|
"grad_norm": 0.6885465466560985,
|
|
"learning_rate": 2.780514961182492e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12114958465099335,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2787.4,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 3.05559968228753,
|
|
"grad_norm": 0.61302594963952,
|
|
"learning_rate": 2.773221181871903e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371764838695526,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3457.4,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 3.063542494042891,
|
|
"grad_norm": 0.600469864900935,
|
|
"learning_rate": 2.765915291541013e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18044528365135193,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4154.0,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 3.0714853057982525,
|
|
"grad_norm": 0.5201137268275172,
|
|
"learning_rate": 2.7585974046225206e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330631673336029,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3923.4,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 3.079428117553614,
|
|
"grad_norm": 0.6419739264029368,
|
|
"learning_rate": 2.751267635737027e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08891747891902924,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2002.8,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 3.0873709293089755,
|
|
"grad_norm": 0.7932733281467648,
|
|
"learning_rate": 2.7439260996912423e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.150013267993927,
|
|
"step": 1945,
|
|
"valid_targets_mean": 2835.6,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 3.095313741064337,
|
|
"grad_norm": 0.6730883380447639,
|
|
"learning_rate": 2.7365729114761862e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24750065803527832,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4701.1,
|
|
"valid_targets_min": 1770
|
|
},
|
|
{
|
|
"epoch": 3.103256552819698,
|
|
"grad_norm": 0.6496050140021246,
|
|
"learning_rate": 2.729208186265386e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1526293158531189,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3685.2,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 3.1111993645750595,
|
|
"grad_norm": 0.6132448787151672,
|
|
"learning_rate": 2.721832039413077e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13251447677612305,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4775.8,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 3.119142176330421,
|
|
"grad_norm": 0.6534089674125801,
|
|
"learning_rate": 2.7144445864523887e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1042649894952774,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2116.1,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 3.1270849880857825,
|
|
"grad_norm": 0.5610654286323192,
|
|
"learning_rate": 2.7070459430935407e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20575225353240967,
|
|
"step": 1970,
|
|
"valid_targets_mean": 5781.6,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 3.1350277998411435,
|
|
"grad_norm": 0.6595378333126724,
|
|
"learning_rate": 2.69963622522203e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14640724658966064,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3687.5,
|
|
"valid_targets_min": 1907
|
|
},
|
|
{
|
|
"epoch": 3.142970611596505,
|
|
"grad_norm": 0.6510311759523941,
|
|
"learning_rate": 2.6922155488968117e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17651942372322083,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3911.5,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 3.1509134233518665,
|
|
"grad_norm": 0.7153864063386693,
|
|
"learning_rate": 2.684784030348486e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13526928424835205,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2498.2,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 3.158856235107228,
|
|
"grad_norm": 0.5884381935708952,
|
|
"learning_rate": 2.6773417859774755e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17918311059474945,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4486.4,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 3.1667990468625895,
|
|
"grad_norm": 0.5566351040730733,
|
|
"learning_rate": 2.669888932352201e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19241678714752197,
|
|
"step": 1995,
|
|
"valid_targets_mean": 5184.5,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 3.1747418586179506,
|
|
"grad_norm": 0.576534821246122,
|
|
"learning_rate": 2.662425586207259e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15660223364830017,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4241.0,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 3.182684670373312,
|
|
"grad_norm": 0.5093986523514747,
|
|
"learning_rate": 2.6549518644415876e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1638888120651245,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5898.8,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 3.1906274821286735,
|
|
"grad_norm": 0.7353756285905392,
|
|
"learning_rate": 2.6474678841166426e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1899694949388504,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3826.9,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 3.198570293884035,
|
|
"grad_norm": 0.5663303623735051,
|
|
"learning_rate": 2.639973762454558e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10249858349561691,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3137.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 3.2065131056393965,
|
|
"grad_norm": 0.6011339238503539,
|
|
"learning_rate": 2.6324696168363134e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15374822914600372,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3815.1,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 3.2144559173947576,
|
|
"grad_norm": 5.853107157888295,
|
|
"learning_rate": 2.624955564799894e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13784492015838623,
|
|
"step": 2025,
|
|
"valid_targets_mean": 5436.4,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 3.222398729150119,
|
|
"grad_norm": 0.5874098233172632,
|
|
"learning_rate": 2.617431724038451e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.208487868309021,
|
|
"step": 2030,
|
|
"valid_targets_mean": 5751.9,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 3.2303415409054805,
|
|
"grad_norm": 0.6173584639783526,
|
|
"learning_rate": 2.609898212398455e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18671675026416779,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4935.9,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 3.238284352660842,
|
|
"grad_norm": 0.6098129166515552,
|
|
"learning_rate": 2.6023551478778535e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1249222457408905,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3286.8,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 3.2462271644162035,
|
|
"grad_norm": 0.6047489191749628,
|
|
"learning_rate": 2.5948026486242225e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1503782868385315,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3488.1,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 3.2541699761715646,
|
|
"grad_norm": 0.5739721667387558,
|
|
"learning_rate": 2.5872408329329136e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15386000275611877,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4441.9,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 3.262112787926926,
|
|
"grad_norm": 0.588154230434957,
|
|
"learning_rate": 2.5796698192452016e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18854063749313354,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4966.8,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 3.2700555996822875,
|
|
"grad_norm": 0.6473408615966887,
|
|
"learning_rate": 2.572089726146432e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13326723873615265,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2754.5,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 3.277998411437649,
|
|
"grad_norm": 0.5774874456420306,
|
|
"learning_rate": 2.564500672364162e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1316845715045929,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4395.5,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 3.2859412231930105,
|
|
"grad_norm": 0.6365307133099977,
|
|
"learning_rate": 2.556902776766298e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09234365820884705,
|
|
"step": 2070,
|
|
"valid_targets_mean": 1934.1,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 3.2938840349483716,
|
|
"grad_norm": 0.8578366997723025,
|
|
"learning_rate": 2.5492961583592397e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20784443616867065,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5239.0,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 3.301826846703733,
|
|
"grad_norm": 0.5738475455070982,
|
|
"learning_rate": 2.5416809362860107e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14686894416809082,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4413.5,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 3.3097696584590945,
|
|
"grad_norm": 0.6045700146248006,
|
|
"learning_rate": 2.5340572298243946e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16430670022964478,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3787.9,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 3.317712470214456,
|
|
"grad_norm": 0.7886239749738346,
|
|
"learning_rate": 2.5264251583850677e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18329930305480957,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2829.6,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 3.3256552819698175,
|
|
"grad_norm": 0.7791601465759149,
|
|
"learning_rate": 2.518784841509726e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15704891085624695,
|
|
"step": 2095,
|
|
"valid_targets_mean": 2412.9,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 3.3335980937251786,
|
|
"grad_norm": 0.7507507465704205,
|
|
"learning_rate": 2.511136398869216e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1647796928882599,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2612.4,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 3.34154090548054,
|
|
"grad_norm": 0.6228913438892982,
|
|
"learning_rate": 2.503479950261658e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15700379014015198,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3925.2,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 3.3494837172359015,
|
|
"grad_norm": 0.6332665770818949,
|
|
"learning_rate": 2.4958156156105693e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16648569703102112,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4664.9,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 3.357426528991263,
|
|
"grad_norm": 0.4790099245230599,
|
|
"learning_rate": 2.4881435149629892e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12598669528961182,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5562.6,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 3.365369340746624,
|
|
"grad_norm": 0.6171689369614306,
|
|
"learning_rate": 2.4804637684875937e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15395745635032654,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4411.2,
|
|
"valid_targets_min": 1909
|
|
},
|
|
{
|
|
"epoch": 3.3733121525019856,
|
|
"grad_norm": 0.7566073853850409,
|
|
"learning_rate": 2.4727764964728177e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21996080875396729,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2844.5,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 3.381254964257347,
|
|
"grad_norm": 0.6399984838196628,
|
|
"learning_rate": 2.4650818193249693e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13925543427467346,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3153.8,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 3.3891977760127086,
|
|
"grad_norm": 0.5625471522353397,
|
|
"learning_rate": 2.4573798575663425e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1482044905424118,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5004.6,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 3.39714058776807,
|
|
"grad_norm": 0.631679882926346,
|
|
"learning_rate": 2.4496707318333323e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12517651915550232,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3032.6,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 3.4050833995234315,
|
|
"grad_norm": 0.47574984017149663,
|
|
"learning_rate": 2.441954562874541e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21265673637390137,
|
|
"step": 2145,
|
|
"valid_targets_mean": 9415.5,
|
|
"valid_targets_min": 4182
|
|
},
|
|
{
|
|
"epoch": 3.4130262112787926,
|
|
"grad_norm": 0.6661376460493598,
|
|
"learning_rate": 2.434231471548893e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10115528106689453,
|
|
"step": 2150,
|
|
"valid_targets_mean": 2145.5,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 3.420969023034154,
|
|
"grad_norm": 0.6176205727403162,
|
|
"learning_rate": 2.4265015788237348e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19787681102752686,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4502.1,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 3.4289118347895156,
|
|
"grad_norm": 0.6216915585108073,
|
|
"learning_rate": 2.4187650057729465e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15480130910873413,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5040.2,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 3.436854646544877,
|
|
"grad_norm": 0.6075534954364197,
|
|
"learning_rate": 2.4110218735750403e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2163972109556198,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4961.2,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 3.444797458300238,
|
|
"grad_norm": 0.6180629798794636,
|
|
"learning_rate": 2.4032723035112667e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12424934655427933,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2995.1,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 3.4527402700555996,
|
|
"grad_norm": 0.5613976187272206,
|
|
"learning_rate": 2.3955164169637124e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17193597555160522,
|
|
"step": 2175,
|
|
"valid_targets_mean": 5647.6,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 3.460683081810961,
|
|
"grad_norm": 0.6526939873613553,
|
|
"learning_rate": 2.387754335413398e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1878625750541687,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3916.4,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 3.4686258935663226,
|
|
"grad_norm": 0.5840804275706781,
|
|
"learning_rate": 2.3799861804383807e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16877813637256622,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4690.9,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 3.476568705321684,
|
|
"grad_norm": 0.613348170343459,
|
|
"learning_rate": 2.3722120737118414e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440698206424713,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3375.8,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 3.484511517077045,
|
|
"grad_norm": 0.5285974209845816,
|
|
"learning_rate": 2.3644321370001868e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10451663285493851,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4442.6,
|
|
"valid_targets_min": 1959
|
|
},
|
|
{
|
|
"epoch": 3.4924543288324066,
|
|
"grad_norm": 0.6111610049829532,
|
|
"learning_rate": 2.3566464921611393e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15845367312431335,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3827.8,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 3.500397140587768,
|
|
"grad_norm": 0.6237354270867042,
|
|
"learning_rate": 2.348855261141827e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428501456975937,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3174.9,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 3.5083399523431296,
|
|
"grad_norm": 0.703664840592864,
|
|
"learning_rate": 2.341058565976874e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15409211814403534,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3040.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 3.5162827640984906,
|
|
"grad_norm": 0.7086999284098755,
|
|
"learning_rate": 2.3332565287864918e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16192835569381714,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2940.1,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 3.524225575853852,
|
|
"grad_norm": 0.6070737967669658,
|
|
"learning_rate": 2.325449271774563e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12152931839227676,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3693.1,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 3.5321683876092136,
|
|
"grad_norm": 0.6316878806231209,
|
|
"learning_rate": 2.3176369172267286e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14942045509815216,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3589.9,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 3.540111199364575,
|
|
"grad_norm": 0.6415538018525233,
|
|
"learning_rate": 2.3098195875084732e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14482775330543518,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3481.4,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 3.5480540111199366,
|
|
"grad_norm": 2.3664588738708834,
|
|
"learning_rate": 2.301997405063208e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17670771479606628,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5234.1,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 3.555996822875298,
|
|
"grad_norm": 0.7314036659570711,
|
|
"learning_rate": 2.2941704924103535e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329488754272461,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2318.8,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 3.563939634630659,
|
|
"grad_norm": 0.6030103292770627,
|
|
"learning_rate": 2.2863389721434165e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1715383231639862,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3617.8,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 3.5718824463860206,
|
|
"grad_norm": 0.561961823631029,
|
|
"learning_rate": 2.2785029669280775e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12069827318191528,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3658.5,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 3.579825258141382,
|
|
"grad_norm": 0.601322165290189,
|
|
"learning_rate": 2.2706625995002626e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481800228357315,
|
|
"step": 2255,
|
|
"valid_targets_mean": 4147.8,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 3.5877680698967436,
|
|
"grad_norm": 0.7195763630455532,
|
|
"learning_rate": 2.262817992664224e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1924723982810974,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3254.5,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 3.5957108816521046,
|
|
"grad_norm": 0.6401864420538239,
|
|
"learning_rate": 2.2549692692906158e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11905810981988907,
|
|
"step": 2265,
|
|
"valid_targets_mean": 2926.4,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 3.603653693407466,
|
|
"grad_norm": 0.6019808569186965,
|
|
"learning_rate": 2.24711655231457e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14506419003009796,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3604.8,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 3.6115965051628276,
|
|
"grad_norm": 0.6140281713988858,
|
|
"learning_rate": 2.2392599647337724e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1503479927778244,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4059.1,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 3.619539316918189,
|
|
"grad_norm": 0.49035870838997564,
|
|
"learning_rate": 2.23139962960653e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07658762484788895,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2776.0,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 3.6274821286735506,
|
|
"grad_norm": 0.6578367012382262,
|
|
"learning_rate": 2.2235356700498528e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11105146259069443,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2410.1,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 3.635424940428912,
|
|
"grad_norm": 0.6520306309233517,
|
|
"learning_rate": 2.2156682092375175e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12693750858306885,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3313.4,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 3.643367752184273,
|
|
"grad_norm": 0.5793069318484247,
|
|
"learning_rate": 2.2077973703981423e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15687263011932373,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4591.0,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 3.6513105639396346,
|
|
"grad_norm": 0.457085140032391,
|
|
"learning_rate": 2.1999232768132552e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12205037474632263,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5903.8,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 3.659253375694996,
|
|
"grad_norm": 0.6281884572162371,
|
|
"learning_rate": 2.1920460518153637e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13666808605194092,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3802.2,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 3.6671961874503576,
|
|
"grad_norm": 0.64559760718112,
|
|
"learning_rate": 2.1841658187860232e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2218000292778015,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4554.2,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 3.6751389992057186,
|
|
"grad_norm": 0.6782134319799065,
|
|
"learning_rate": 2.176282701153904e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15616141259670258,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3361.6,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 3.68308181096108,
|
|
"grad_norm": 0.5528577293278913,
|
|
"learning_rate": 2.1683968223928572e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16833969950675964,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4415.0,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 3.6910246227164416,
|
|
"grad_norm": 0.633857747232347,
|
|
"learning_rate": 2.1605083060199835e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13570350408554077,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3166.8,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 3.698967434471803,
|
|
"grad_norm": 0.72619663265857,
|
|
"learning_rate": 2.152617275593694e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386081874370575,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2537.1,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 3.7069102462271646,
|
|
"grad_norm": 0.647430896853528,
|
|
"learning_rate": 2.144723854711781e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13986042141914368,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3586.8,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 3.714853057982526,
|
|
"grad_norm": 0.6181427651276352,
|
|
"learning_rate": 2.1368281670094766e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12512773275375366,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3322.6,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 3.722795869737887,
|
|
"grad_norm": 0.6428825494669178,
|
|
"learning_rate": 2.1289303361575175e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16176003217697144,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4072.2,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 3.7307386814932486,
|
|
"grad_norm": 0.6059180419577727,
|
|
"learning_rate": 2.121030485860211e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32556942105293274,
|
|
"step": 2350,
|
|
"valid_targets_mean": 6431.6,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 3.73868149324861,
|
|
"grad_norm": 0.7465270735961784,
|
|
"learning_rate": 2.113128739853493e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1761673539876938,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3359.0,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 3.746624305003971,
|
|
"grad_norm": 0.5336508493406652,
|
|
"learning_rate": 2.1052252219029944e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12158143520355225,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4102.9,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 3.7545671167593326,
|
|
"grad_norm": 0.5681918187079632,
|
|
"learning_rate": 2.0973200558020967e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14112915098667145,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4258.0,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 3.762509928514694,
|
|
"grad_norm": 0.611967168926953,
|
|
"learning_rate": 2.0894133653700005e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20243264734745026,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4995.1,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 3.7704527402700556,
|
|
"grad_norm": 0.6376788181955011,
|
|
"learning_rate": 2.0815052744497795e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11911755800247192,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2519.4,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 3.778395552025417,
|
|
"grad_norm": 0.620933671598644,
|
|
"learning_rate": 2.0735959069064434e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12556111812591553,
|
|
"step": 2380,
|
|
"valid_targets_mean": 2685.4,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 3.7863383637807786,
|
|
"grad_norm": 0.6789226655224198,
|
|
"learning_rate": 2.065685386624999e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09773990511894226,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2231.5,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 3.7942811755361396,
|
|
"grad_norm": 0.46619442910636705,
|
|
"learning_rate": 2.0577738375085076e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126412495970726,
|
|
"step": 2390,
|
|
"valid_targets_mean": 6071.9,
|
|
"valid_targets_min": 2377
|
|
},
|
|
{
|
|
"epoch": 3.802223987291501,
|
|
"grad_norm": 0.666394441339621,
|
|
"learning_rate": 2.0498613834761462e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16123801469802856,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3939.0,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 3.8101667990468626,
|
|
"grad_norm": 0.7171689279656462,
|
|
"learning_rate": 2.041948148461264e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17233435809612274,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3277.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 3.818109610802224,
|
|
"grad_norm": 0.641975193993443,
|
|
"learning_rate": 2.0340342564094436e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17322856187820435,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3500.4,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 3.826052422557585,
|
|
"grad_norm": 0.6425074868502922,
|
|
"learning_rate": 2.0261198312765597e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12405849993228912,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3151.9,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 3.8339952343129466,
|
|
"grad_norm": 0.7462787537337754,
|
|
"learning_rate": 2.0182049970268355e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1604951024055481,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2962.5,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 3.841938046068308,
|
|
"grad_norm": 0.6717985157750199,
|
|
"learning_rate": 2.010289877630902e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16532018780708313,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3833.0,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 3.8498808578236696,
|
|
"grad_norm": 0.6599818649431433,
|
|
"learning_rate": 2.002374597063858e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14597034454345703,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3193.0,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.857823669579031,
|
|
"grad_norm": 0.6104532804967295,
|
|
"learning_rate": 1.9944592793033255e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13235798478126526,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3797.5,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 3.8657664813343926,
|
|
"grad_norm": 0.6002259765438007,
|
|
"learning_rate": 1.9865440483275086e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1723363697528839,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4235.0,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 3.8737092930897536,
|
|
"grad_norm": 0.6287313787554729,
|
|
"learning_rate": 1.978629028113254e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16525313258171082,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3611.9,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 3.881652104845115,
|
|
"grad_norm": 0.6918012853160886,
|
|
"learning_rate": 1.9707143426341058e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13738566637039185,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3536.8,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 3.8895949166004766,
|
|
"grad_norm": 0.5868393658597864,
|
|
"learning_rate": 1.962800115858364e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15031300485134125,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3405.0,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 3.897537728355838,
|
|
"grad_norm": 0.5872452067744168,
|
|
"learning_rate": 1.9548864717471472e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17125283181667328,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4321.5,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 3.905480540111199,
|
|
"grad_norm": 0.7738749562749725,
|
|
"learning_rate": 1.9469735342524454e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11742620170116425,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2773.5,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 3.9134233518665607,
|
|
"grad_norm": 0.6025436722307198,
|
|
"learning_rate": 1.939061427315179e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319112479686737,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4137.2,
|
|
"valid_targets_min": 1716
|
|
},
|
|
{
|
|
"epoch": 3.921366163621922,
|
|
"grad_norm": 0.6639466948912108,
|
|
"learning_rate": 1.931150274863265e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19464164972305298,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3540.2,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.9293089753772836,
|
|
"grad_norm": 0.7025428017768329,
|
|
"learning_rate": 1.9232402008096643e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15977001190185547,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3535.8,
|
|
"valid_targets_min": 1600
|
|
},
|
|
{
|
|
"epoch": 3.937251787132645,
|
|
"grad_norm": 0.5875981019693655,
|
|
"learning_rate": 1.9153313290504495e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1541690230369568,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4129.4,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 3.9451945988880066,
|
|
"grad_norm": 0.6805162953613475,
|
|
"learning_rate": 1.9074237834628623e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12247131764888763,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2277.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 3.9531374106433677,
|
|
"grad_norm": 0.6533108786758957,
|
|
"learning_rate": 1.8995176879033698e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1789095103740692,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3914.1,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 3.961080222398729,
|
|
"grad_norm": 0.5345394160209156,
|
|
"learning_rate": 1.89161316620573e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329527199268341,
|
|
"step": 2495,
|
|
"valid_targets_mean": 5273.9,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 3.9690230341540906,
|
|
"grad_norm": 0.6633942949554794,
|
|
"learning_rate": 1.8837103421790486e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13019979000091553,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2423.9,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.9769658459094517,
|
|
"grad_norm": 0.5681968343384123,
|
|
"learning_rate": 1.8758093396058386e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24214231967926025,
|
|
"step": 2505,
|
|
"valid_targets_mean": 6875.4,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 3.984908657664813,
|
|
"grad_norm": 0.5080363879096551,
|
|
"learning_rate": 1.8679102822400874e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2088535726070404,
|
|
"step": 2510,
|
|
"valid_targets_mean": 6129.5,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 3.9928514694201747,
|
|
"grad_norm": 0.5718576303858326,
|
|
"learning_rate": 1.8600132938053098e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15278580784797668,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4876.6,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.7956993113873996,
|
|
"learning_rate": 1.8521184979926177e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419133186340332,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3791.1,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 4.0079428117553615,
|
|
"grad_norm": 0.8004063141501536,
|
|
"learning_rate": 1.8442260184587804e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256811022758484,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4247.5,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 4.015885623510723,
|
|
"grad_norm": 0.622776473880821,
|
|
"learning_rate": 1.8363359788242842e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16206404566764832,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4747.0,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 4.0238284352660845,
|
|
"grad_norm": 0.5670058209658294,
|
|
"learning_rate": 1.8284485026714013e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455042064189911,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4745.1,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 4.031771247021446,
|
|
"grad_norm": 0.668871812199874,
|
|
"learning_rate": 1.8205637135422525e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13962621986865997,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3120.8,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 4.039714058776807,
|
|
"grad_norm": 0.705222718738015,
|
|
"learning_rate": 1.8126817349368697e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16245445609092712,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3063.0,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 4.047656870532168,
|
|
"grad_norm": 0.4615326349872557,
|
|
"learning_rate": 1.8048026903112632e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06604734063148499,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2568.0,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 4.0555996822875295,
|
|
"grad_norm": 0.6618328643050813,
|
|
"learning_rate": 1.7969267030754903e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10539937019348145,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2500.1,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 4.063542494042891,
|
|
"grad_norm": 0.784059347134206,
|
|
"learning_rate": 1.7890538965917184e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18110109865665436,
|
|
"step": 2560,
|
|
"valid_targets_mean": 2444.0,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 4.0714853057982525,
|
|
"grad_norm": 0.6616363811416438,
|
|
"learning_rate": 1.7811843941722952e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11682207137346268,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2944.9,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 4.079428117553614,
|
|
"grad_norm": 0.7257572063467572,
|
|
"learning_rate": 1.7733183190778174e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12826158106327057,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2612.0,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 4.0873709293089755,
|
|
"grad_norm": 0.9077785690938163,
|
|
"learning_rate": 1.7654557945151968e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20185674726963043,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4580.2,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 4.095313741064337,
|
|
"grad_norm": 0.5909235553628687,
|
|
"learning_rate": 1.7575969436357352e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17884165048599243,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4765.5,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 4.1032565528196985,
|
|
"grad_norm": 0.4983006388965543,
|
|
"learning_rate": 1.7497418895331934e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11452636867761612,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3368.2,
|
|
"valid_targets_min": 1740
|
|
},
|
|
{
|
|
"epoch": 4.11119936457506,
|
|
"grad_norm": 0.6791180582727433,
|
|
"learning_rate": 1.7418907552418597e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1645531803369522,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4252.5,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 4.119142176330421,
|
|
"grad_norm": 0.5659775698670351,
|
|
"learning_rate": 1.7340436637346315e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09709826856851578,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4434.1,
|
|
"valid_targets_min": 1918
|
|
},
|
|
{
|
|
"epoch": 4.127084988085782,
|
|
"grad_norm": 0.5204521089575127,
|
|
"learning_rate": 1.726200737921079e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14451345801353455,
|
|
"step": 2600,
|
|
"valid_targets_mean": 5093.6,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 4.1350277998411435,
|
|
"grad_norm": 0.588617614208705,
|
|
"learning_rate": 1.718362100645527e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19152230024337769,
|
|
"step": 2605,
|
|
"valid_targets_mean": 5329.5,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 4.142970611596505,
|
|
"grad_norm": 0.6101962074103487,
|
|
"learning_rate": 1.710527874685129e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15386857092380524,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3850.6,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 4.1509134233518665,
|
|
"grad_norm": 0.7787448318657972,
|
|
"learning_rate": 1.702698182747942e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21218350529670715,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3291.5,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 4.158856235107228,
|
|
"grad_norm": 0.60706299853299,
|
|
"learning_rate": 1.6948731474710075e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14830046892166138,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4144.9,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 4.1667990468625895,
|
|
"grad_norm": 1.299806599125462,
|
|
"learning_rate": 1.68705289141843e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14542904496192932,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3742.6,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 4.174741858617951,
|
|
"grad_norm": 0.5190415150800741,
|
|
"learning_rate": 1.679237537079454e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323857307434082,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4482.5,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 4.1826846703733125,
|
|
"grad_norm": 0.5779088384315564,
|
|
"learning_rate": 1.6714272068665526e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11714215576648712,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3572.6,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 4.190627482128674,
|
|
"grad_norm": 0.4892757342981797,
|
|
"learning_rate": 1.663622023113501e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11094695329666138,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4947.4,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 4.198570293884035,
|
|
"grad_norm": 0.6705040872532491,
|
|
"learning_rate": 1.655822108073467e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2056352198123932,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4601.8,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 4.206513105639396,
|
|
"grad_norm": 0.6499653594385914,
|
|
"learning_rate": 1.648027583917095e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16701552271842957,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4196.8,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 4.2144559173947576,
|
|
"grad_norm": 0.7593705181010602,
|
|
"learning_rate": 1.640238572730591e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16883152723312378,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3234.1,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 4.222398729150119,
|
|
"grad_norm": 0.5756850295124694,
|
|
"learning_rate": 1.632455196513809e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12224465608596802,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4402.2,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 4.2303415409054805,
|
|
"grad_norm": 0.6994744569667497,
|
|
"learning_rate": 1.624677577178345e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16906169056892395,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3629.9,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 4.238284352660842,
|
|
"grad_norm": 0.6322605802726673,
|
|
"learning_rate": 1.616905836545624e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1373470276594162,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4247.8,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 4.2462271644162035,
|
|
"grad_norm": 0.6622253588327762,
|
|
"learning_rate": 1.6091400963449894e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15044862031936646,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3325.4,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 4.254169976171565,
|
|
"grad_norm": 0.6088179127238271,
|
|
"learning_rate": 1.6013804782118043e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15745404362678528,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3533.0,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 4.2621127879269265,
|
|
"grad_norm": 0.6473343517218992,
|
|
"learning_rate": 1.5936271036855372e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14949601888656616,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4151.4,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 4.270055599682287,
|
|
"grad_norm": 0.6202451684677918,
|
|
"learning_rate": 1.585880094207864e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12828192114830017,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4194.4,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 4.277998411437649,
|
|
"grad_norm": 0.600317154466383,
|
|
"learning_rate": 1.5781395711207664e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12559902667999268,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3775.8,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 4.28594122319301,
|
|
"grad_norm": 0.5781787270418594,
|
|
"learning_rate": 1.5704056556646255e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382368803024292,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4870.5,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 4.293884034948372,
|
|
"grad_norm": 0.7211397341551755,
|
|
"learning_rate": 1.562678468976329e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1793462634086609,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4469.5,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 4.301826846703733,
|
|
"grad_norm": 0.6778166706221618,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15689727663993835,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4091.0,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 4.3097696584590945,
|
|
"grad_norm": 0.836668212897397,
|
|
"learning_rate": 1.5472447659219573e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12563787400722504,
|
|
"step": 2715,
|
|
"valid_targets_mean": 1859.4,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 4.317712470214456,
|
|
"grad_norm": 0.8113602493300687,
|
|
"learning_rate": 1.5395384912951096e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18390220403671265,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2674.6,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 4.3256552819698175,
|
|
"grad_norm": 0.6279717477455469,
|
|
"learning_rate": 1.531839428910774e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08969171345233917,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2641.1,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 4.333598093725179,
|
|
"grad_norm": 0.6338373482902728,
|
|
"learning_rate": 1.5241476993599318e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09362320601940155,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2982.1,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 4.3415409054805405,
|
|
"grad_norm": 0.5352788182819581,
|
|
"learning_rate": 1.5164634231187106e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15598449110984802,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5687.5,
|
|
"valid_targets_min": 2349
|
|
},
|
|
{
|
|
"epoch": 4.349483717235901,
|
|
"grad_norm": 0.6599896817500032,
|
|
"learning_rate": 1.5087867205464933e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16144415736198425,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4070.5,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 4.357426528991263,
|
|
"grad_norm": 0.7135919132497727,
|
|
"learning_rate": 1.5011177118840376e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13649824261665344,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2641.2,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 4.365369340746624,
|
|
"grad_norm": 0.6060763384695484,
|
|
"learning_rate": 1.4934565172515917e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1539280116558075,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4064.1,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 4.373312152501986,
|
|
"grad_norm": 0.6782225424101075,
|
|
"learning_rate": 1.4858032566470107e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13185054063796997,
|
|
"step": 2755,
|
|
"valid_targets_mean": 2893.8,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 4.381254964257347,
|
|
"grad_norm": 0.6916685345440985,
|
|
"learning_rate": 1.4781580499438794e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10695084929466248,
|
|
"step": 2760,
|
|
"valid_targets_mean": 2349.4,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 4.3891977760127086,
|
|
"grad_norm": 0.6751020583395295,
|
|
"learning_rate": 1.4705210168896327e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13968747854232788,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3439.0,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 4.39714058776807,
|
|
"grad_norm": 0.5544332345406878,
|
|
"learning_rate": 1.462892277103681e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16161727905273438,
|
|
"step": 2770,
|
|
"valid_targets_mean": 6458.9,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 4.4050833995234315,
|
|
"grad_norm": 0.6163382721135435,
|
|
"learning_rate": 1.455271950075539e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12818969786167145,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3624.4,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 4.413026211278793,
|
|
"grad_norm": 0.6756029998666417,
|
|
"learning_rate": 1.4476601551629493e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14901140332221985,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3367.8,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 4.420969023034154,
|
|
"grad_norm": 0.6709507321543058,
|
|
"learning_rate": 1.4400570115900147e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1553899198770523,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3820.5,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 4.428911834789515,
|
|
"grad_norm": 0.6896552683187516,
|
|
"learning_rate": 1.4324626384453345e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16133293509483337,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3324.1,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 4.436854646544877,
|
|
"grad_norm": 0.7008976850232289,
|
|
"learning_rate": 1.4248771546801339e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19187995791435242,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4186.2,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 4.444797458300238,
|
|
"grad_norm": 0.5776084511030536,
|
|
"learning_rate": 1.4173006791064023e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13368406891822815,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3788.5,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 4.4527402700556,
|
|
"grad_norm": 0.6267410164546168,
|
|
"learning_rate": 1.4097333303950368e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13375627994537354,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3865.6,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 4.460683081810961,
|
|
"grad_norm": 0.7745157334715562,
|
|
"learning_rate": 1.4021752270739759e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1615782380104065,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2538.9,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 4.468625893566323,
|
|
"grad_norm": 0.7283049361144723,
|
|
"learning_rate": 1.3946264875263485e-05,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09900561720132828,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3258.6,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 4.476568705321684,
|
|
"grad_norm": 0.6911339760566637,
|
|
"learning_rate": 1.3870872299886184e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14855235815048218,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3070.4,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 4.4845115170770455,
|
|
"grad_norm": 0.6521045338179122,
|
|
"learning_rate": 1.3795575725487303e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16655588150024414,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4251.9,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 4.492454328832407,
|
|
"grad_norm": 0.7675897756905521,
|
|
"learning_rate": 1.3720376331442652e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14236781001091003,
|
|
"step": 2830,
|
|
"valid_targets_mean": 2368.2,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 4.5003971405877685,
|
|
"grad_norm": 0.5073650952571984,
|
|
"learning_rate": 1.364527529560586e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13535702228546143,
|
|
"step": 2835,
|
|
"valid_targets_mean": 6599.0,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 4.508339952343129,
|
|
"grad_norm": 0.5972816812903718,
|
|
"learning_rate": 1.3570273794289978e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1085084080696106,
|
|
"step": 2840,
|
|
"valid_targets_mean": 4352.2,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 4.516282764098491,
|
|
"grad_norm": 0.5738048862599441,
|
|
"learning_rate": 1.3495373002249061e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09322848916053772,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2842.1,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 4.524225575853852,
|
|
"grad_norm": 0.6675814083846257,
|
|
"learning_rate": 1.3420574092659713e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12001092731952667,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3931.6,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 4.532168387609214,
|
|
"grad_norm": 0.6200122151003018,
|
|
"learning_rate": 1.3345878237102766e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13022911548614502,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4162.5,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 4.540111199364575,
|
|
"grad_norm": 0.5967606134923394,
|
|
"learning_rate": 1.3271286605544906e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340520977973938,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4796.6,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 4.548054011119937,
|
|
"grad_norm": 0.830663096046581,
|
|
"learning_rate": 1.3196800366320357e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16767288744449615,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2471.9,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 4.555996822875298,
|
|
"grad_norm": 0.5463863638304459,
|
|
"learning_rate": 1.3122420686112554e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14915570616722107,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4614.5,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 4.5639396346306595,
|
|
"grad_norm": 0.7940683189630128,
|
|
"learning_rate": 1.3048148729935917e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16848161816596985,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3178.6,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 4.571882446386021,
|
|
"grad_norm": 0.5809882635307205,
|
|
"learning_rate": 1.297398566111756e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1943894922733307,
|
|
"step": 2880,
|
|
"valid_targets_mean": 5502.1,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 4.579825258141382,
|
|
"grad_norm": 0.7250345514889305,
|
|
"learning_rate": 1.2899932641279082e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14435060322284698,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3359.8,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 4.587768069896743,
|
|
"grad_norm": 0.5964484727843641,
|
|
"learning_rate": 1.2825990830318395e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17853567004203796,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4937.9,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 4.595710881652105,
|
|
"grad_norm": 0.661094433049212,
|
|
"learning_rate": 1.2752161386391526e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157152459025383,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4007.2,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 4.603653693407466,
|
|
"grad_norm": 0.6059347742194487,
|
|
"learning_rate": 1.2678445465894491e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1649794578552246,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5275.2,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 4.611596505162828,
|
|
"grad_norm": 0.6048858145471392,
|
|
"learning_rate": 1.2604844223445181e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14307013154029846,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4460.4,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 4.619539316918189,
|
|
"grad_norm": 0.7221777861809002,
|
|
"learning_rate": 1.2531358811865268e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10635461658239365,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2729.2,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 4.627482128673551,
|
|
"grad_norm": 0.6936681973963905,
|
|
"learning_rate": 1.2457990382162173e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09785018116235733,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2428.0,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 4.635424940428912,
|
|
"grad_norm": 0.5461472327249861,
|
|
"learning_rate": 1.238474008351101e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09418754279613495,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3833.6,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 4.6433677521842736,
|
|
"grad_norm": 0.7264666639928669,
|
|
"learning_rate": 1.2311609063236594e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12254303693771362,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4643.1,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 4.651310563939635,
|
|
"grad_norm": 0.8036238137063538,
|
|
"learning_rate": 1.2238598466795493e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10492721199989319,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2865.8,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 4.659253375694996,
|
|
"grad_norm": 0.5201734273990247,
|
|
"learning_rate": 1.2165709437758042e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12454238533973694,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4250.1,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 4.667196187450357,
|
|
"grad_norm": 0.6161201501542579,
|
|
"learning_rate": 1.209294311779047e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1596311628818512,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4272.9,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 4.675138999205719,
|
|
"grad_norm": 0.642570030243398,
|
|
"learning_rate": 1.2020300646637018e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09582018852233887,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2740.8,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 4.68308181096108,
|
|
"grad_norm": 0.5558460452608506,
|
|
"learning_rate": 1.1947783162102043e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1075383871793747,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4152.2,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 4.691024622716442,
|
|
"grad_norm": 0.625061581812015,
|
|
"learning_rate": 1.1875391800032248e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10913947224617004,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3939.4,
|
|
"valid_targets_min": 2681
|
|
},
|
|
{
|
|
"epoch": 4.698967434471803,
|
|
"grad_norm": 0.7638760114669543,
|
|
"learning_rate": 1.1803127694298873e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21768397092819214,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4078.5,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 4.706910246227165,
|
|
"grad_norm": 0.6686637830244812,
|
|
"learning_rate": 1.173099197677992e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383422166109085,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3422.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 4.714853057982526,
|
|
"grad_norm": 0.6246107563012437,
|
|
"learning_rate": 1.1658985777342458e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12738335132598877,
|
|
"step": 2970,
|
|
"valid_targets_mean": 3646.6,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 4.722795869737888,
|
|
"grad_norm": 0.5576943035460122,
|
|
"learning_rate": 1.1587110223824874e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10974416136741638,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4219.5,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 4.730738681493248,
|
|
"grad_norm": 0.6616519582919831,
|
|
"learning_rate": 1.151536644201925e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18830126523971558,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4745.5,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 4.73868149324861,
|
|
"grad_norm": 0.636274534963714,
|
|
"learning_rate": 1.1443755555653751e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11045973747968674,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2858.5,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 4.746624305003971,
|
|
"grad_norm": 0.6520658165264172,
|
|
"learning_rate": 1.1372278686374935e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08704160153865814,
|
|
"step": 2990,
|
|
"valid_targets_mean": 2876.5,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 4.754567116759333,
|
|
"grad_norm": 0.6575278282551669,
|
|
"learning_rate": 1.1300936953730273e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12461285293102264,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2899.4,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 4.762509928514694,
|
|
"grad_norm": 0.6273750071102007,
|
|
"learning_rate": 1.1229731475150594e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1416163444519043,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3483.5,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 4.770452740270056,
|
|
"grad_norm": 0.5486040656279033,
|
|
"learning_rate": 1.1158663365932529e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10742324590682983,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3415.9,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 4.778395552025417,
|
|
"grad_norm": 0.6178686824385464,
|
|
"learning_rate": 1.1087733739221109e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17544828355312347,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4035.8,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 4.786338363780779,
|
|
"grad_norm": 0.6138862623372935,
|
|
"learning_rate": 1.1016943705992311e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15495526790618896,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4168.0,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 4.79428117553614,
|
|
"grad_norm": 0.7272914524454224,
|
|
"learning_rate": 1.0946294375035639e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17077623307704926,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3443.2,
|
|
"valid_targets_min": 1447
|
|
},
|
|
{
|
|
"epoch": 4.802223987291502,
|
|
"grad_norm": 0.518967973686742,
|
|
"learning_rate": 1.087578685293674e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15811672806739807,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5575.0,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 4.810166799046863,
|
|
"grad_norm": 0.7059415451661776,
|
|
"learning_rate": 1.080542224406015e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1632978320121765,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3622.9,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 4.818109610802224,
|
|
"grad_norm": 0.6639884119755587,
|
|
"learning_rate": 1.0735201650531915e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511397361755371,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3694.0,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 4.826052422557585,
|
|
"grad_norm": 0.6383922934496731,
|
|
"learning_rate": 1.066512617222235e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18669170141220093,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4856.9,
|
|
"valid_targets_min": 2262
|
|
},
|
|
{
|
|
"epoch": 4.833995234312947,
|
|
"grad_norm": 0.5638301887342296,
|
|
"learning_rate": 1.059519690672884e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09561211615800858,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3818.8,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 4.841938046068308,
|
|
"grad_norm": 0.7545148586957668,
|
|
"learning_rate": 1.0525414949358614e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18211820721626282,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3581.1,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 4.84988085782367,
|
|
"grad_norm": 0.7964672178633107,
|
|
"learning_rate": 1.0455781393111613e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444091498851776,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2166.6,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 4.857823669579031,
|
|
"grad_norm": 0.649238802688989,
|
|
"learning_rate": 1.0386297328663353e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17640064656734467,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3792.9,
|
|
"valid_targets_min": 2140
|
|
},
|
|
{
|
|
"epoch": 4.865766481334393,
|
|
"grad_norm": 0.6192051323747704,
|
|
"learning_rate": 1.0316963844347843e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21739062666893005,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4151.0,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 4.873709293089754,
|
|
"grad_norm": 0.6906344130878459,
|
|
"learning_rate": 1.0247782026140576e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1661054790019989,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3769.9,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 4.881652104845115,
|
|
"grad_norm": 0.71186934204812,
|
|
"learning_rate": 1.017875295764144e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12687590718269348,
|
|
"step": 3075,
|
|
"valid_targets_mean": 2592.5,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 4.889594916600476,
|
|
"grad_norm": 0.5233899344768943,
|
|
"learning_rate": 1.0109877720057818e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588259279727936,
|
|
"step": 3080,
|
|
"valid_targets_mean": 7748.4,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 4.897537728355838,
|
|
"grad_norm": 0.6074421896578559,
|
|
"learning_rate": 1.0041157392187651e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15932172536849976,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4063.4,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 4.905480540111199,
|
|
"grad_norm": 0.5892391547169168,
|
|
"learning_rate": 9.972593050402471e-06,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14810599386692047,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4396.1,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 4.913423351866561,
|
|
"grad_norm": 0.5786869045693848,
|
|
"learning_rate": 9.904185768630612e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11338655650615692,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4177.8,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 4.921366163621922,
|
|
"grad_norm": 0.6558806079174495,
|
|
"learning_rate": 9.835936618340377e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09096279740333557,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2190.9,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 4.929308975377284,
|
|
"grad_norm": 0.5972481569048912,
|
|
"learning_rate": 9.76784666852323e-06,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09569524228572845,
|
|
"step": 3105,
|
|
"valid_targets_mean": 2860.4,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 4.937251787132645,
|
|
"grad_norm": 0.6801238744370699,
|
|
"learning_rate": 9.699916985677062e-06,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14781427383422852,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4072.8,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 4.945194598888007,
|
|
"grad_norm": 0.5256892311936279,
|
|
"learning_rate": 9.6321486337895e-06,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15232108533382416,
|
|
"step": 3115,
|
|
"valid_targets_mean": 5149.6,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 4.953137410643368,
|
|
"grad_norm": 0.6924158012460585,
|
|
"learning_rate": 9.564542674321228e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1249108612537384,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2912.8,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 4.96108022239873,
|
|
"grad_norm": 0.6510975037981102,
|
|
"learning_rate": 9.49710016618937e-06,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16331085562705994,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3948.2,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 4.96902303415409,
|
|
"grad_norm": 0.6124133174178378,
|
|
"learning_rate": 9.429822165750893e-06,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10189686715602875,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4481.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 4.976965845909452,
|
|
"grad_norm": 0.671362375432401,
|
|
"learning_rate": 9.36270972678607e-06,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11943770200014114,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3156.0,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 4.984908657664813,
|
|
"grad_norm": 0.5171414327505941,
|
|
"learning_rate": 9.295763900481977e-06,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1351032853126526,
|
|
"step": 3140,
|
|
"valid_targets_mean": 6699.6,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 4.992851469420175,
|
|
"grad_norm": 0.5642781059350356,
|
|
"learning_rate": 9.22898573541602e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12088729441165924,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3684.5,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.8488687117208205,
|
|
"learning_rate": 9.162376277539513e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22990277409553528,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3454.6,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 5.0079428117553615,
|
|
"grad_norm": 0.6417691562937197,
|
|
"learning_rate": 9.095936570161301e-06,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11928108334541321,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2880.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 5.015885623510723,
|
|
"grad_norm": 0.6592896832882444,
|
|
"learning_rate": 9.029667653931411e-06,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13904760777950287,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3881.5,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 5.0238284352660845,
|
|
"grad_norm": 0.5996959109152341,
|
|
"learning_rate": 8.96357056682475e-06,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14394472539424896,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4899.2,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 5.031771247021446,
|
|
"grad_norm": 0.6812252440647568,
|
|
"learning_rate": 8.897646344124882e-06,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1388494372367859,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3335.0,
|
|
"valid_targets_min": 1818
|
|
},
|
|
{
|
|
"epoch": 5.039714058776807,
|
|
"grad_norm": 0.8169528695816257,
|
|
"learning_rate": 8.83189601840773e-06,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19398635625839233,
|
|
"step": 3175,
|
|
"valid_targets_mean": 2896.1,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 5.047656870532168,
|
|
"grad_norm": 0.7756621891827723,
|
|
"learning_rate": 8.766320619525511e-06,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332988739013672,
|
|
"step": 3180,
|
|
"valid_targets_mean": 2539.6,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 5.0555996822875295,
|
|
"grad_norm": 0.5307040504814844,
|
|
"learning_rate": 8.700921174590525e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13750728964805603,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5335.2,
|
|
"valid_targets_min": 2337
|
|
},
|
|
{
|
|
"epoch": 5.063542494042891,
|
|
"grad_norm": 0.7001821405147436,
|
|
"learning_rate": 8.63569870795907e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16505572199821472,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3766.8,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 5.0714853057982525,
|
|
"grad_norm": 0.885102098073489,
|
|
"learning_rate": 8.570654241215466e-06,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13684722781181335,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3077.4,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 5.079428117553614,
|
|
"grad_norm": 0.5992172291842146,
|
|
"learning_rate": 8.505788793155978e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07202945649623871,
|
|
"step": 3200,
|
|
"valid_targets_mean": 2406.0,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 5.0873709293089755,
|
|
"grad_norm": 0.6832812141448712,
|
|
"learning_rate": 8.441103379772893e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11512985080480576,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3163.1,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 5.095313741064337,
|
|
"grad_norm": 0.676074844153189,
|
|
"learning_rate": 8.376599014238605e-06,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144090473651886,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3881.5,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 5.1032565528196985,
|
|
"grad_norm": 1.6938232525096706,
|
|
"learning_rate": 8.312276706889738e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12732970714569092,
|
|
"step": 3215,
|
|
"valid_targets_mean": 2224.1,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 5.11119936457506,
|
|
"grad_norm": 0.5469763527942626,
|
|
"learning_rate": 8.24813746521133e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11759813129901886,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4771.9,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 5.119142176330421,
|
|
"grad_norm": 0.6381916471048728,
|
|
"learning_rate": 8.184182293821046e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382191777229309,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4916.4,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 5.127084988085782,
|
|
"grad_norm": 0.576080251044729,
|
|
"learning_rate": 8.120412194453442e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14060620963573456,
|
|
"step": 3230,
|
|
"valid_targets_mean": 4258.0,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 5.1350277998411435,
|
|
"grad_norm": 0.7538138451781584,
|
|
"learning_rate": 8.056828165944282e-06,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1403239220380783,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3031.5,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 5.142970611596505,
|
|
"grad_norm": 0.7376250768278632,
|
|
"learning_rate": 7.993431204214883e-06,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426602602005005,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3194.8,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 5.1509134233518665,
|
|
"grad_norm": 0.6506826623386598,
|
|
"learning_rate": 7.93022230225652e-06,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18983808159828186,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3417.2,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 5.158856235107228,
|
|
"grad_norm": 0.5545392346951981,
|
|
"learning_rate": 7.867202450114892e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08253845572471619,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3024.5,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 5.1667990468625895,
|
|
"grad_norm": 0.9311078949265009,
|
|
"learning_rate": 7.804372634874582e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15619777143001556,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2095.5,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 5.174741858617951,
|
|
"grad_norm": 0.7909642717338089,
|
|
"learning_rate": 7.74173384064359e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11523836851119995,
|
|
"step": 3260,
|
|
"valid_targets_mean": 2403.5,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 5.1826846703733125,
|
|
"grad_norm": 0.6954633530950496,
|
|
"learning_rate": 7.679287048537987e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12310142070055008,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3756.9,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 5.190627482128674,
|
|
"grad_norm": 0.6890888845095153,
|
|
"learning_rate": 7.617033236666469e-06,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13686847686767578,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3254.8,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 5.198570293884035,
|
|
"grad_norm": 0.7108605004666501,
|
|
"learning_rate": 7.55497338011506e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10808290541172028,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2396.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 5.206513105639396,
|
|
"grad_norm": 0.6290692733481396,
|
|
"learning_rate": 7.493108450931879e-06,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273719072341919,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4452.1,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 5.2144559173947576,
|
|
"grad_norm": 0.6963945264566402,
|
|
"learning_rate": 7.4314394181118636e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11993305385112762,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2820.4,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 5.222398729150119,
|
|
"grad_norm": 0.6414390125606636,
|
|
"learning_rate": 7.369967247581611e-06,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12122929841279984,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3451.1,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 5.2303415409054805,
|
|
"grad_norm": 0.7368665063832875,
|
|
"learning_rate": 7.3086929021842575e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312812715768814,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2669.1,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 5.238284352660842,
|
|
"grad_norm": 0.5701366006309365,
|
|
"learning_rate": 7.247617341664384e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1599208563566208,
|
|
"step": 3300,
|
|
"valid_targets_mean": 5545.6,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 5.2462271644162035,
|
|
"grad_norm": 0.5824793540760352,
|
|
"learning_rate": 7.186741522652994e-06,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11704738438129425,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 5.254169976171565,
|
|
"grad_norm": 0.6588674767927929,
|
|
"learning_rate": 7.12606639865252e-06,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13645845651626587,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3816.6,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 5.2621127879269265,
|
|
"grad_norm": 0.7838761046045937,
|
|
"learning_rate": 7.065592920021893e-06,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1516021490097046,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3377.5,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 5.270055599682287,
|
|
"grad_norm": 0.7533454233765863,
|
|
"learning_rate": 7.005322033961679e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1092204749584198,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2370.1,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 5.277998411437649,
|
|
"grad_norm": 0.6100682519096765,
|
|
"learning_rate": 6.945254684499185e-06,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1232384443283081,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3789.4,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 5.28594122319301,
|
|
"grad_norm": 0.6361597523932505,
|
|
"learning_rate": 6.8853918124737274e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310729682445526,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3844.6,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 5.293884034948372,
|
|
"grad_norm": 0.6743822156080563,
|
|
"learning_rate": 6.825734355521898e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11996262520551682,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3854.0,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 5.301826846703733,
|
|
"grad_norm": 0.5387136110448117,
|
|
"learning_rate": 6.766283248062817e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10124164074659348,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3678.1,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 5.3097696584590945,
|
|
"grad_norm": 0.6411127900130975,
|
|
"learning_rate": 6.707039421283559e-06,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15824384987354279,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4697.9,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 5.317712470214456,
|
|
"grad_norm": 0.6949417511473137,
|
|
"learning_rate": 6.648003803124559e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14748156070709229,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3436.9,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 5.3256552819698175,
|
|
"grad_norm": 0.5703622953420994,
|
|
"learning_rate": 6.589177318265047e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11219599843025208,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3306.5,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 5.333598093725179,
|
|
"grad_norm": 0.5929934051393402,
|
|
"learning_rate": 6.53056088810857e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1041344702243805,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3802.4,
|
|
"valid_targets_min": 2202
|
|
},
|
|
{
|
|
"epoch": 5.3415409054805405,
|
|
"grad_norm": 0.763624090052488,
|
|
"learning_rate": 6.472155430768608e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13723434507846832,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3251.8,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 5.349483717235901,
|
|
"grad_norm": 0.6475625968604629,
|
|
"learning_rate": 6.413961861054132e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15816320478916168,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4234.1,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 5.357426528991263,
|
|
"grad_norm": 0.6888228000337783,
|
|
"learning_rate": 6.3559810904553095e-06,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11879372596740723,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3385.2,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 5.365369340746624,
|
|
"grad_norm": 0.7685335016652026,
|
|
"learning_rate": 6.298214027129219e-06,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10900894552469254,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2475.1,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 5.373312152501986,
|
|
"grad_norm": 0.6920512624761661,
|
|
"learning_rate": 6.240661575885629e-06,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.112863689661026,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2719.0,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 5.381254964257347,
|
|
"grad_norm": 0.6166693038122358,
|
|
"learning_rate": 6.183324638172819e-06,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13053856790065765,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5216.5,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 5.3891977760127086,
|
|
"grad_norm": 0.716697379893855,
|
|
"learning_rate": 6.126204112063463e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251491904258728,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3535.0,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 5.39714058776807,
|
|
"grad_norm": 0.6045727080507581,
|
|
"learning_rate": 6.069300892240564e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07281094044446945,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2630.9,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 5.4050833995234315,
|
|
"grad_norm": 0.7565973628453535,
|
|
"learning_rate": 6.0126158699834625e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563147008419037,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3728.9,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 5.413026211278793,
|
|
"grad_norm": 0.6136930704944219,
|
|
"learning_rate": 5.956149933153816e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252826452255249,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4620.1,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 5.420969023034154,
|
|
"grad_norm": 0.6800184742584949,
|
|
"learning_rate": 5.899903966181751e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11582130938768387,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2928.5,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 5.428911834789515,
|
|
"grad_norm": 0.5188583258943029,
|
|
"learning_rate": 5.843878850052007e-06,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12078991532325745,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5470.8,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 5.436854646544877,
|
|
"grad_norm": 0.8225739765784345,
|
|
"learning_rate": 5.788075462290084e-06,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16426506638526917,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2683.4,
|
|
"valid_targets_min": 1959
|
|
},
|
|
{
|
|
"epoch": 5.444797458300238,
|
|
"grad_norm": 0.6718516803180173,
|
|
"learning_rate": 5.732494676948554e-06,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09926658868789673,
|
|
"step": 3430,
|
|
"valid_targets_mean": 3025.5,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 5.4527402700556,
|
|
"grad_norm": 0.6345760566209969,
|
|
"learning_rate": 5.677137364593363e-06,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12900181114673615,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4452.5,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 5.460683081810961,
|
|
"grad_norm": 0.47036724607132396,
|
|
"learning_rate": 5.622004392290163e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10976912081241608,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4146.0,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 5.468625893566323,
|
|
"grad_norm": 0.6089588922476947,
|
|
"learning_rate": 5.567096623590758e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09953498840332031,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3032.6,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 5.476568705321684,
|
|
"grad_norm": 0.713031080079929,
|
|
"learning_rate": 5.512414918519573e-06,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14212429523468018,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3076.2,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 5.4845115170770455,
|
|
"grad_norm": 0.6819599443996747,
|
|
"learning_rate": 5.457960133560179e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09896282851696014,
|
|
"step": 3455,
|
|
"valid_targets_mean": 2584.9,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 5.492454328832407,
|
|
"grad_norm": 0.6632878411274253,
|
|
"learning_rate": 5.403733121641883e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13329169154167175,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3695.5,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 5.5003971405877685,
|
|
"grad_norm": 0.8301532336233985,
|
|
"learning_rate": 5.349734732126366e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17388634383678436,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3416.8,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 5.508339952343129,
|
|
"grad_norm": 0.6672020545955284,
|
|
"learning_rate": 5.295965810794376e-06,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12524458765983582,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3335.9,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 5.516282764098491,
|
|
"grad_norm": 0.701159921151773,
|
|
"learning_rate": 5.2424271998324895e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17789173126220703,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3714.4,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.524225575853852,
|
|
"grad_norm": 0.6257299682998071,
|
|
"learning_rate": 5.189119737819912e-06,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14521852135658264,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3532.9,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 5.532168387609214,
|
|
"grad_norm": 0.6000844804722327,
|
|
"learning_rate": 5.136044259715342e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20075108110904694,
|
|
"step": 3485,
|
|
"valid_targets_mean": 6367.8,
|
|
"valid_targets_min": 2535
|
|
},
|
|
{
|
|
"epoch": 5.540111199364575,
|
|
"grad_norm": 0.6886381742383291,
|
|
"learning_rate": 5.083201596843905e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14155739545822144,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2835.0,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 5.548054011119937,
|
|
"grad_norm": 0.6350131795234889,
|
|
"learning_rate": 5.030592576884117e-06,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13660326600074768,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3475.6,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 5.555996822875298,
|
|
"grad_norm": 0.6026239654302747,
|
|
"learning_rate": 4.978218023854928e-06,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13190799951553345,
|
|
"step": 3500,
|
|
"valid_targets_mean": 5261.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 5.5639396346306595,
|
|
"grad_norm": 0.6373596629503311,
|
|
"learning_rate": 4.926078758102834e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14005126059055328,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3911.0,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 5.571882446386021,
|
|
"grad_norm": 0.6822946278495581,
|
|
"learning_rate": 4.87417559628897e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12978987395763397,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3077.5,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 5.579825258141382,
|
|
"grad_norm": 0.6950097696720747,
|
|
"learning_rate": 4.822509351376399e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12859278917312622,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3027.2,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 5.587768069896743,
|
|
"grad_norm": 0.6087507753513112,
|
|
"learning_rate": 4.7710808326173115e-06,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0860581025481224,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2688.6,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 5.595710881652105,
|
|
"grad_norm": 0.6144992974212321,
|
|
"learning_rate": 4.719890845540385e-06,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10921843349933624,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3150.5,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 5.603653693407466,
|
|
"grad_norm": 0.6002830846669975,
|
|
"learning_rate": 4.668940191938156e-06,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1493244171142578,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3842.5,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 5.611596505162828,
|
|
"grad_norm": 0.6773984102735725,
|
|
"learning_rate": 4.618229669854464e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13071127235889435,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3630.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 5.619539316918189,
|
|
"grad_norm": 0.7598254893000209,
|
|
"learning_rate": 4.567760073571947e-06,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14917224645614624,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3090.2,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 5.627482128673551,
|
|
"grad_norm": 0.6860811308404915,
|
|
"learning_rate": 4.51753219359961e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1476401537656784,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4681.4,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 5.635424940428912,
|
|
"grad_norm": 0.6488254977233069,
|
|
"learning_rate": 4.467546816660433e-06,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338762640953064,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3832.4,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 5.6433677521842736,
|
|
"grad_norm": 0.5385248148147501,
|
|
"learning_rate": 4.417804725679058e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13300855457782745,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4987.1,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 5.651310563939635,
|
|
"grad_norm": 0.6312304902453971,
|
|
"learning_rate": 4.368306699769518e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12525643408298492,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3735.2,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 5.659253375694996,
|
|
"grad_norm": 0.5838037199889438,
|
|
"learning_rate": 4.319053514223033e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10277433693408966,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4321.8,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 5.667196187450357,
|
|
"grad_norm": 0.7347635129439717,
|
|
"learning_rate": 4.270045940495879e-06,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1876486986875534,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3659.5,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 5.675138999205719,
|
|
"grad_norm": 0.576784322977642,
|
|
"learning_rate": 4.221284746197292e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1669253259897232,
|
|
"step": 3575,
|
|
"valid_targets_mean": 6618.8,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 5.68308181096108,
|
|
"grad_norm": 0.7766778335600155,
|
|
"learning_rate": 4.172770695077437e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1125335618853569,
|
|
"step": 3580,
|
|
"valid_targets_mean": 2202.2,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 5.691024622716442,
|
|
"grad_norm": 0.6977988856259152,
|
|
"learning_rate": 4.124504547015487e-06,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13138900697231293,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3518.2,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 5.698967434471803,
|
|
"grad_norm": 0.6314293053618897,
|
|
"learning_rate": 4.0764870580076675e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13971345126628876,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4597.2,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 5.706910246227165,
|
|
"grad_norm": 0.675369818531817,
|
|
"learning_rate": 4.0287189801554304e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19295042753219604,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4507.1,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 5.714853057982526,
|
|
"grad_norm": 0.7132562022144076,
|
|
"learning_rate": 3.98120106165371e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22377793490886688,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4235.1,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 5.722795869737888,
|
|
"grad_norm": 0.6484839145600759,
|
|
"learning_rate": 3.933934046779164e-06,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09109803289175034,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2705.5,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 5.730738681493248,
|
|
"grad_norm": 0.6963895584955365,
|
|
"learning_rate": 3.886918675878513e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12476445734500885,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3057.5,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 5.73868149324861,
|
|
"grad_norm": 0.6001286619621683,
|
|
"learning_rate": 3.840155685356983e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11611980944871902,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3899.8,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 5.746624305003971,
|
|
"grad_norm": 1.169231015096942,
|
|
"learning_rate": 3.793645807666735e-06,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11462537944316864,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4545.6,
|
|
"valid_targets_min": 1868
|
|
},
|
|
{
|
|
"epoch": 5.754567116759333,
|
|
"grad_norm": 0.6353754400070897,
|
|
"learning_rate": 3.747389771295411e-06,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1402149498462677,
|
|
"step": 3625,
|
|
"valid_targets_mean": 4694.4,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 5.762509928514694,
|
|
"grad_norm": 0.5260812852047829,
|
|
"learning_rate": 3.701388300754709e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13576026260852814,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5659.4,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 5.770452740270056,
|
|
"grad_norm": 0.5660752501929373,
|
|
"learning_rate": 3.6556421165690516e-06,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10652667284011841,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3694.4,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 5.778395552025417,
|
|
"grad_norm": 0.6372666131943194,
|
|
"learning_rate": 3.610151935264288e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293216049671173,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4715.8,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 5.786338363780779,
|
|
"grad_norm": 0.56773731656031,
|
|
"learning_rate": 3.5649184693564797e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09831590950489044,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3524.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 5.79428117553614,
|
|
"grad_norm": 0.6711443795910267,
|
|
"learning_rate": 3.5199424273407277e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12363112717866898,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3576.5,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 5.802223987291502,
|
|
"grad_norm": 0.7555744568539098,
|
|
"learning_rate": 3.4752245136801065e-06,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15189997851848602,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3480.8,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 5.810166799046863,
|
|
"grad_norm": 0.6771361488038231,
|
|
"learning_rate": 3.430765428794569e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2114054411649704,
|
|
"step": 3660,
|
|
"valid_targets_mean": 4869.5,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 5.818109610802224,
|
|
"grad_norm": 0.9117501526962264,
|
|
"learning_rate": 3.3865658690500424e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14365345239639282,
|
|
"step": 3665,
|
|
"valid_targets_mean": 2838.8,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 5.826052422557585,
|
|
"grad_norm": 0.5501361674844034,
|
|
"learning_rate": 3.34262652674749e-06,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1277163326740265,
|
|
"step": 3670,
|
|
"valid_targets_mean": 5800.6,
|
|
"valid_targets_min": 2971
|
|
},
|
|
{
|
|
"epoch": 5.833995234312947,
|
|
"grad_norm": 0.5925457701348842,
|
|
"learning_rate": 3.2989480901120684e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12111067026853561,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4192.2,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 5.841938046068308,
|
|
"grad_norm": 0.6758627581371583,
|
|
"learning_rate": 3.2555312432823283e-06,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1346399039030075,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4890.4,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 5.84988085782367,
|
|
"grad_norm": 0.6345451284565592,
|
|
"learning_rate": 3.2123766662995572e-06,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13173308968544006,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4385.4,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 5.857823669579031,
|
|
"grad_norm": 0.6734274604719634,
|
|
"learning_rate": 3.1694850350970686e-06,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09148533642292023,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3135.4,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 5.865766481334393,
|
|
"grad_norm": 0.5544116019676949,
|
|
"learning_rate": 3.1268570214896265e-06,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11533394455909729,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3735.2,
|
|
"valid_targets_min": 2434
|
|
},
|
|
{
|
|
"epoch": 5.873709293089754,
|
|
"grad_norm": 0.6006503514854309,
|
|
"learning_rate": 3.0844932931629602e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16129495203495026,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4876.5,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 5.881652104845115,
|
|
"grad_norm": 0.5785445239355657,
|
|
"learning_rate": 3.0423945136632626e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10279454290866852,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3357.2,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 5.889594916600476,
|
|
"grad_norm": 0.6233234517516338,
|
|
"learning_rate": 3.000561342386814e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1142657995223999,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4096.0,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 5.897537728355838,
|
|
"grad_norm": 0.5077563186369421,
|
|
"learning_rate": 2.9589944345696596e-06,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1441415548324585,
|
|
"step": 3715,
|
|
"valid_targets_mean": 5366.6,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 5.905480540111199,
|
|
"grad_norm": 0.5528848245566419,
|
|
"learning_rate": 2.9176944412773322e-06,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11273219436407089,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4272.8,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.913423351866561,
|
|
"grad_norm": 0.5882521103507894,
|
|
"learning_rate": 2.876662009394673e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17985671758651733,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5863.9,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 5.921366163621922,
|
|
"grad_norm": 0.5778077638069691,
|
|
"learning_rate": 2.8358977816156796e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13926495611667633,
|
|
"step": 3730,
|
|
"valid_targets_mean": 5245.4,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 5.929308975377284,
|
|
"grad_norm": 0.6790669228308279,
|
|
"learning_rate": 2.7954023964334485e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14889037609100342,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4088.5,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 5.937251787132645,
|
|
"grad_norm": 0.6208217165871712,
|
|
"learning_rate": 2.7551764881301955e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18381306529045105,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5024.2,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 5.945194598888007,
|
|
"grad_norm": 0.5944428645300028,
|
|
"learning_rate": 2.715220686767268e-06,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07099418342113495,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4096.1,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 5.953137410643368,
|
|
"grad_norm": 0.6656126132491592,
|
|
"learning_rate": 2.6755356181753247e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13460765779018402,
|
|
"step": 3750,
|
|
"valid_targets_mean": 4540.9,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 5.96108022239873,
|
|
"grad_norm": 0.6649201378723495,
|
|
"learning_rate": 2.6361219039445328e-06,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09948737919330597,
|
|
"step": 3755,
|
|
"valid_targets_mean": 2582.2,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 5.96902303415409,
|
|
"grad_norm": 0.6325298728128234,
|
|
"learning_rate": 2.5969801614147838e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15995225310325623,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4817.0,
|
|
"valid_targets_min": 1529
|
|
},
|
|
{
|
|
"epoch": 5.976965845909452,
|
|
"grad_norm": 0.6647369911112364,
|
|
"learning_rate": 2.558111003666075e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16091850399971008,
|
|
"step": 3765,
|
|
"valid_targets_mean": 4383.2,
|
|
"valid_targets_min": 2270
|
|
},
|
|
{
|
|
"epoch": 5.984908657664813,
|
|
"grad_norm": 0.8074236880073399,
|
|
"learning_rate": 2.519515039508893e-06,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12991711497306824,
|
|
"step": 3770,
|
|
"valid_targets_mean": 2659.5,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 5.992851469420175,
|
|
"grad_norm": 0.5988619468633295,
|
|
"learning_rate": 2.481192873474667e-06,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14527486264705658,
|
|
"step": 3775,
|
|
"valid_targets_mean": 4857.4,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.6277359077438609,
|
|
"learning_rate": 2.4431451058062928e-06,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19746717810630798,
|
|
"step": 3780,
|
|
"valid_targets_mean": 6386.4,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 6.0079428117553615,
|
|
"grad_norm": 0.641636951719754,
|
|
"learning_rate": 2.4053723324487677e-06,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09486865252256393,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3332.0,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 6.015885623510723,
|
|
"grad_norm": 0.6524442612886401,
|
|
"learning_rate": 2.3678751450398196e-06,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14953841269016266,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3894.8,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 6.0238284352660845,
|
|
"grad_norm": 0.8405551556164046,
|
|
"learning_rate": 2.330654130900656e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18554316461086273,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3528.6,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 6.031771247021446,
|
|
"grad_norm": 0.6973173200752549,
|
|
"learning_rate": 2.2937098730267572e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11074613034725189,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2983.2,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 6.039714058776807,
|
|
"grad_norm": 0.650607424133339,
|
|
"learning_rate": 2.2570429500787604e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16393336653709412,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4861.2,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 6.047656870532168,
|
|
"grad_norm": 0.7578932008578364,
|
|
"learning_rate": 2.2206539363733738e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12462211400270462,
|
|
"step": 3810,
|
|
"valid_targets_mean": 2459.2,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 6.0555996822875295,
|
|
"grad_norm": 0.7588710779184077,
|
|
"learning_rate": 2.1845434018744038e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259050965309143,
|
|
"step": 3815,
|
|
"valid_targets_mean": 2526.6,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 6.063542494042891,
|
|
"grad_norm": 0.6732311301753651,
|
|
"learning_rate": 2.148711912183803e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10093998908996582,
|
|
"step": 3820,
|
|
"valid_targets_mean": 2926.2,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 6.0714853057982525,
|
|
"grad_norm": 0.6184981160589109,
|
|
"learning_rate": 2.1131600285328458e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10469699651002884,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3771.5,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 6.079428117553614,
|
|
"grad_norm": 0.7117891505427676,
|
|
"learning_rate": 2.0778883077732903e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17986169457435608,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4144.8,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 6.0873709293089755,
|
|
"grad_norm": 0.6678543925013521,
|
|
"learning_rate": 2.0428973023686983e-06,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11596953868865967,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3690.9,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 6.095313741064337,
|
|
"grad_norm": 0.513281998563933,
|
|
"learning_rate": 2.0081875603857726e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05351889878511429,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2834.9,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 6.1032565528196985,
|
|
"grad_norm": 0.7039194005040196,
|
|
"learning_rate": 1.973759625485743e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10084855556488037,
|
|
"step": 3845,
|
|
"valid_targets_mean": 2775.8,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 6.11119936457506,
|
|
"grad_norm": 0.6325845133135739,
|
|
"learning_rate": 1.9396140369159e-06,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08992412686347961,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3172.5,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 6.119142176330421,
|
|
"grad_norm": 0.6507781045236065,
|
|
"learning_rate": 1.9057513295011087e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1008429080247879,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3087.2,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 6.127084988085782,
|
|
"grad_norm": 0.501688331473237,
|
|
"learning_rate": 1.8721720336354487e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15006273984909058,
|
|
"step": 3860,
|
|
"valid_targets_mean": 7176.2,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 6.1350277998411435,
|
|
"grad_norm": 0.6703435111699011,
|
|
"learning_rate": 1.8388766752739017e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14873436093330383,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3662.8,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 6.142970611596505,
|
|
"grad_norm": 0.5648084636753882,
|
|
"learning_rate": 1.805865775924116e-06,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0886387825012207,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3476.1,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 6.1509134233518665,
|
|
"grad_norm": 0.7521652954540752,
|
|
"learning_rate": 1.7731398526382416e-06,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12744367122650146,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2963.0,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 6.158856235107228,
|
|
"grad_norm": 0.6506785499698352,
|
|
"learning_rate": 1.7406994180048231e-06,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12952809035778046,
|
|
"step": 3880,
|
|
"valid_targets_mean": 5088.5,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 6.1667990468625895,
|
|
"grad_norm": 0.8182739278660189,
|
|
"learning_rate": 1.7085449801407783e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1380365788936615,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2992.9,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 6.174741858617951,
|
|
"grad_norm": 0.8794151248457893,
|
|
"learning_rate": 1.67667704268343e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13946077227592468,
|
|
"step": 3890,
|
|
"valid_targets_mean": 2898.5,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 6.1826846703733125,
|
|
"grad_norm": 0.7956062147868668,
|
|
"learning_rate": 1.6450961047826353e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12268726527690887,
|
|
"step": 3895,
|
|
"valid_targets_mean": 2514.1,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 6.190627482128674,
|
|
"grad_norm": 0.5847394891787983,
|
|
"learning_rate": 1.6138026610929446e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10958193987607956,
|
|
"step": 3900,
|
|
"valid_targets_mean": 4538.8,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 6.198570293884035,
|
|
"grad_norm": 0.7756902282782346,
|
|
"learning_rate": 1.5827972017658732e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292639672756195,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2877.9,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 6.206513105639396,
|
|
"grad_norm": 0.5486487706857012,
|
|
"learning_rate": 1.5520802124422108e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298564374446869,
|
|
"step": 3910,
|
|
"valid_targets_mean": 5329.5,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 6.2144559173947576,
|
|
"grad_norm": 0.6907376050427664,
|
|
"learning_rate": 1.5216521742444236e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16815900802612305,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3737.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 6.222398729150119,
|
|
"grad_norm": 0.6861010697263873,
|
|
"learning_rate": 1.491513563769118e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0954948142170906,
|
|
"step": 3920,
|
|
"valid_targets_mean": 2688.2,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 6.2303415409054805,
|
|
"grad_norm": 0.6832507980660436,
|
|
"learning_rate": 1.4616648530795673e-06,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11932156980037689,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3584.5,
|
|
"valid_targets_min": 1857
|
|
},
|
|
{
|
|
"epoch": 6.238284352660842,
|
|
"grad_norm": 0.640514263624275,
|
|
"learning_rate": 1.432106509698319e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10601985454559326,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3698.4,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 6.2462271644162035,
|
|
"grad_norm": 0.6173303733901818,
|
|
"learning_rate": 1.4028389965998867e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08571086823940277,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3341.5,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 6.254169976171565,
|
|
"grad_norm": 0.6033941186834164,
|
|
"learning_rate": 1.3738627722034848e-06,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2102654129266739,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5668.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.2621127879269265,
|
|
"grad_norm": 0.5645723180307605,
|
|
"learning_rate": 1.345178290365845e-06,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11399981379508972,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4758.2,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 6.270055599682287,
|
|
"grad_norm": 0.6222700069440545,
|
|
"learning_rate": 1.3167860003741218e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11566272377967834,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3193.1,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 6.277998411437649,
|
|
"grad_norm": 0.6871692551675775,
|
|
"learning_rate": 1.2886863469388389e-06,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12112772464752197,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3684.5,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 6.28594122319301,
|
|
"grad_norm": 0.6121253223135817,
|
|
"learning_rate": 1.2608797701869425e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15826129913330078,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5276.1,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 6.293884034948372,
|
|
"grad_norm": 0.6795150166589712,
|
|
"learning_rate": 1.2333667056548881e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11267130076885223,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3578.9,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 6.301826846703733,
|
|
"grad_norm": 0.5541878650212809,
|
|
"learning_rate": 1.2061475842818337e-06,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09319373965263367,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3595.2,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 6.3097696584590945,
|
|
"grad_norm": 0.6753327473315007,
|
|
"learning_rate": 1.1792228324028776e-06,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10827286541461945,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3041.1,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 6.317712470214456,
|
|
"grad_norm": 0.7561153144861177,
|
|
"learning_rate": 1.152592871742395e-06,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1615794450044632,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3939.2,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 6.3256552819698175,
|
|
"grad_norm": 0.5665202748946662,
|
|
"learning_rate": 1.1262581194074152e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11758914589881897,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3781.6,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 6.333598093725179,
|
|
"grad_norm": 2.9169164776756165,
|
|
"learning_rate": 1.100218987881112e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1717279553413391,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3429.5,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 6.3415409054805405,
|
|
"grad_norm": 0.6744128197908981,
|
|
"learning_rate": 1.0744758850163085e-06,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13215947151184082,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3841.0,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 6.349483717235901,
|
|
"grad_norm": 0.6389774178717454,
|
|
"learning_rate": 1.0490292140291247e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09915691614151001,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2507.4,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 6.357426528991263,
|
|
"grad_norm": 0.6591168964092291,
|
|
"learning_rate": 1.0238793734926467e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11747846007347107,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2590.4,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 6.365369340746624,
|
|
"grad_norm": 0.7129197214398482,
|
|
"learning_rate": 9.990267573306745e-07,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12227798998355865,
|
|
"step": 4010,
|
|
"valid_targets_mean": 2572.2,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 6.373312152501986,
|
|
"grad_norm": 0.7665579256161716,
|
|
"learning_rate": 9.744717548115613e-07,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15173032879829407,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3267.1,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 6.381254964257347,
|
|
"grad_norm": 0.7128340766891623,
|
|
"learning_rate": 9.502147505421244e-07,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15255656838417053,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3052.4,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 6.3891977760127086,
|
|
"grad_norm": 0.6023264016746027,
|
|
"learning_rate": 9.262561244616108e-07,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08786147832870483,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3371.6,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 6.39714058776807,
|
|
"grad_norm": 0.6722942450859356,
|
|
"learning_rate": 9.025962518357323e-07,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13647809624671936,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4137.5,
|
|
"valid_targets_min": 2109
|
|
},
|
|
{
|
|
"epoch": 6.4050833995234315,
|
|
"grad_norm": 0.6150781642395031,
|
|
"learning_rate": 8.792355032508282e-07,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13010865449905396,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4452.0,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 6.413026211278793,
|
|
"grad_norm": 0.6019778533740011,
|
|
"learning_rate": 8.561742446080168e-07,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10333382338285446,
|
|
"step": 4040,
|
|
"valid_targets_mean": 2884.2,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 6.420969023034154,
|
|
"grad_norm": 0.6146180830823612,
|
|
"learning_rate": 8.334128371174955e-07,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1671912670135498,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5472.5,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 6.428911834789515,
|
|
"grad_norm": 0.7062230990164255,
|
|
"learning_rate": 8.109516372928605e-07,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12950077652931213,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3174.1,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 6.436854646544877,
|
|
"grad_norm": 0.7214617238365126,
|
|
"learning_rate": 7.887909969455366e-07,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1328839212656021,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3570.6,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 6.444797458300238,
|
|
"grad_norm": 0.6457785395756762,
|
|
"learning_rate": 7.669312631792758e-07,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10522368550300598,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3637.6,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 6.4527402700556,
|
|
"grad_norm": 0.595930776661547,
|
|
"learning_rate": 7.453727783846876e-07,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09058021008968353,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3794.4,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 6.460683081810961,
|
|
"grad_norm": 0.6971270557036204,
|
|
"learning_rate": 7.241158802339065e-07,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18727661669254303,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4233.4,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 6.468625893566323,
|
|
"grad_norm": 0.6672329720848373,
|
|
"learning_rate": 7.031609016753016e-07,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11701859533786774,
|
|
"step": 4075,
|
|
"valid_targets_mean": 2810.5,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 6.476568705321684,
|
|
"grad_norm": 0.7462908323613261,
|
|
"learning_rate": 6.825081709282377e-07,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13601046800613403,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3466.4,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 6.4845115170770455,
|
|
"grad_norm": 0.5474374299193564,
|
|
"learning_rate": 6.62158011477958e-07,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12538829445838928,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4728.5,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 6.492454328832407,
|
|
"grad_norm": 0.6629202742686043,
|
|
"learning_rate": 6.421107420705097e-07,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10946807265281677,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3915.9,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 6.5003971405877685,
|
|
"grad_norm": 0.6339554944807237,
|
|
"learning_rate": 6.223666767077508e-07,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12790630757808685,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3970.2,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 6.508339952343129,
|
|
"grad_norm": 0.669877328163416,
|
|
"learning_rate": 6.029261246424267e-07,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15007254481315613,
|
|
"step": 4100,
|
|
"valid_targets_mean": 4533.4,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 6.516282764098491,
|
|
"grad_norm": 0.5901457529283185,
|
|
"learning_rate": 5.837893903733394e-07,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12029819935560226,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4405.9,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 6.524225575853852,
|
|
"grad_norm": 0.49021907140231863,
|
|
"learning_rate": 5.649567736405681e-07,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13696420192718506,
|
|
"step": 4110,
|
|
"valid_targets_mean": 6203.4,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 6.532168387609214,
|
|
"grad_norm": 0.6433297711590561,
|
|
"learning_rate": 5.464285694207672e-07,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15622669458389282,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4412.5,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 6.540111199364575,
|
|
"grad_norm": 0.6816510159846974,
|
|
"learning_rate": 5.282050679225714e-07,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12246264517307281,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3451.8,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 6.548054011119937,
|
|
"grad_norm": 0.7982488580855668,
|
|
"learning_rate": 5.102865545820245e-07,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12986546754837036,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2871.6,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 6.555996822875298,
|
|
"grad_norm": 0.706570044564291,
|
|
"learning_rate": 4.926733100581182e-07,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12330260872840881,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3003.6,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 6.5639396346306595,
|
|
"grad_norm": 0.5648563728701836,
|
|
"learning_rate": 4.7536561022840213e-07,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13795113563537598,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4265.2,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 6.571882446386021,
|
|
"grad_norm": 0.5676961356962579,
|
|
"learning_rate": 4.5836372618464964e-07,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08417010307312012,
|
|
"step": 4140,
|
|
"valid_targets_mean": 2970.1,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 6.579825258141382,
|
|
"grad_norm": 0.664666593469388,
|
|
"learning_rate": 4.416679242286215e-07,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13572648167610168,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4012.8,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 6.587768069896743,
|
|
"grad_norm": 0.7661839951958322,
|
|
"learning_rate": 4.2527846586789547e-07,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1681165099143982,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3993.5,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 6.595710881652105,
|
|
"grad_norm": 0.6640335668623013,
|
|
"learning_rate": 4.0919560781176317e-07,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10876567661762238,
|
|
"step": 4155,
|
|
"valid_targets_mean": 3877.1,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 6.603653693407466,
|
|
"grad_norm": 0.6746924340257882,
|
|
"learning_rate": 3.934196019672176e-07,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1876063346862793,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3577.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 6.611596505162828,
|
|
"grad_norm": 0.7126809296480972,
|
|
"learning_rate": 3.779506954349965e-07,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13389210402965546,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3279.4,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 6.619539316918189,
|
|
"grad_norm": 0.6057245437846794,
|
|
"learning_rate": 3.6278913050572076e-07,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06768558919429779,
|
|
"step": 4170,
|
|
"valid_targets_mean": 2370.8,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 6.627482128673551,
|
|
"grad_norm": 0.6071813584962561,
|
|
"learning_rate": 3.4793514465610414e-07,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12199762463569641,
|
|
"step": 4175,
|
|
"valid_targets_mean": 4811.2,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 6.635424940428912,
|
|
"grad_norm": 0.6336630452033049,
|
|
"learning_rate": 3.3338897054521205e-07,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10481952130794525,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3601.0,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 6.6433677521842736,
|
|
"grad_norm": 0.48173871903281434,
|
|
"learning_rate": 3.191508360108464e-07,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13259437680244446,
|
|
"step": 4185,
|
|
"valid_targets_mean": 7524.5,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 6.651310563939635,
|
|
"grad_norm": 0.8342094259344457,
|
|
"learning_rate": 3.0522096406595536e-07,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0910007432103157,
|
|
"step": 4190,
|
|
"valid_targets_mean": 2747.2,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 6.659253375694996,
|
|
"grad_norm": 0.728224243047748,
|
|
"learning_rate": 2.9159957289514926e-07,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19000819325447083,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4078.4,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 6.667196187450357,
|
|
"grad_norm": 0.7503314256804412,
|
|
"learning_rate": 2.782868758512791e-07,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16389736533164978,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3547.9,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 6.675138999205719,
|
|
"grad_norm": 0.6108833582039339,
|
|
"learning_rate": 2.6528308145210125e-07,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14255298674106598,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4582.0,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 6.68308181096108,
|
|
"grad_norm": 0.6098418406002423,
|
|
"learning_rate": 2.525883933770046e-07,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13821999728679657,
|
|
"step": 4210,
|
|
"valid_targets_mean": 5306.2,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 6.691024622716442,
|
|
"grad_norm": 0.6718985153700444,
|
|
"learning_rate": 2.402030104638198e-07,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14344651997089386,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3949.0,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 6.698967434471803,
|
|
"grad_norm": 0.6833207345270466,
|
|
"learning_rate": 2.2812712670571502e-07,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11337868869304657,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3246.8,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 6.706910246227165,
|
|
"grad_norm": 0.5894998479426262,
|
|
"learning_rate": 2.1636093124814738e-07,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11025926470756531,
|
|
"step": 4225,
|
|
"valid_targets_mean": 4266.4,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 6.714853057982526,
|
|
"grad_norm": 0.6546454048678895,
|
|
"learning_rate": 2.0490460838589855e-07,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10796785354614258,
|
|
"step": 4230,
|
|
"valid_targets_mean": 2939.5,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 6.722795869737888,
|
|
"grad_norm": 0.6873845776084513,
|
|
"learning_rate": 1.9375833756019923e-07,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12979376316070557,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3678.1,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 6.730738681493248,
|
|
"grad_norm": 0.7381452506465538,
|
|
"learning_rate": 1.8292229335590716e-07,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10957353562116623,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3481.6,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 6.73868149324861,
|
|
"grad_norm": 0.7458945929792611,
|
|
"learning_rate": 1.7239664549878688e-07,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19980651140213013,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4440.9,
|
|
"valid_targets_min": 2414
|
|
},
|
|
{
|
|
"epoch": 6.746624305003971,
|
|
"grad_norm": 0.7438788441524954,
|
|
"learning_rate": 1.6218155885283192e-07,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256391406059265,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3194.4,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 6.754567116759333,
|
|
"grad_norm": 0.6582301217158086,
|
|
"learning_rate": 1.5227719341769364e-07,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12737402319908142,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4042.8,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 6.762509928514694,
|
|
"grad_norm": 0.6906390548882704,
|
|
"learning_rate": 1.4268370432618306e-07,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14591683447360992,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3928.8,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 6.770452740270056,
|
|
"grad_norm": 0.6372396341984271,
|
|
"learning_rate": 1.3340124184182178e-07,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11711558699607849,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4255.8,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 6.778395552025417,
|
|
"grad_norm": 0.6473699654623891,
|
|
"learning_rate": 1.2442995135650393e-07,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1187758594751358,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3958.9,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 6.786338363780779,
|
|
"grad_norm": 0.5947042012780452,
|
|
"learning_rate": 1.1576997338821339e-07,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11414189636707306,
|
|
"step": 4275,
|
|
"valid_targets_mean": 4203.9,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 6.79428117553614,
|
|
"grad_norm": 0.6751239327395427,
|
|
"learning_rate": 1.0742144357882567e-07,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12792208790779114,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3692.2,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 6.802223987291502,
|
|
"grad_norm": 0.7684897368017195,
|
|
"learning_rate": 9.938449269197181e-08,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18221008777618408,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4700.1,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 6.810166799046863,
|
|
"grad_norm": 0.7388506805567033,
|
|
"learning_rate": 9.165924661100889e-08,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09083728492259979,
|
|
"step": 4290,
|
|
"valid_targets_mean": 1953.4,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 6.818109610802224,
|
|
"grad_norm": 0.6021726748391288,
|
|
"learning_rate": 8.424582633703493e-08,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1409570872783661,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4618.8,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 6.826052422557585,
|
|
"grad_norm": 0.8086399691247597,
|
|
"learning_rate": 7.714434798699933e-08,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13948075473308563,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3150.5,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 6.833995234312947,
|
|
"grad_norm": 0.681418083056356,
|
|
"learning_rate": 7.035492279187538e-08,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14677859842777252,
|
|
"step": 4305,
|
|
"valid_targets_mean": 4756.4,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 6.841938046068308,
|
|
"grad_norm": 0.6581245274621383,
|
|
"learning_rate": 6.387765709493288e-08,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11418851464986801,
|
|
"step": 4310,
|
|
"valid_targets_mean": 2946.4,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 6.84988085782367,
|
|
"grad_norm": 0.7032053607111315,
|
|
"learning_rate": 5.7712652350061515e-08,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1258661448955536,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3653.9,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 6.857823669579031,
|
|
"grad_norm": 0.7246381831717736,
|
|
"learning_rate": 5.186000512018341e-08,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11564037203788757,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3460.4,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 6.865766481334393,
|
|
"grad_norm": 0.7185782232489436,
|
|
"learning_rate": 4.631980707574535e-08,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11859475076198578,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3633.6,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 6.873709293089754,
|
|
"grad_norm": 0.5866661189980579,
|
|
"learning_rate": 4.10921449932733e-08,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1231222152709961,
|
|
"step": 4330,
|
|
"valid_targets_mean": 4874.9,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 6.881652104845115,
|
|
"grad_norm": 0.6736697931322196,
|
|
"learning_rate": 3.61771007540268e-08,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10660684108734131,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3950.2,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 6.889594916600476,
|
|
"grad_norm": 0.707893855174876,
|
|
"learning_rate": 3.157475134270227e-08,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12861591577529907,
|
|
"step": 4340,
|
|
"valid_targets_mean": 2981.8,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 6.897537728355838,
|
|
"grad_norm": 0.7181545460609798,
|
|
"learning_rate": 2.728516884624277e-08,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10725265741348267,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3365.9,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 6.905480540111199,
|
|
"grad_norm": 0.7497055509318514,
|
|
"learning_rate": 2.3308420452690106e-08,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13570773601531982,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3281.0,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 6.913423351866561,
|
|
"grad_norm": 0.6710536391653048,
|
|
"learning_rate": 1.9644568450147837e-08,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10181531310081482,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3284.5,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 6.921366163621922,
|
|
"grad_norm": 0.7258599573481606,
|
|
"learning_rate": 1.6293670225799864e-08,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14548176527023315,
|
|
"step": 4360,
|
|
"valid_targets_mean": 4070.0,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 6.929308975377284,
|
|
"grad_norm": 0.6639183915609941,
|
|
"learning_rate": 1.3255778265013342e-08,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14225611090660095,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4621.2,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 6.937251787132645,
|
|
"grad_norm": 0.690851547592678,
|
|
"learning_rate": 1.0530940150512703e-08,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1599346548318863,
|
|
"step": 4370,
|
|
"valid_targets_mean": 4296.8,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 6.945194598888007,
|
|
"grad_norm": 0.6680777120363561,
|
|
"learning_rate": 8.119198561638009e-09,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10416897386312485,
|
|
"step": 4375,
|
|
"valid_targets_mean": 2621.1,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 6.953137410643368,
|
|
"grad_norm": 0.7674480772102124,
|
|
"learning_rate": 6.020591273674381e-09,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12043069303035736,
|
|
"step": 4380,
|
|
"valid_targets_mean": 2503.0,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 6.96108022239873,
|
|
"grad_norm": 0.6682353141235654,
|
|
"learning_rate": 4.2351511572635835e-09,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09634553641080856,
|
|
"step": 4385,
|
|
"valid_targets_mean": 2690.1,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 6.96902303415409,
|
|
"grad_norm": 0.5626803490032439,
|
|
"learning_rate": 2.7629061778866597e-09,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20516115427017212,
|
|
"step": 4390,
|
|
"valid_targets_mean": 7110.4,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 6.976965845909452,
|
|
"grad_norm": 0.7173363896967637,
|
|
"learning_rate": 1.603879395422059e-09,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10059934854507446,
|
|
"step": 4395,
|
|
"valid_targets_mean": 2828.0,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 6.984908657664813,
|
|
"grad_norm": 0.6723486690383825,
|
|
"learning_rate": 7.580889637925914e-10,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12293983995914459,
|
|
"step": 4400,
|
|
"valid_targets_mean": 3753.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 6.992851469420175,
|
|
"grad_norm": 0.6793606161632666,
|
|
"learning_rate": 2.2554813067676705e-10,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13661663234233856,
|
|
"step": 4405,
|
|
"valid_targets_mean": 4443.8,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.8125710548860231,
|
|
"learning_rate": 6.265237300073778e-12,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27547487616539,
|
|
"step": 4410,
|
|
"valid_targets_mean": 5897.4,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27547487616539,
|
|
"step": 4410,
|
|
"total_flos": 1.228481933579649e+18,
|
|
"train_loss": 0.32828609189208674,
|
|
"train_runtime": 47520.9929,
|
|
"train_samples_per_second": 1.483,
|
|
"train_steps_per_second": 0.093,
|
|
"valid_targets_mean": 5897.4,
|
|
"valid_targets_min": 1500
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4410,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.228481933579649e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|