Model: open-sci/sft__ot30k_Qwen3-1.7B-Base-DPO-Tulu3-decontaminated Source: Original Platform
12973 lines
375 KiB
JSON
12973 lines
375 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1175,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0042643923240938165,
|
|
"grad_norm": 3.7050780629898266,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.3692437410354614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3522762656211853,
|
|
"step": 1,
|
|
"valid_targets_mean": 14037.0,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 0.008528784648187633,
|
|
"grad_norm": 3.644142778423069,
|
|
"learning_rate": 3.3898305084745766e-07,
|
|
"loss": 1.3762229681015015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34364306926727295,
|
|
"step": 2,
|
|
"valid_targets_mean": 14289.4,
|
|
"valid_targets_min": 3217
|
|
},
|
|
{
|
|
"epoch": 0.01279317697228145,
|
|
"grad_norm": 3.6887570486447934,
|
|
"learning_rate": 6.779661016949153e-07,
|
|
"loss": 1.3203094005584717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3292529582977295,
|
|
"step": 3,
|
|
"valid_targets_mean": 14767.9,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 0.017057569296375266,
|
|
"grad_norm": 3.7110691743676436,
|
|
"learning_rate": 1.016949152542373e-06,
|
|
"loss": 1.3758978843688965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35484176874160767,
|
|
"step": 4,
|
|
"valid_targets_mean": 13822.7,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 0.021321961620469083,
|
|
"grad_norm": 3.6104227273958225,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 1.3438400030136108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3377838432788849,
|
|
"step": 5,
|
|
"valid_targets_mean": 14168.3,
|
|
"valid_targets_min": 4549
|
|
},
|
|
{
|
|
"epoch": 0.0255863539445629,
|
|
"grad_norm": 3.552223485138882,
|
|
"learning_rate": 1.6949152542372882e-06,
|
|
"loss": 1.3246915340423584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33433401584625244,
|
|
"step": 6,
|
|
"valid_targets_mean": 14643.4,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 0.029850746268656716,
|
|
"grad_norm": 3.364111750773186,
|
|
"learning_rate": 2.033898305084746e-06,
|
|
"loss": 1.322192668914795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3354091942310333,
|
|
"step": 7,
|
|
"valid_targets_mean": 14722.1,
|
|
"valid_targets_min": 5706
|
|
},
|
|
{
|
|
"epoch": 0.03411513859275053,
|
|
"grad_norm": 3.334837654325658,
|
|
"learning_rate": 2.372881355932204e-06,
|
|
"loss": 1.3389173746109009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3530663251876831,
|
|
"step": 8,
|
|
"valid_targets_mean": 15226.8,
|
|
"valid_targets_min": 4690
|
|
},
|
|
{
|
|
"epoch": 0.03837953091684435,
|
|
"grad_norm": 2.8080847754149847,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 1.3550939559936523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35263994336128235,
|
|
"step": 9,
|
|
"valid_targets_mean": 15026.3,
|
|
"valid_targets_min": 11234
|
|
},
|
|
{
|
|
"epoch": 0.042643923240938165,
|
|
"grad_norm": 2.6461503667751427,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 1.3024230003356934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31125369668006897,
|
|
"step": 10,
|
|
"valid_targets_mean": 14829.6,
|
|
"valid_targets_min": 2873
|
|
},
|
|
{
|
|
"epoch": 0.046908315565031986,
|
|
"grad_norm": 2.361833251710954,
|
|
"learning_rate": 3.3898305084745763e-06,
|
|
"loss": 1.3256170749664307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3399789333343506,
|
|
"step": 11,
|
|
"valid_targets_mean": 14368.6,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 0.0511727078891258,
|
|
"grad_norm": 2.1192877978498874,
|
|
"learning_rate": 3.7288135593220342e-06,
|
|
"loss": 1.3001116514205933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34104037284851074,
|
|
"step": 12,
|
|
"valid_targets_mean": 14815.2,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 0.05543710021321962,
|
|
"grad_norm": 2.0564921620484586,
|
|
"learning_rate": 4.067796610169492e-06,
|
|
"loss": 1.3146249055862427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35663479566574097,
|
|
"step": 13,
|
|
"valid_targets_mean": 15171.1,
|
|
"valid_targets_min": 9739
|
|
},
|
|
{
|
|
"epoch": 0.05970149253731343,
|
|
"grad_norm": 1.88013392634323,
|
|
"learning_rate": 4.40677966101695e-06,
|
|
"loss": 1.3103572130203247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3216917812824249,
|
|
"step": 14,
|
|
"valid_targets_mean": 14483.7,
|
|
"valid_targets_min": 9364
|
|
},
|
|
{
|
|
"epoch": 0.06396588486140725,
|
|
"grad_norm": 1.533337013219745,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 1.3093593120574951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.348351389169693,
|
|
"step": 15,
|
|
"valid_targets_mean": 15076.1,
|
|
"valid_targets_min": 3559
|
|
},
|
|
{
|
|
"epoch": 0.06823027718550106,
|
|
"grad_norm": 1.8942686142189689,
|
|
"learning_rate": 5.084745762711865e-06,
|
|
"loss": 1.2396540641784668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293296217918396,
|
|
"step": 16,
|
|
"valid_targets_mean": 15001.3,
|
|
"valid_targets_min": 6070
|
|
},
|
|
{
|
|
"epoch": 0.07249466950959488,
|
|
"grad_norm": 1.949853023703963,
|
|
"learning_rate": 5.423728813559323e-06,
|
|
"loss": 1.2900409698486328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31647390127182007,
|
|
"step": 17,
|
|
"valid_targets_mean": 15154.1,
|
|
"valid_targets_min": 2936
|
|
},
|
|
{
|
|
"epoch": 0.0767590618336887,
|
|
"grad_norm": 1.7000416376046041,
|
|
"learning_rate": 5.7627118644067805e-06,
|
|
"loss": 1.3255032300949097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3654269576072693,
|
|
"step": 18,
|
|
"valid_targets_mean": 15572.8,
|
|
"valid_targets_min": 11400
|
|
},
|
|
{
|
|
"epoch": 0.08102345415778252,
|
|
"grad_norm": 1.5392347751119273,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 1.2805070877075195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005303740501404,
|
|
"step": 19,
|
|
"valid_targets_mean": 13826.0,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 0.08528784648187633,
|
|
"grad_norm": 1.3548173284771003,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 1.3007392883300781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32928162813186646,
|
|
"step": 20,
|
|
"valid_targets_mean": 14183.2,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 0.08955223880597014,
|
|
"grad_norm": 1.5487187975704395,
|
|
"learning_rate": 6.779661016949153e-06,
|
|
"loss": 1.2937368154525757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31294944882392883,
|
|
"step": 21,
|
|
"valid_targets_mean": 14698.1,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 0.09381663113006397,
|
|
"grad_norm": 1.8310802916221394,
|
|
"learning_rate": 7.1186440677966106e-06,
|
|
"loss": 1.265408992767334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3101353645324707,
|
|
"step": 22,
|
|
"valid_targets_mean": 14052.0,
|
|
"valid_targets_min": 5016
|
|
},
|
|
{
|
|
"epoch": 0.09808102345415778,
|
|
"grad_norm": 1.659985356016141,
|
|
"learning_rate": 7.4576271186440685e-06,
|
|
"loss": 1.2996220588684082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32625505328178406,
|
|
"step": 23,
|
|
"valid_targets_mean": 15747.2,
|
|
"valid_targets_min": 7977
|
|
},
|
|
{
|
|
"epoch": 0.1023454157782516,
|
|
"grad_norm": 1.2796272762896368,
|
|
"learning_rate": 7.796610169491526e-06,
|
|
"loss": 1.244044303894043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3171003758907318,
|
|
"step": 24,
|
|
"valid_targets_mean": 15501.2,
|
|
"valid_targets_min": 8014
|
|
},
|
|
{
|
|
"epoch": 0.10660980810234541,
|
|
"grad_norm": 1.0421905099691027,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 1.2391921281814575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27722275257110596,
|
|
"step": 25,
|
|
"valid_targets_mean": 13349.6,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 0.11087420042643924,
|
|
"grad_norm": 0.9988190143069554,
|
|
"learning_rate": 8.47457627118644e-06,
|
|
"loss": 1.2015836238861084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900916337966919,
|
|
"step": 26,
|
|
"valid_targets_mean": 13670.7,
|
|
"valid_targets_min": 2377
|
|
},
|
|
{
|
|
"epoch": 0.11513859275053305,
|
|
"grad_norm": 0.7791773324939875,
|
|
"learning_rate": 8.8135593220339e-06,
|
|
"loss": 1.160265326499939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28922343254089355,
|
|
"step": 27,
|
|
"valid_targets_mean": 15432.1,
|
|
"valid_targets_min": 9230
|
|
},
|
|
{
|
|
"epoch": 0.11940298507462686,
|
|
"grad_norm": 0.8866057666371843,
|
|
"learning_rate": 9.152542372881356e-06,
|
|
"loss": 1.2556746006011963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979189157485962,
|
|
"step": 28,
|
|
"valid_targets_mean": 14637.2,
|
|
"valid_targets_min": 5532
|
|
},
|
|
{
|
|
"epoch": 0.12366737739872068,
|
|
"grad_norm": 0.8066240821616735,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 1.1768447160720825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30367380380630493,
|
|
"step": 29,
|
|
"valid_targets_mean": 14625.5,
|
|
"valid_targets_min": 3306
|
|
},
|
|
{
|
|
"epoch": 0.1279317697228145,
|
|
"grad_norm": 0.7648919865485887,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 1.241949200630188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31732290983200073,
|
|
"step": 30,
|
|
"valid_targets_mean": 15120.5,
|
|
"valid_targets_min": 7362
|
|
},
|
|
{
|
|
"epoch": 0.13219616204690832,
|
|
"grad_norm": 0.7018397415958855,
|
|
"learning_rate": 1.016949152542373e-05,
|
|
"loss": 1.1539311408996582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28841906785964966,
|
|
"step": 31,
|
|
"valid_targets_mean": 14608.3,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 0.13646055437100213,
|
|
"grad_norm": 0.7798440333291278,
|
|
"learning_rate": 1.0508474576271188e-05,
|
|
"loss": 1.1606169939041138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28593698143959045,
|
|
"step": 32,
|
|
"valid_targets_mean": 14868.9,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 0.14072494669509594,
|
|
"grad_norm": 0.6709730268415378,
|
|
"learning_rate": 1.0847457627118645e-05,
|
|
"loss": 1.2407987117767334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31206440925598145,
|
|
"step": 33,
|
|
"valid_targets_mean": 15542.3,
|
|
"valid_targets_min": 9239
|
|
},
|
|
{
|
|
"epoch": 0.14498933901918976,
|
|
"grad_norm": 0.5927399346942984,
|
|
"learning_rate": 1.1186440677966102e-05,
|
|
"loss": 1.2000172138214111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3148438334465027,
|
|
"step": 34,
|
|
"valid_targets_mean": 15298.9,
|
|
"valid_targets_min": 10829
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 0.6879187516419376,
|
|
"learning_rate": 1.1525423728813561e-05,
|
|
"loss": 1.1847583055496216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30476540327072144,
|
|
"step": 35,
|
|
"valid_targets_mean": 15101.9,
|
|
"valid_targets_min": 4685
|
|
},
|
|
{
|
|
"epoch": 0.1535181236673774,
|
|
"grad_norm": 0.5736372588077562,
|
|
"learning_rate": 1.1864406779661018e-05,
|
|
"loss": 1.181781530380249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30979201197624207,
|
|
"step": 36,
|
|
"valid_targets_mean": 15434.2,
|
|
"valid_targets_min": 8304
|
|
},
|
|
{
|
|
"epoch": 0.15778251599147122,
|
|
"grad_norm": 0.4839600609860032,
|
|
"learning_rate": 1.2203389830508477e-05,
|
|
"loss": 1.1757041215896606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3150002658367157,
|
|
"step": 37,
|
|
"valid_targets_mean": 15374.5,
|
|
"valid_targets_min": 4818
|
|
},
|
|
{
|
|
"epoch": 0.16204690831556504,
|
|
"grad_norm": 0.5448012035335212,
|
|
"learning_rate": 1.2542372881355932e-05,
|
|
"loss": 1.1842443943023682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021180033683777,
|
|
"step": 38,
|
|
"valid_targets_mean": 15148.5,
|
|
"valid_targets_min": 7056
|
|
},
|
|
{
|
|
"epoch": 0.16631130063965885,
|
|
"grad_norm": 0.5158459340574624,
|
|
"learning_rate": 1.288135593220339e-05,
|
|
"loss": 1.2007079124450684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918003499507904,
|
|
"step": 39,
|
|
"valid_targets_mean": 14903.9,
|
|
"valid_targets_min": 4758
|
|
},
|
|
{
|
|
"epoch": 0.17057569296375266,
|
|
"grad_norm": 0.4934416594442201,
|
|
"learning_rate": 1.3220338983050848e-05,
|
|
"loss": 1.1404216289520264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30101698637008667,
|
|
"step": 40,
|
|
"valid_targets_mean": 14631.2,
|
|
"valid_targets_min": 4776
|
|
},
|
|
{
|
|
"epoch": 0.17484008528784648,
|
|
"grad_norm": 0.45708657374086487,
|
|
"learning_rate": 1.3559322033898305e-05,
|
|
"loss": 1.167313814163208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29247981309890747,
|
|
"step": 41,
|
|
"valid_targets_mean": 14968.9,
|
|
"valid_targets_min": 6853
|
|
},
|
|
{
|
|
"epoch": 0.1791044776119403,
|
|
"grad_norm": 0.5086425056584234,
|
|
"learning_rate": 1.3898305084745764e-05,
|
|
"loss": 1.130707025527954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653113305568695,
|
|
"step": 42,
|
|
"valid_targets_mean": 14805.2,
|
|
"valid_targets_min": 7412
|
|
},
|
|
{
|
|
"epoch": 0.18336886993603413,
|
|
"grad_norm": 0.49446667625841045,
|
|
"learning_rate": 1.4237288135593221e-05,
|
|
"loss": 1.1759696006774902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28698980808258057,
|
|
"step": 43,
|
|
"valid_targets_mean": 14401.4,
|
|
"valid_targets_min": 2709
|
|
},
|
|
{
|
|
"epoch": 0.18763326226012794,
|
|
"grad_norm": 0.4225441910253411,
|
|
"learning_rate": 1.4576271186440678e-05,
|
|
"loss": 1.1241289377212524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27669885754585266,
|
|
"step": 44,
|
|
"valid_targets_mean": 14983.8,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 0.19189765458422176,
|
|
"grad_norm": 0.41315134653726454,
|
|
"learning_rate": 1.4915254237288137e-05,
|
|
"loss": 1.146299123764038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780418395996094,
|
|
"step": 45,
|
|
"valid_targets_mean": 14252.4,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 0.19616204690831557,
|
|
"grad_norm": 0.43792682778587283,
|
|
"learning_rate": 1.5254237288135594e-05,
|
|
"loss": 1.1436562538146973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802981436252594,
|
|
"step": 46,
|
|
"valid_targets_mean": 14348.7,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 0.20042643923240938,
|
|
"grad_norm": 0.418260223399441,
|
|
"learning_rate": 1.5593220338983053e-05,
|
|
"loss": 1.197390079498291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31075793504714966,
|
|
"step": 47,
|
|
"valid_targets_mean": 14754.3,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 0.2046908315565032,
|
|
"grad_norm": 0.41700284556436135,
|
|
"learning_rate": 1.593220338983051e-05,
|
|
"loss": 1.1087491512298584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929489016532898,
|
|
"step": 48,
|
|
"valid_targets_mean": 15185.8,
|
|
"valid_targets_min": 5859
|
|
},
|
|
{
|
|
"epoch": 0.208955223880597,
|
|
"grad_norm": 0.412556813999178,
|
|
"learning_rate": 1.6271186440677967e-05,
|
|
"loss": 1.1045202016830444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575497031211853,
|
|
"step": 49,
|
|
"valid_targets_mean": 14226.5,
|
|
"valid_targets_min": 3280
|
|
},
|
|
{
|
|
"epoch": 0.21321961620469082,
|
|
"grad_norm": 0.4042092008381859,
|
|
"learning_rate": 1.6610169491525424e-05,
|
|
"loss": 1.1158027648925781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845377027988434,
|
|
"step": 50,
|
|
"valid_targets_mean": 14587.2,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 0.21748400852878466,
|
|
"grad_norm": 0.4132707686825523,
|
|
"learning_rate": 1.694915254237288e-05,
|
|
"loss": 1.1375819444656372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972974181175232,
|
|
"step": 51,
|
|
"valid_targets_mean": 15135.2,
|
|
"valid_targets_min": 2341
|
|
},
|
|
{
|
|
"epoch": 0.22174840085287847,
|
|
"grad_norm": 0.391199888066629,
|
|
"learning_rate": 1.728813559322034e-05,
|
|
"loss": 1.105478048324585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29153481125831604,
|
|
"step": 52,
|
|
"valid_targets_mean": 14628.2,
|
|
"valid_targets_min": 3150
|
|
},
|
|
{
|
|
"epoch": 0.2260127931769723,
|
|
"grad_norm": 0.386668351137354,
|
|
"learning_rate": 1.76271186440678e-05,
|
|
"loss": 1.0931549072265625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30096158385276794,
|
|
"step": 53,
|
|
"valid_targets_mean": 14552.2,
|
|
"valid_targets_min": 6412
|
|
},
|
|
{
|
|
"epoch": 0.2302771855010661,
|
|
"grad_norm": 0.3822043034745602,
|
|
"learning_rate": 1.7966101694915256e-05,
|
|
"loss": 1.1303824186325073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28337252140045166,
|
|
"step": 54,
|
|
"valid_targets_mean": 15306.2,
|
|
"valid_targets_min": 9939
|
|
},
|
|
{
|
|
"epoch": 0.2345415778251599,
|
|
"grad_norm": 0.40103756945668784,
|
|
"learning_rate": 1.8305084745762713e-05,
|
|
"loss": 1.1525251865386963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29096719622612,
|
|
"step": 55,
|
|
"valid_targets_mean": 14843.6,
|
|
"valid_targets_min": 6321
|
|
},
|
|
{
|
|
"epoch": 0.23880597014925373,
|
|
"grad_norm": 0.3833451859387816,
|
|
"learning_rate": 1.864406779661017e-05,
|
|
"loss": 1.1639981269836426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843185365200043,
|
|
"step": 56,
|
|
"valid_targets_mean": 13096.1,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 0.24307036247334754,
|
|
"grad_norm": 0.3399193334458428,
|
|
"learning_rate": 1.898305084745763e-05,
|
|
"loss": 1.1406729221343994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889408767223358,
|
|
"step": 57,
|
|
"valid_targets_mean": 13918.6,
|
|
"valid_targets_min": 4167
|
|
},
|
|
{
|
|
"epoch": 0.24733475479744135,
|
|
"grad_norm": 0.40561471430214036,
|
|
"learning_rate": 1.9322033898305087e-05,
|
|
"loss": 1.1632907390594482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3403254747390747,
|
|
"step": 58,
|
|
"valid_targets_mean": 14827.4,
|
|
"valid_targets_min": 5551
|
|
},
|
|
{
|
|
"epoch": 0.2515991471215352,
|
|
"grad_norm": 0.37250445491384604,
|
|
"learning_rate": 1.9661016949152545e-05,
|
|
"loss": 1.1035513877868652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2879774272441864,
|
|
"step": 59,
|
|
"valid_targets_mean": 15212.9,
|
|
"valid_targets_min": 6383
|
|
},
|
|
{
|
|
"epoch": 0.255863539445629,
|
|
"grad_norm": 0.370594175104314,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.117856740951538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637665867805481,
|
|
"step": 60,
|
|
"valid_targets_mean": 13981.9,
|
|
"valid_targets_min": 2976
|
|
},
|
|
{
|
|
"epoch": 0.2601279317697228,
|
|
"grad_norm": 0.40319022476773536,
|
|
"learning_rate": 2.033898305084746e-05,
|
|
"loss": 1.140508770942688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2981041669845581,
|
|
"step": 61,
|
|
"valid_targets_mean": 14926.7,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 0.26439232409381663,
|
|
"grad_norm": 0.3592692867430342,
|
|
"learning_rate": 2.0677966101694916e-05,
|
|
"loss": 1.1198129653930664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828468084335327,
|
|
"step": 62,
|
|
"valid_targets_mean": 14801.3,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 0.26865671641791045,
|
|
"grad_norm": 0.3830986433032628,
|
|
"learning_rate": 2.1016949152542376e-05,
|
|
"loss": 1.0709550380706787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259308785200119,
|
|
"step": 63,
|
|
"valid_targets_mean": 14049.6,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 0.27292110874200426,
|
|
"grad_norm": 0.37699960866195636,
|
|
"learning_rate": 2.1355932203389833e-05,
|
|
"loss": 1.1756236553192139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2878360152244568,
|
|
"step": 64,
|
|
"valid_targets_mean": 14548.8,
|
|
"valid_targets_min": 3780
|
|
},
|
|
{
|
|
"epoch": 0.2771855010660981,
|
|
"grad_norm": 0.39298210251360866,
|
|
"learning_rate": 2.169491525423729e-05,
|
|
"loss": 1.143497109413147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30721768736839294,
|
|
"step": 65,
|
|
"valid_targets_mean": 14906.1,
|
|
"valid_targets_min": 3792
|
|
},
|
|
{
|
|
"epoch": 0.2814498933901919,
|
|
"grad_norm": 0.46218533437048287,
|
|
"learning_rate": 2.2033898305084748e-05,
|
|
"loss": 1.1329861879348755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948032319545746,
|
|
"step": 66,
|
|
"valid_targets_mean": 14316.4,
|
|
"valid_targets_min": 4259
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.40363035789670854,
|
|
"learning_rate": 2.2372881355932205e-05,
|
|
"loss": 1.1038914918899536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855198383331299,
|
|
"step": 67,
|
|
"valid_targets_mean": 14869.5,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 0.2899786780383795,
|
|
"grad_norm": 0.404526654468659,
|
|
"learning_rate": 2.2711864406779665e-05,
|
|
"loss": 1.1428015232086182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30168676376342773,
|
|
"step": 68,
|
|
"valid_targets_mean": 14551.2,
|
|
"valid_targets_min": 4821
|
|
},
|
|
{
|
|
"epoch": 0.2942430703624733,
|
|
"grad_norm": 0.4482692606441178,
|
|
"learning_rate": 2.3050847457627122e-05,
|
|
"loss": 1.1620957851409912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26107412576675415,
|
|
"step": 69,
|
|
"valid_targets_mean": 15006.9,
|
|
"valid_targets_min": 4126
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.42867084123635435,
|
|
"learning_rate": 2.338983050847458e-05,
|
|
"loss": 1.1545917987823486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30845510959625244,
|
|
"step": 70,
|
|
"valid_targets_mean": 14557.5,
|
|
"valid_targets_min": 2311
|
|
},
|
|
{
|
|
"epoch": 0.302771855010661,
|
|
"grad_norm": 0.3968495695072565,
|
|
"learning_rate": 2.3728813559322036e-05,
|
|
"loss": 1.0535703897476196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562728524208069,
|
|
"step": 71,
|
|
"valid_targets_mean": 14938.7,
|
|
"valid_targets_min": 5689
|
|
},
|
|
{
|
|
"epoch": 0.3070362473347548,
|
|
"grad_norm": 0.45436207933470474,
|
|
"learning_rate": 2.406779661016949e-05,
|
|
"loss": 1.1029781103134155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.277590811252594,
|
|
"step": 72,
|
|
"valid_targets_mean": 14522.3,
|
|
"valid_targets_min": 2727
|
|
},
|
|
{
|
|
"epoch": 0.31130063965884863,
|
|
"grad_norm": 0.359862537824085,
|
|
"learning_rate": 2.4406779661016954e-05,
|
|
"loss": 1.1120696067810059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918398082256317,
|
|
"step": 73,
|
|
"valid_targets_mean": 15216.2,
|
|
"valid_targets_min": 6987
|
|
},
|
|
{
|
|
"epoch": 0.31556503198294245,
|
|
"grad_norm": 0.4246652728104541,
|
|
"learning_rate": 2.474576271186441e-05,
|
|
"loss": 1.13474440574646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27508383989334106,
|
|
"step": 74,
|
|
"valid_targets_mean": 15336.4,
|
|
"valid_targets_min": 7965
|
|
},
|
|
{
|
|
"epoch": 0.31982942430703626,
|
|
"grad_norm": 0.40846023357573547,
|
|
"learning_rate": 2.5084745762711865e-05,
|
|
"loss": 1.1316065788269043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2800920307636261,
|
|
"step": 75,
|
|
"valid_targets_mean": 14431.6,
|
|
"valid_targets_min": 3089
|
|
},
|
|
{
|
|
"epoch": 0.32409381663113007,
|
|
"grad_norm": 0.3731952986186072,
|
|
"learning_rate": 2.5423728813559322e-05,
|
|
"loss": 1.124661922454834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803703546524048,
|
|
"step": 76,
|
|
"valid_targets_mean": 14309.1,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 0.3283582089552239,
|
|
"grad_norm": 0.4568010244837068,
|
|
"learning_rate": 2.576271186440678e-05,
|
|
"loss": 1.075108528137207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25260084867477417,
|
|
"step": 77,
|
|
"valid_targets_mean": 13998.6,
|
|
"valid_targets_min": 2602
|
|
},
|
|
{
|
|
"epoch": 0.3326226012793177,
|
|
"grad_norm": 0.4493409517301465,
|
|
"learning_rate": 2.610169491525424e-05,
|
|
"loss": 1.1376177072525024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844620943069458,
|
|
"step": 78,
|
|
"valid_targets_mean": 14369.1,
|
|
"valid_targets_min": 4167
|
|
},
|
|
{
|
|
"epoch": 0.3368869936034115,
|
|
"grad_norm": 0.47372255745179864,
|
|
"learning_rate": 2.6440677966101696e-05,
|
|
"loss": 1.089780569076538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650420665740967,
|
|
"step": 79,
|
|
"valid_targets_mean": 14101.2,
|
|
"valid_targets_min": 4432
|
|
},
|
|
{
|
|
"epoch": 0.3411513859275053,
|
|
"grad_norm": 0.5665122742675868,
|
|
"learning_rate": 2.6779661016949153e-05,
|
|
"loss": 1.0992507934570312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24609148502349854,
|
|
"step": 80,
|
|
"valid_targets_mean": 13424.2,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 0.34541577825159914,
|
|
"grad_norm": 0.47254128214633884,
|
|
"learning_rate": 2.711864406779661e-05,
|
|
"loss": 1.1198959350585938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935066819190979,
|
|
"step": 81,
|
|
"valid_targets_mean": 14789.6,
|
|
"valid_targets_min": 4851
|
|
},
|
|
{
|
|
"epoch": 0.34968017057569295,
|
|
"grad_norm": 0.6035870703559557,
|
|
"learning_rate": 2.7457627118644068e-05,
|
|
"loss": 1.0773565769195557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763820290565491,
|
|
"step": 82,
|
|
"valid_targets_mean": 14809.2,
|
|
"valid_targets_min": 7600
|
|
},
|
|
{
|
|
"epoch": 0.35394456289978676,
|
|
"grad_norm": 0.4667819362761318,
|
|
"learning_rate": 2.7796610169491528e-05,
|
|
"loss": 1.0495716333389282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25999754667282104,
|
|
"step": 83,
|
|
"valid_targets_mean": 14823.2,
|
|
"valid_targets_min": 3855
|
|
},
|
|
{
|
|
"epoch": 0.3582089552238806,
|
|
"grad_norm": 0.3979333170715773,
|
|
"learning_rate": 2.8135593220338985e-05,
|
|
"loss": 1.125817894935608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996269166469574,
|
|
"step": 84,
|
|
"valid_targets_mean": 14658.2,
|
|
"valid_targets_min": 2691
|
|
},
|
|
{
|
|
"epoch": 0.3624733475479744,
|
|
"grad_norm": 0.5562559245097167,
|
|
"learning_rate": 2.8474576271186442e-05,
|
|
"loss": 1.0994623899459839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25782912969589233,
|
|
"step": 85,
|
|
"valid_targets_mean": 13998.5,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.36673773987206826,
|
|
"grad_norm": 0.5245838640290033,
|
|
"learning_rate": 2.88135593220339e-05,
|
|
"loss": 1.096351981163025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672840654850006,
|
|
"step": 86,
|
|
"valid_targets_mean": 14302.5,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 0.37100213219616207,
|
|
"grad_norm": 0.7142610115878439,
|
|
"learning_rate": 2.9152542372881356e-05,
|
|
"loss": 1.089399814605713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670043408870697,
|
|
"step": 87,
|
|
"valid_targets_mean": 14668.7,
|
|
"valid_targets_min": 4710
|
|
},
|
|
{
|
|
"epoch": 0.3752665245202559,
|
|
"grad_norm": 0.8439486434921541,
|
|
"learning_rate": 2.9491525423728817e-05,
|
|
"loss": 1.143181324005127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085780143737793,
|
|
"step": 88,
|
|
"valid_targets_mean": 14349.1,
|
|
"valid_targets_min": 4164
|
|
},
|
|
{
|
|
"epoch": 0.3795309168443497,
|
|
"grad_norm": 0.5019995695930857,
|
|
"learning_rate": 2.9830508474576274e-05,
|
|
"loss": 1.0901545286178589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736453711986542,
|
|
"step": 89,
|
|
"valid_targets_mean": 13790.9,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 0.3837953091684435,
|
|
"grad_norm": 0.7498229867971459,
|
|
"learning_rate": 3.016949152542373e-05,
|
|
"loss": 1.1339250802993774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919880747795105,
|
|
"step": 90,
|
|
"valid_targets_mean": 14075.8,
|
|
"valid_targets_min": 4288
|
|
},
|
|
{
|
|
"epoch": 0.3880597014925373,
|
|
"grad_norm": 0.6124984431041895,
|
|
"learning_rate": 3.0508474576271188e-05,
|
|
"loss": 1.1457653045654297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28051960468292236,
|
|
"step": 91,
|
|
"valid_targets_mean": 14182.6,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 0.39232409381663114,
|
|
"grad_norm": 0.5367316576445155,
|
|
"learning_rate": 3.084745762711865e-05,
|
|
"loss": 1.0481932163238525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602003216743469,
|
|
"step": 92,
|
|
"valid_targets_mean": 14727.0,
|
|
"valid_targets_min": 8468
|
|
},
|
|
{
|
|
"epoch": 0.39658848614072495,
|
|
"grad_norm": 0.5867631443327929,
|
|
"learning_rate": 3.1186440677966106e-05,
|
|
"loss": 1.1587417125701904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870522141456604,
|
|
"step": 93,
|
|
"valid_targets_mean": 14595.0,
|
|
"valid_targets_min": 5134
|
|
},
|
|
{
|
|
"epoch": 0.40085287846481876,
|
|
"grad_norm": 0.5356452767726304,
|
|
"learning_rate": 3.152542372881356e-05,
|
|
"loss": 1.0834548473358154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28711941838264465,
|
|
"step": 94,
|
|
"valid_targets_mean": 14092.9,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 0.4051172707889126,
|
|
"grad_norm": 0.523401883007321,
|
|
"learning_rate": 3.186440677966102e-05,
|
|
"loss": 1.095641851425171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29611924290657043,
|
|
"step": 95,
|
|
"valid_targets_mean": 14336.4,
|
|
"valid_targets_min": 7342
|
|
},
|
|
{
|
|
"epoch": 0.4093816631130064,
|
|
"grad_norm": 0.5690445238598021,
|
|
"learning_rate": 3.2203389830508473e-05,
|
|
"loss": 1.078407883644104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741876244544983,
|
|
"step": 96,
|
|
"valid_targets_mean": 14126.5,
|
|
"valid_targets_min": 7041
|
|
},
|
|
{
|
|
"epoch": 0.4136460554371002,
|
|
"grad_norm": 0.6094147291691422,
|
|
"learning_rate": 3.2542372881355934e-05,
|
|
"loss": 1.0798135995864868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836112380027771,
|
|
"step": 97,
|
|
"valid_targets_mean": 15288.1,
|
|
"valid_targets_min": 3032
|
|
},
|
|
{
|
|
"epoch": 0.417910447761194,
|
|
"grad_norm": 0.6726142002002352,
|
|
"learning_rate": 3.2881355932203394e-05,
|
|
"loss": 1.0568196773529053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684295177459717,
|
|
"step": 98,
|
|
"valid_targets_mean": 15102.7,
|
|
"valid_targets_min": 7890
|
|
},
|
|
{
|
|
"epoch": 0.42217484008528783,
|
|
"grad_norm": 0.8491794688074314,
|
|
"learning_rate": 3.322033898305085e-05,
|
|
"loss": 1.0988755226135254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29215890169143677,
|
|
"step": 99,
|
|
"valid_targets_mean": 15020.4,
|
|
"valid_targets_min": 2603
|
|
},
|
|
{
|
|
"epoch": 0.42643923240938164,
|
|
"grad_norm": 0.9064115361710522,
|
|
"learning_rate": 3.355932203389831e-05,
|
|
"loss": 1.083425760269165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26947128772735596,
|
|
"step": 100,
|
|
"valid_targets_mean": 14664.2,
|
|
"valid_targets_min": 6535
|
|
},
|
|
{
|
|
"epoch": 0.43070362473347545,
|
|
"grad_norm": 0.9132507873879131,
|
|
"learning_rate": 3.389830508474576e-05,
|
|
"loss": 1.1036663055419922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732984125614166,
|
|
"step": 101,
|
|
"valid_targets_mean": 15139.3,
|
|
"valid_targets_min": 7516
|
|
},
|
|
{
|
|
"epoch": 0.4349680170575693,
|
|
"grad_norm": 1.385145048086555,
|
|
"learning_rate": 3.423728813559322e-05,
|
|
"loss": 1.1382055282592773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2893203794956207,
|
|
"step": 102,
|
|
"valid_targets_mean": 15199.2,
|
|
"valid_targets_min": 5997
|
|
},
|
|
{
|
|
"epoch": 0.43923240938166314,
|
|
"grad_norm": 0.5475812514048524,
|
|
"learning_rate": 3.457627118644068e-05,
|
|
"loss": 1.1070555448532104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845304012298584,
|
|
"step": 103,
|
|
"valid_targets_mean": 15446.5,
|
|
"valid_targets_min": 7746
|
|
},
|
|
{
|
|
"epoch": 0.44349680170575695,
|
|
"grad_norm": 0.9741055956863801,
|
|
"learning_rate": 3.491525423728814e-05,
|
|
"loss": 1.05870521068573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25673824548721313,
|
|
"step": 104,
|
|
"valid_targets_mean": 13467.9,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 1.0677300026608416,
|
|
"learning_rate": 3.52542372881356e-05,
|
|
"loss": 1.0540976524353027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25432610511779785,
|
|
"step": 105,
|
|
"valid_targets_mean": 15373.5,
|
|
"valid_targets_min": 7168
|
|
},
|
|
{
|
|
"epoch": 0.4520255863539446,
|
|
"grad_norm": 0.7611749582978701,
|
|
"learning_rate": 3.559322033898305e-05,
|
|
"loss": 1.052013874053955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26927241683006287,
|
|
"step": 106,
|
|
"valid_targets_mean": 15254.9,
|
|
"valid_targets_min": 7796
|
|
},
|
|
{
|
|
"epoch": 0.4562899786780384,
|
|
"grad_norm": 0.6867568669775715,
|
|
"learning_rate": 3.593220338983051e-05,
|
|
"loss": 1.0238397121429443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24560533463954926,
|
|
"step": 107,
|
|
"valid_targets_mean": 15089.0,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 0.4605543710021322,
|
|
"grad_norm": 0.7531365351453309,
|
|
"learning_rate": 3.627118644067797e-05,
|
|
"loss": 1.1215412616729736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2981128692626953,
|
|
"step": 108,
|
|
"valid_targets_mean": 14925.8,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 0.464818763326226,
|
|
"grad_norm": 0.7075846752148991,
|
|
"learning_rate": 3.6610169491525426e-05,
|
|
"loss": 1.119575023651123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760234475135803,
|
|
"step": 109,
|
|
"valid_targets_mean": 14718.9,
|
|
"valid_targets_min": 3165
|
|
},
|
|
{
|
|
"epoch": 0.4690831556503198,
|
|
"grad_norm": 0.5207892205512951,
|
|
"learning_rate": 3.6949152542372886e-05,
|
|
"loss": 1.0607807636260986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645629048347473,
|
|
"step": 110,
|
|
"valid_targets_mean": 13860.9,
|
|
"valid_targets_min": 1750
|
|
},
|
|
{
|
|
"epoch": 0.47334754797441364,
|
|
"grad_norm": 0.7100332484154964,
|
|
"learning_rate": 3.728813559322034e-05,
|
|
"loss": 1.0771763324737549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28458529710769653,
|
|
"step": 111,
|
|
"valid_targets_mean": 15522.3,
|
|
"valid_targets_min": 8644
|
|
},
|
|
{
|
|
"epoch": 0.47761194029850745,
|
|
"grad_norm": 0.7103735333522195,
|
|
"learning_rate": 3.76271186440678e-05,
|
|
"loss": 1.1477752923965454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2882823944091797,
|
|
"step": 112,
|
|
"valid_targets_mean": 15039.8,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 0.48187633262260127,
|
|
"grad_norm": 0.9098809591934902,
|
|
"learning_rate": 3.796610169491526e-05,
|
|
"loss": 1.109536051750183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30783724784851074,
|
|
"step": 113,
|
|
"valid_targets_mean": 14650.3,
|
|
"valid_targets_min": 4726
|
|
},
|
|
{
|
|
"epoch": 0.4861407249466951,
|
|
"grad_norm": 0.7399818898149016,
|
|
"learning_rate": 3.8305084745762714e-05,
|
|
"loss": 1.121962308883667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26784151792526245,
|
|
"step": 114,
|
|
"valid_targets_mean": 14586.5,
|
|
"valid_targets_min": 3354
|
|
},
|
|
{
|
|
"epoch": 0.4904051172707889,
|
|
"grad_norm": 0.5280361496919472,
|
|
"learning_rate": 3.8644067796610175e-05,
|
|
"loss": 1.0628488063812256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730950117111206,
|
|
"step": 115,
|
|
"valid_targets_mean": 14931.8,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 0.4946695095948827,
|
|
"grad_norm": 0.6973732682162282,
|
|
"learning_rate": 3.898305084745763e-05,
|
|
"loss": 1.0915262699127197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26737692952156067,
|
|
"step": 116,
|
|
"valid_targets_mean": 14651.6,
|
|
"valid_targets_min": 4922
|
|
},
|
|
{
|
|
"epoch": 0.4989339019189765,
|
|
"grad_norm": 0.8478613201485544,
|
|
"learning_rate": 3.932203389830509e-05,
|
|
"loss": 1.042242169380188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633335292339325,
|
|
"step": 117,
|
|
"valid_targets_mean": 15044.4,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 0.5031982942430704,
|
|
"grad_norm": 0.9423939464924462,
|
|
"learning_rate": 3.966101694915255e-05,
|
|
"loss": 1.1005557775497437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25671565532684326,
|
|
"step": 118,
|
|
"valid_targets_mean": 13800.0,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 0.5074626865671642,
|
|
"grad_norm": 0.8232326266289336,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.0876855850219727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626562714576721,
|
|
"step": 119,
|
|
"valid_targets_mean": 14306.9,
|
|
"valid_targets_min": 3745
|
|
},
|
|
{
|
|
"epoch": 0.511727078891258,
|
|
"grad_norm": 0.7031161455560354,
|
|
"learning_rate": 3.999991166161585e-05,
|
|
"loss": 1.1505755186080933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771134078502655,
|
|
"step": 120,
|
|
"valid_targets_mean": 14492.4,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 0.5159914712153518,
|
|
"grad_norm": 0.6575632107379027,
|
|
"learning_rate": 3.999964664724376e-05,
|
|
"loss": 1.0899534225463867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2557235360145569,
|
|
"step": 121,
|
|
"valid_targets_mean": 14547.6,
|
|
"valid_targets_min": 2173
|
|
},
|
|
{
|
|
"epoch": 0.5202558635394456,
|
|
"grad_norm": 0.7913221543418435,
|
|
"learning_rate": 3.999920495922483e-05,
|
|
"loss": 1.0714800357818604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652358412742615,
|
|
"step": 122,
|
|
"valid_targets_mean": 14360.5,
|
|
"valid_targets_min": 5181
|
|
},
|
|
{
|
|
"epoch": 0.5245202558635395,
|
|
"grad_norm": 0.7870845811379741,
|
|
"learning_rate": 3.999858660146085e-05,
|
|
"loss": 1.0843017101287842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683025002479553,
|
|
"step": 123,
|
|
"valid_targets_mean": 15934.4,
|
|
"valid_targets_min": 11306
|
|
},
|
|
{
|
|
"epoch": 0.5287846481876333,
|
|
"grad_norm": 0.5962689178835435,
|
|
"learning_rate": 3.999779157941431e-05,
|
|
"loss": 1.070677399635315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27511274814605713,
|
|
"step": 124,
|
|
"valid_targets_mean": 15529.0,
|
|
"valid_targets_min": 7890
|
|
},
|
|
{
|
|
"epoch": 0.5330490405117271,
|
|
"grad_norm": 0.6549941776246774,
|
|
"learning_rate": 3.99968199001083e-05,
|
|
"loss": 1.092234492301941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585451006889343,
|
|
"step": 125,
|
|
"valid_targets_mean": 14589.0,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 0.5373134328358209,
|
|
"grad_norm": 0.6433268626497883,
|
|
"learning_rate": 3.999567157212646e-05,
|
|
"loss": 1.0426599979400635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27732205390930176,
|
|
"step": 126,
|
|
"valid_targets_mean": 15039.9,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 0.5415778251599147,
|
|
"grad_norm": 0.8352443012910113,
|
|
"learning_rate": 3.9994346605612955e-05,
|
|
"loss": 1.053920030593872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27412497997283936,
|
|
"step": 127,
|
|
"valid_targets_mean": 15032.1,
|
|
"valid_targets_min": 3330
|
|
},
|
|
{
|
|
"epoch": 0.5458422174840085,
|
|
"grad_norm": 0.6721266904939381,
|
|
"learning_rate": 3.999284501227232e-05,
|
|
"loss": 1.0584911108016968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2523651719093323,
|
|
"step": 128,
|
|
"valid_targets_mean": 14215.7,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 0.5501066098081023,
|
|
"grad_norm": 0.48843672284612627,
|
|
"learning_rate": 3.9991166805369393e-05,
|
|
"loss": 1.1100125312805176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289559006690979,
|
|
"step": 129,
|
|
"valid_targets_mean": 14395.0,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 0.5543710021321961,
|
|
"grad_norm": 0.6400490615525464,
|
|
"learning_rate": 3.9989311999729166e-05,
|
|
"loss": 1.1049845218658447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29606693983078003,
|
|
"step": 130,
|
|
"valid_targets_mean": 14569.9,
|
|
"valid_targets_min": 5811
|
|
},
|
|
{
|
|
"epoch": 0.55863539445629,
|
|
"grad_norm": 0.563424535692848,
|
|
"learning_rate": 3.99872806117367e-05,
|
|
"loss": 1.0681297779083252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545401453971863,
|
|
"step": 131,
|
|
"valid_targets_mean": 13998.8,
|
|
"valid_targets_min": 2805
|
|
},
|
|
{
|
|
"epoch": 0.5628997867803838,
|
|
"grad_norm": 0.5948981350914802,
|
|
"learning_rate": 3.998507265933696e-05,
|
|
"loss": 1.067273736000061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615966200828552,
|
|
"step": 132,
|
|
"valid_targets_mean": 14621.0,
|
|
"valid_targets_min": 3960
|
|
},
|
|
{
|
|
"epoch": 0.5671641791044776,
|
|
"grad_norm": 0.5749626198443342,
|
|
"learning_rate": 3.9982688162034624e-05,
|
|
"loss": 1.1034537553787231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733963429927826,
|
|
"step": 133,
|
|
"valid_targets_mean": 14888.8,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.5579910867112449,
|
|
"learning_rate": 3.998012714089397e-05,
|
|
"loss": 1.1013906002044678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27768003940582275,
|
|
"step": 134,
|
|
"valid_targets_mean": 14446.8,
|
|
"valid_targets_min": 4971
|
|
},
|
|
{
|
|
"epoch": 0.5756929637526652,
|
|
"grad_norm": 0.6403785510082947,
|
|
"learning_rate": 3.997738961853863e-05,
|
|
"loss": 1.0969642400741577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28485429286956787,
|
|
"step": 135,
|
|
"valid_targets_mean": 14908.9,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 0.579957356076759,
|
|
"grad_norm": 0.5380869852233647,
|
|
"learning_rate": 3.9974475619151445e-05,
|
|
"loss": 1.0559520721435547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599300444126129,
|
|
"step": 136,
|
|
"valid_targets_mean": 15042.8,
|
|
"valid_targets_min": 5253
|
|
},
|
|
{
|
|
"epoch": 0.5842217484008528,
|
|
"grad_norm": 0.49896660740556487,
|
|
"learning_rate": 3.997138516847422e-05,
|
|
"loss": 1.0358166694641113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784730792045593,
|
|
"step": 137,
|
|
"valid_targets_mean": 15314.4,
|
|
"valid_targets_min": 6815
|
|
},
|
|
{
|
|
"epoch": 0.5884861407249466,
|
|
"grad_norm": 0.6086479921387165,
|
|
"learning_rate": 3.9968118293807476e-05,
|
|
"loss": 1.1016316413879395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564149498939514,
|
|
"step": 138,
|
|
"valid_targets_mean": 14211.9,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 0.5927505330490405,
|
|
"grad_norm": 0.5425428137663556,
|
|
"learning_rate": 3.996467502401028e-05,
|
|
"loss": 1.0730235576629639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677554786205292,
|
|
"step": 139,
|
|
"valid_targets_mean": 15098.0,
|
|
"valid_targets_min": 2956
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.4908207916774726,
|
|
"learning_rate": 3.9961055389499904e-05,
|
|
"loss": 1.038144588470459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26678267121315,
|
|
"step": 140,
|
|
"valid_targets_mean": 14864.3,
|
|
"valid_targets_min": 6004
|
|
},
|
|
{
|
|
"epoch": 0.6012793176972282,
|
|
"grad_norm": 0.542727640216713,
|
|
"learning_rate": 3.995725942225162e-05,
|
|
"loss": 1.0723166465759277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25091272592544556,
|
|
"step": 141,
|
|
"valid_targets_mean": 13533.8,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 0.605543710021322,
|
|
"grad_norm": 0.48616177634505015,
|
|
"learning_rate": 3.995328715579839e-05,
|
|
"loss": 1.0786864757537842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756822109222412,
|
|
"step": 142,
|
|
"valid_targets_mean": 15253.0,
|
|
"valid_targets_min": 7270
|
|
},
|
|
{
|
|
"epoch": 0.6098081023454158,
|
|
"grad_norm": 0.5520956388686232,
|
|
"learning_rate": 3.994913862523058e-05,
|
|
"loss": 1.0484728813171387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863498330116272,
|
|
"step": 143,
|
|
"valid_targets_mean": 15135.0,
|
|
"valid_targets_min": 2961
|
|
},
|
|
{
|
|
"epoch": 0.6140724946695096,
|
|
"grad_norm": 0.43493765908641513,
|
|
"learning_rate": 3.9944813867195624e-05,
|
|
"loss": 1.0812013149261475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22369539737701416,
|
|
"step": 144,
|
|
"valid_targets_mean": 12900.4,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.6183368869936035,
|
|
"grad_norm": 0.47275744196027986,
|
|
"learning_rate": 3.9940312919897744e-05,
|
|
"loss": 1.1257681846618652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30477064847946167,
|
|
"step": 145,
|
|
"valid_targets_mean": 15631.7,
|
|
"valid_targets_min": 9492
|
|
},
|
|
{
|
|
"epoch": 0.6226012793176973,
|
|
"grad_norm": 0.4648291056305156,
|
|
"learning_rate": 3.993563582309759e-05,
|
|
"loss": 1.0511856079101562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24608807265758514,
|
|
"step": 146,
|
|
"valid_targets_mean": 15147.3,
|
|
"valid_targets_min": 4251
|
|
},
|
|
{
|
|
"epoch": 0.6268656716417911,
|
|
"grad_norm": 0.44902387960467005,
|
|
"learning_rate": 3.993078261811186e-05,
|
|
"loss": 1.080320119857788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716037333011627,
|
|
"step": 147,
|
|
"valid_targets_mean": 15246.7,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 0.6311300639658849,
|
|
"grad_norm": 0.5035450374595886,
|
|
"learning_rate": 3.9925753347813e-05,
|
|
"loss": 1.130347728729248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2737486958503723,
|
|
"step": 148,
|
|
"valid_targets_mean": 14379.8,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 0.6353944562899787,
|
|
"grad_norm": 0.5091556926343411,
|
|
"learning_rate": 3.992054805662876e-05,
|
|
"loss": 1.115844488143921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27972787618637085,
|
|
"step": 149,
|
|
"valid_targets_mean": 14061.5,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 0.6396588486140725,
|
|
"grad_norm": 0.4477300918143984,
|
|
"learning_rate": 3.991516679054185e-05,
|
|
"loss": 1.060228943824768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634928226470947,
|
|
"step": 150,
|
|
"valid_targets_mean": 14323.0,
|
|
"valid_targets_min": 5017
|
|
},
|
|
{
|
|
"epoch": 0.6439232409381663,
|
|
"grad_norm": 0.5351539062418861,
|
|
"learning_rate": 3.9909609597089496e-05,
|
|
"loss": 1.0674173831939697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632558345794678,
|
|
"step": 151,
|
|
"valid_targets_mean": 14257.8,
|
|
"valid_targets_min": 3979
|
|
},
|
|
{
|
|
"epoch": 0.6481876332622601,
|
|
"grad_norm": 0.7278522574811955,
|
|
"learning_rate": 3.9903876525363055e-05,
|
|
"loss": 1.0999696254730225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681885361671448,
|
|
"step": 152,
|
|
"valid_targets_mean": 14785.4,
|
|
"valid_targets_min": 7633
|
|
},
|
|
{
|
|
"epoch": 0.652452025586354,
|
|
"grad_norm": 0.6639805094372959,
|
|
"learning_rate": 3.989796762600755e-05,
|
|
"loss": 1.0721893310546875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24978876113891602,
|
|
"step": 153,
|
|
"valid_targets_mean": 14481.2,
|
|
"valid_targets_min": 4915
|
|
},
|
|
{
|
|
"epoch": 0.6567164179104478,
|
|
"grad_norm": 0.5946803421731605,
|
|
"learning_rate": 3.9891882951221246e-05,
|
|
"loss": 1.160867691040039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2525663673877716,
|
|
"step": 154,
|
|
"valid_targets_mean": 13851.4,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 0.6609808102345416,
|
|
"grad_norm": 0.5005158714404884,
|
|
"learning_rate": 3.988562255475518e-05,
|
|
"loss": 1.0442200899124146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694418430328369,
|
|
"step": 155,
|
|
"valid_targets_mean": 15546.7,
|
|
"valid_targets_min": 6669
|
|
},
|
|
{
|
|
"epoch": 0.6652452025586354,
|
|
"grad_norm": 0.6923534139141314,
|
|
"learning_rate": 3.987918649191268e-05,
|
|
"loss": 1.085169792175293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27694371342658997,
|
|
"step": 156,
|
|
"valid_targets_mean": 14214.1,
|
|
"valid_targets_min": 5957
|
|
},
|
|
{
|
|
"epoch": 0.6695095948827292,
|
|
"grad_norm": 0.6291609924377848,
|
|
"learning_rate": 3.987257481954888e-05,
|
|
"loss": 1.0835366249084473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972813844680786,
|
|
"step": 157,
|
|
"valid_targets_mean": 14841.3,
|
|
"valid_targets_min": 5718
|
|
},
|
|
{
|
|
"epoch": 0.673773987206823,
|
|
"grad_norm": 0.6949486431955099,
|
|
"learning_rate": 3.9865787596070236e-05,
|
|
"loss": 1.0785105228424072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26788365840911865,
|
|
"step": 158,
|
|
"valid_targets_mean": 14575.0,
|
|
"valid_targets_min": 5142
|
|
},
|
|
{
|
|
"epoch": 0.6780383795309168,
|
|
"grad_norm": 0.6125844372311213,
|
|
"learning_rate": 3.9858824881433975e-05,
|
|
"loss": 1.0924922227859497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2507363557815552,
|
|
"step": 159,
|
|
"valid_targets_mean": 14356.6,
|
|
"valid_targets_min": 4878
|
|
},
|
|
{
|
|
"epoch": 0.6823027718550106,
|
|
"grad_norm": 0.5574752878263121,
|
|
"learning_rate": 3.9851686737147585e-05,
|
|
"loss": 1.1294423341751099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2461737096309662,
|
|
"step": 160,
|
|
"valid_targets_mean": 13718.2,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 0.6865671641791045,
|
|
"grad_norm": 0.5478327836713052,
|
|
"learning_rate": 3.9844373226268305e-05,
|
|
"loss": 1.0423997640609741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565568685531616,
|
|
"step": 161,
|
|
"valid_targets_mean": 14228.8,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 0.6908315565031983,
|
|
"grad_norm": 0.4905992626681647,
|
|
"learning_rate": 3.983688441340249e-05,
|
|
"loss": 1.1095077991485596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29134225845336914,
|
|
"step": 162,
|
|
"valid_targets_mean": 14159.3,
|
|
"valid_targets_min": 2792
|
|
},
|
|
{
|
|
"epoch": 0.6950959488272921,
|
|
"grad_norm": 0.5791383369506973,
|
|
"learning_rate": 3.98292203647051e-05,
|
|
"loss": 1.0938866138458252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27529406547546387,
|
|
"step": 163,
|
|
"valid_targets_mean": 14302.1,
|
|
"valid_targets_min": 3711
|
|
},
|
|
{
|
|
"epoch": 0.6993603411513859,
|
|
"grad_norm": 0.42193448193600835,
|
|
"learning_rate": 3.982138114787912e-05,
|
|
"loss": 1.0846669673919678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688499689102173,
|
|
"step": 164,
|
|
"valid_targets_mean": 14744.2,
|
|
"valid_targets_min": 6098
|
|
},
|
|
{
|
|
"epoch": 0.7036247334754797,
|
|
"grad_norm": 0.5155095369829436,
|
|
"learning_rate": 3.98133668321749e-05,
|
|
"loss": 1.057676911354065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630409300327301,
|
|
"step": 165,
|
|
"valid_targets_mean": 14565.0,
|
|
"valid_targets_min": 3680
|
|
},
|
|
{
|
|
"epoch": 0.7078891257995735,
|
|
"grad_norm": 0.5869902656317024,
|
|
"learning_rate": 3.980517748838963e-05,
|
|
"loss": 1.1554545164108276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754029631614685,
|
|
"step": 166,
|
|
"valid_targets_mean": 13850.9,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 0.7121535181236673,
|
|
"grad_norm": 0.5720171958814955,
|
|
"learning_rate": 3.979681318886664e-05,
|
|
"loss": 1.0603725910186768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2662554085254669,
|
|
"step": 167,
|
|
"valid_targets_mean": 14264.0,
|
|
"valid_targets_min": 2853
|
|
},
|
|
{
|
|
"epoch": 0.7164179104477612,
|
|
"grad_norm": 0.5434989869753447,
|
|
"learning_rate": 3.978827400749481e-05,
|
|
"loss": 1.0979516506195068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543768286705017,
|
|
"step": 168,
|
|
"valid_targets_mean": 14187.2,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 0.720682302771855,
|
|
"grad_norm": 0.5166671494441886,
|
|
"learning_rate": 3.977956001970788e-05,
|
|
"loss": 1.1519384384155273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969434857368469,
|
|
"step": 169,
|
|
"valid_targets_mean": 15474.6,
|
|
"valid_targets_min": 8501
|
|
},
|
|
{
|
|
"epoch": 0.7249466950959488,
|
|
"grad_norm": 0.5303223539428913,
|
|
"learning_rate": 3.977067130248381e-05,
|
|
"loss": 1.0900096893310547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27363914251327515,
|
|
"step": 170,
|
|
"valid_targets_mean": 15222.5,
|
|
"valid_targets_min": 5620
|
|
},
|
|
{
|
|
"epoch": 0.7292110874200426,
|
|
"grad_norm": 0.5625412610722119,
|
|
"learning_rate": 3.9761607934344095e-05,
|
|
"loss": 1.0231143236160278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24777080118656158,
|
|
"step": 171,
|
|
"valid_targets_mean": 13855.9,
|
|
"valid_targets_min": 3112
|
|
},
|
|
{
|
|
"epoch": 0.7334754797441365,
|
|
"grad_norm": 0.603264553548915,
|
|
"learning_rate": 3.975236999535306e-05,
|
|
"loss": 1.0472302436828613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527484893798828,
|
|
"step": 172,
|
|
"valid_targets_mean": 14650.7,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 0.7377398720682303,
|
|
"grad_norm": 0.6014862245959686,
|
|
"learning_rate": 3.974295756711717e-05,
|
|
"loss": 1.0939075946807861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624342143535614,
|
|
"step": 173,
|
|
"valid_targets_mean": 14595.8,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 0.7420042643923241,
|
|
"grad_norm": 0.44388702752873965,
|
|
"learning_rate": 3.9733370732784296e-05,
|
|
"loss": 1.115445852279663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28644606471061707,
|
|
"step": 174,
|
|
"valid_targets_mean": 14760.0,
|
|
"valid_targets_min": 2181
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.4904032202025299,
|
|
"learning_rate": 3.972360957704298e-05,
|
|
"loss": 1.145646333694458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24962620437145233,
|
|
"step": 175,
|
|
"valid_targets_mean": 14682.2,
|
|
"valid_targets_min": 1778
|
|
},
|
|
{
|
|
"epoch": 0.7505330490405118,
|
|
"grad_norm": 0.49307189524330414,
|
|
"learning_rate": 3.97136741861217e-05,
|
|
"loss": 1.0545127391815186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779785990715027,
|
|
"step": 176,
|
|
"valid_targets_mean": 15725.2,
|
|
"valid_targets_min": 10505
|
|
},
|
|
{
|
|
"epoch": 0.7547974413646056,
|
|
"grad_norm": 0.44785683420642936,
|
|
"learning_rate": 3.970356464778808e-05,
|
|
"loss": 1.0895123481750488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27711138129234314,
|
|
"step": 177,
|
|
"valid_targets_mean": 14786.6,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 0.7590618336886994,
|
|
"grad_norm": 0.44123981119420197,
|
|
"learning_rate": 3.969328105134817e-05,
|
|
"loss": 1.0779128074645996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723630368709564,
|
|
"step": 178,
|
|
"valid_targets_mean": 15287.1,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 0.7633262260127932,
|
|
"grad_norm": 0.4838123716306297,
|
|
"learning_rate": 3.9682823487645584e-05,
|
|
"loss": 1.0953731536865234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782648503780365,
|
|
"step": 179,
|
|
"valid_targets_mean": 14615.7,
|
|
"valid_targets_min": 5225
|
|
},
|
|
{
|
|
"epoch": 0.767590618336887,
|
|
"grad_norm": 0.5530489816011777,
|
|
"learning_rate": 3.9672192049060745e-05,
|
|
"loss": 1.0543200969696045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24827225506305695,
|
|
"step": 180,
|
|
"valid_targets_mean": 14464.0,
|
|
"valid_targets_min": 3973
|
|
},
|
|
{
|
|
"epoch": 0.7718550106609808,
|
|
"grad_norm": 0.5225429513663924,
|
|
"learning_rate": 3.966138682951008e-05,
|
|
"loss": 1.091672420501709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28941211104393005,
|
|
"step": 181,
|
|
"valid_targets_mean": 15169.6,
|
|
"valid_targets_min": 3780
|
|
},
|
|
{
|
|
"epoch": 0.7761194029850746,
|
|
"grad_norm": 0.44637484719229226,
|
|
"learning_rate": 3.9650407924445147e-05,
|
|
"loss": 1.1263113021850586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28688526153564453,
|
|
"step": 182,
|
|
"valid_targets_mean": 14721.4,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 0.7803837953091685,
|
|
"grad_norm": 0.45862542771768605,
|
|
"learning_rate": 3.963925543085181e-05,
|
|
"loss": 1.0835044384002686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25946244597435,
|
|
"step": 183,
|
|
"valid_targets_mean": 14252.9,
|
|
"valid_targets_min": 4037
|
|
},
|
|
{
|
|
"epoch": 0.7846481876332623,
|
|
"grad_norm": 0.4952510891744086,
|
|
"learning_rate": 3.96279294472494e-05,
|
|
"loss": 1.001509428024292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24779187142848969,
|
|
"step": 184,
|
|
"valid_targets_mean": 14142.4,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 0.7889125799573561,
|
|
"grad_norm": 0.44195447602621846,
|
|
"learning_rate": 3.961643007368984e-05,
|
|
"loss": 1.0588806867599487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276114284992218,
|
|
"step": 185,
|
|
"valid_targets_mean": 15403.3,
|
|
"valid_targets_min": 10688
|
|
},
|
|
{
|
|
"epoch": 0.7931769722814499,
|
|
"grad_norm": 0.42766577918859827,
|
|
"learning_rate": 3.960475741175671e-05,
|
|
"loss": 1.1107757091522217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877672612667084,
|
|
"step": 186,
|
|
"valid_targets_mean": 14154.4,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 0.7974413646055437,
|
|
"grad_norm": 0.5067246882281058,
|
|
"learning_rate": 3.959291156456444e-05,
|
|
"loss": 1.0544629096984863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686496376991272,
|
|
"step": 187,
|
|
"valid_targets_mean": 14308.6,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 0.8017057569296375,
|
|
"grad_norm": 0.6426543954086256,
|
|
"learning_rate": 3.9580892636757334e-05,
|
|
"loss": 1.0391920804977417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26643359661102295,
|
|
"step": 188,
|
|
"valid_targets_mean": 14659.4,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 0.8059701492537313,
|
|
"grad_norm": 0.6310582264944622,
|
|
"learning_rate": 3.9568700734508645e-05,
|
|
"loss": 1.0646073818206787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615092992782593,
|
|
"step": 189,
|
|
"valid_targets_mean": 14306.6,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 0.8102345415778252,
|
|
"grad_norm": 0.4316920405376593,
|
|
"learning_rate": 3.955633596551967e-05,
|
|
"loss": 1.0984067916870117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925407886505127,
|
|
"step": 190,
|
|
"valid_targets_mean": 14534.1,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 0.814498933901919,
|
|
"grad_norm": 0.5092025373692535,
|
|
"learning_rate": 3.9543798439018776e-05,
|
|
"loss": 1.0774245262145996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26995649933815,
|
|
"step": 191,
|
|
"valid_targets_mean": 14733.8,
|
|
"valid_targets_min": 2367
|
|
},
|
|
{
|
|
"epoch": 0.8187633262260128,
|
|
"grad_norm": 0.6176598186353168,
|
|
"learning_rate": 3.953108826576046e-05,
|
|
"loss": 1.0559810400009155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29765647649765015,
|
|
"step": 192,
|
|
"valid_targets_mean": 15014.3,
|
|
"valid_targets_min": 3991
|
|
},
|
|
{
|
|
"epoch": 0.8230277185501066,
|
|
"grad_norm": 0.4955119409802344,
|
|
"learning_rate": 3.9518205558024334e-05,
|
|
"loss": 1.0619244575500488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24655967950820923,
|
|
"step": 193,
|
|
"valid_targets_mean": 14510.7,
|
|
"valid_targets_min": 2499
|
|
},
|
|
{
|
|
"epoch": 0.8272921108742004,
|
|
"grad_norm": 0.4441924061241526,
|
|
"learning_rate": 3.9505150429614154e-05,
|
|
"loss": 1.0574350357055664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708360552787781,
|
|
"step": 194,
|
|
"valid_targets_mean": 14285.1,
|
|
"valid_targets_min": 4255
|
|
},
|
|
{
|
|
"epoch": 0.8315565031982942,
|
|
"grad_norm": 0.47719053044370424,
|
|
"learning_rate": 3.949192299585681e-05,
|
|
"loss": 1.0607553720474243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23465648293495178,
|
|
"step": 195,
|
|
"valid_targets_mean": 13950.8,
|
|
"valid_targets_min": 3588
|
|
},
|
|
{
|
|
"epoch": 0.835820895522388,
|
|
"grad_norm": 0.467012826021647,
|
|
"learning_rate": 3.9478523373601325e-05,
|
|
"loss": 1.0625135898590088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731807231903076,
|
|
"step": 196,
|
|
"valid_targets_mean": 14909.9,
|
|
"valid_targets_min": 6385
|
|
},
|
|
{
|
|
"epoch": 0.8400852878464818,
|
|
"grad_norm": 0.38085852032238326,
|
|
"learning_rate": 3.946495168121778e-05,
|
|
"loss": 1.0609357357025146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28904902935028076,
|
|
"step": 197,
|
|
"valid_targets_mean": 14388.2,
|
|
"valid_targets_min": 4776
|
|
},
|
|
{
|
|
"epoch": 0.8443496801705757,
|
|
"grad_norm": 0.45711580952928094,
|
|
"learning_rate": 3.9451208038596325e-05,
|
|
"loss": 1.0474504232406616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546575665473938,
|
|
"step": 198,
|
|
"valid_targets_mean": 14635.1,
|
|
"valid_targets_min": 4017
|
|
},
|
|
{
|
|
"epoch": 0.8486140724946695,
|
|
"grad_norm": 0.42308190621852476,
|
|
"learning_rate": 3.943729256714608e-05,
|
|
"loss": 1.0619412660598755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732556462287903,
|
|
"step": 199,
|
|
"valid_targets_mean": 15327.0,
|
|
"valid_targets_min": 9625
|
|
},
|
|
{
|
|
"epoch": 0.8528784648187633,
|
|
"grad_norm": 0.50332299139726,
|
|
"learning_rate": 3.942320538979408e-05,
|
|
"loss": 1.0523178577423096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26731598377227783,
|
|
"step": 200,
|
|
"valid_targets_mean": 15212.5,
|
|
"valid_targets_min": 6862
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.40579341242390754,
|
|
"learning_rate": 3.9408946630984144e-05,
|
|
"loss": 1.0020947456359863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605958878993988,
|
|
"step": 201,
|
|
"valid_targets_mean": 15162.9,
|
|
"valid_targets_min": 6042
|
|
},
|
|
{
|
|
"epoch": 0.8614072494669509,
|
|
"grad_norm": 0.4911450057574764,
|
|
"learning_rate": 3.939451641667587e-05,
|
|
"loss": 1.0410255193710327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670213580131531,
|
|
"step": 202,
|
|
"valid_targets_mean": 15394.2,
|
|
"valid_targets_min": 5438
|
|
},
|
|
{
|
|
"epoch": 0.8656716417910447,
|
|
"grad_norm": 0.503110790214135,
|
|
"learning_rate": 3.937991487434342e-05,
|
|
"loss": 1.0642400979995728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623797655105591,
|
|
"step": 203,
|
|
"valid_targets_mean": 14678.1,
|
|
"valid_targets_min": 6220
|
|
},
|
|
{
|
|
"epoch": 0.8699360341151386,
|
|
"grad_norm": 0.4436199136945869,
|
|
"learning_rate": 3.9365142132974484e-05,
|
|
"loss": 1.1306285858154297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3121783137321472,
|
|
"step": 204,
|
|
"valid_targets_mean": 14939.2,
|
|
"valid_targets_min": 3887
|
|
},
|
|
{
|
|
"epoch": 0.8742004264392325,
|
|
"grad_norm": 0.5857979238570923,
|
|
"learning_rate": 3.935019832306905e-05,
|
|
"loss": 1.0648304224014282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2739461660385132,
|
|
"step": 205,
|
|
"valid_targets_mean": 14681.6,
|
|
"valid_targets_min": 5499
|
|
},
|
|
{
|
|
"epoch": 0.8784648187633263,
|
|
"grad_norm": 0.585432048571984,
|
|
"learning_rate": 3.933508357663832e-05,
|
|
"loss": 1.0731347799301147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741486430168152,
|
|
"step": 206,
|
|
"valid_targets_mean": 14105.4,
|
|
"valid_targets_min": 4262
|
|
},
|
|
{
|
|
"epoch": 0.8827292110874201,
|
|
"grad_norm": 0.4665392758071425,
|
|
"learning_rate": 3.9319798027203544e-05,
|
|
"loss": 1.040523886680603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27001869678497314,
|
|
"step": 207,
|
|
"valid_targets_mean": 14148.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.8869936034115139,
|
|
"grad_norm": 0.4909226177806315,
|
|
"learning_rate": 3.930434180979478e-05,
|
|
"loss": 1.0693047046661377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26223987340927124,
|
|
"step": 208,
|
|
"valid_targets_mean": 13855.8,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 0.8912579957356077,
|
|
"grad_norm": 0.5572219951310814,
|
|
"learning_rate": 3.928871506094975e-05,
|
|
"loss": 1.062949538230896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771598994731903,
|
|
"step": 209,
|
|
"valid_targets_mean": 15001.1,
|
|
"valid_targets_min": 11504
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.5302292186399175,
|
|
"learning_rate": 3.927291791871264e-05,
|
|
"loss": 1.0810881853103638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25618427991867065,
|
|
"step": 210,
|
|
"valid_targets_mean": 14553.4,
|
|
"valid_targets_min": 3171
|
|
},
|
|
{
|
|
"epoch": 0.8997867803837953,
|
|
"grad_norm": 0.6242922450550126,
|
|
"learning_rate": 3.925695052263284e-05,
|
|
"loss": 1.069779634475708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2580450475215912,
|
|
"step": 211,
|
|
"valid_targets_mean": 14692.5,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 0.9040511727078892,
|
|
"grad_norm": 0.4536482512535751,
|
|
"learning_rate": 3.924081301376375e-05,
|
|
"loss": 1.0439661741256714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25196996331214905,
|
|
"step": 212,
|
|
"valid_targets_mean": 14521.2,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 0.908315565031983,
|
|
"grad_norm": 0.510283174341876,
|
|
"learning_rate": 3.9224505534661525e-05,
|
|
"loss": 1.0576764345169067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23973239958286285,
|
|
"step": 213,
|
|
"valid_targets_mean": 14508.8,
|
|
"valid_targets_min": 4589
|
|
},
|
|
{
|
|
"epoch": 0.9125799573560768,
|
|
"grad_norm": 0.49658713371562935,
|
|
"learning_rate": 3.92080282293838e-05,
|
|
"loss": 1.077567458152771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822937071323395,
|
|
"step": 214,
|
|
"valid_targets_mean": 14945.4,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 0.9168443496801706,
|
|
"grad_norm": 0.45794284802940943,
|
|
"learning_rate": 3.9191381243488417e-05,
|
|
"loss": 1.0571658611297607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27349892258644104,
|
|
"step": 215,
|
|
"valid_targets_mean": 14692.0,
|
|
"valid_targets_min": 3666
|
|
},
|
|
{
|
|
"epoch": 0.9211087420042644,
|
|
"grad_norm": 0.6141744935366914,
|
|
"learning_rate": 3.9174564724032167e-05,
|
|
"loss": 1.073103427886963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23693981766700745,
|
|
"step": 216,
|
|
"valid_targets_mean": 13916.4,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 0.9253731343283582,
|
|
"grad_norm": 0.4055586667062206,
|
|
"learning_rate": 3.9157578819569455e-05,
|
|
"loss": 1.0518367290496826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26126977801322937,
|
|
"step": 217,
|
|
"valid_targets_mean": 14793.8,
|
|
"valid_targets_min": 5449
|
|
},
|
|
{
|
|
"epoch": 0.929637526652452,
|
|
"grad_norm": 0.54158451570541,
|
|
"learning_rate": 3.9140423680151036e-05,
|
|
"loss": 1.0468168258666992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635643184185028,
|
|
"step": 218,
|
|
"valid_targets_mean": 14376.8,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 0.9339019189765458,
|
|
"grad_norm": 0.5445967854891312,
|
|
"learning_rate": 3.9123099457322625e-05,
|
|
"loss": 1.103074312210083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2882195711135864,
|
|
"step": 219,
|
|
"valid_targets_mean": 14753.6,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 0.9381663113006397,
|
|
"grad_norm": 0.4930856401656068,
|
|
"learning_rate": 3.9105606304123605e-05,
|
|
"loss": 1.0753755569458008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556943893432617,
|
|
"step": 220,
|
|
"valid_targets_mean": 15313.4,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 0.9424307036247335,
|
|
"grad_norm": 0.5440345868038566,
|
|
"learning_rate": 3.908794437508567e-05,
|
|
"loss": 1.0632359981536865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605021297931671,
|
|
"step": 221,
|
|
"valid_targets_mean": 14588.6,
|
|
"valid_targets_min": 7319
|
|
},
|
|
{
|
|
"epoch": 0.9466950959488273,
|
|
"grad_norm": 0.44746588450137675,
|
|
"learning_rate": 3.907011382623145e-05,
|
|
"loss": 1.0761289596557617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2692149877548218,
|
|
"step": 222,
|
|
"valid_targets_mean": 14791.3,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 0.9509594882729211,
|
|
"grad_norm": 0.4271710366516504,
|
|
"learning_rate": 3.905211481507313e-05,
|
|
"loss": 1.0655196905136108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2823115885257721,
|
|
"step": 223,
|
|
"valid_targets_mean": 14754.5,
|
|
"valid_targets_min": 5518
|
|
},
|
|
{
|
|
"epoch": 0.9552238805970149,
|
|
"grad_norm": 0.47524235314290736,
|
|
"learning_rate": 3.903394750061106e-05,
|
|
"loss": 1.0658456087112427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689781188964844,
|
|
"step": 224,
|
|
"valid_targets_mean": 14576.5,
|
|
"valid_targets_min": 3004
|
|
},
|
|
{
|
|
"epoch": 0.9594882729211087,
|
|
"grad_norm": 0.4841575096031807,
|
|
"learning_rate": 3.9015612043332375e-05,
|
|
"loss": 1.039156198501587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707943916320801,
|
|
"step": 225,
|
|
"valid_targets_mean": 14913.6,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 0.9637526652452025,
|
|
"grad_norm": 0.4723350160667985,
|
|
"learning_rate": 3.8997108605209535e-05,
|
|
"loss": 1.038079857826233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25404903292655945,
|
|
"step": 226,
|
|
"valid_targets_mean": 14266.9,
|
|
"valid_targets_min": 5364
|
|
},
|
|
{
|
|
"epoch": 0.9680170575692963,
|
|
"grad_norm": 0.4997955653542789,
|
|
"learning_rate": 3.897843734969891e-05,
|
|
"loss": 1.0137856006622314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24287495017051697,
|
|
"step": 227,
|
|
"valid_targets_mean": 15244.7,
|
|
"valid_targets_min": 8682
|
|
},
|
|
{
|
|
"epoch": 0.9722814498933902,
|
|
"grad_norm": 0.47450567966821877,
|
|
"learning_rate": 3.895959844173937e-05,
|
|
"loss": 1.06816565990448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828211188316345,
|
|
"step": 228,
|
|
"valid_targets_mean": 14613.2,
|
|
"valid_targets_min": 5184
|
|
},
|
|
{
|
|
"epoch": 0.976545842217484,
|
|
"grad_norm": 0.48151688570693435,
|
|
"learning_rate": 3.8940592047750774e-05,
|
|
"loss": 1.0650134086608887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23731546103954315,
|
|
"step": 229,
|
|
"valid_targets_mean": 13796.5,
|
|
"valid_targets_min": 4055
|
|
},
|
|
{
|
|
"epoch": 0.9808102345415778,
|
|
"grad_norm": 0.5110497665972306,
|
|
"learning_rate": 3.892141833563255e-05,
|
|
"loss": 1.0774457454681396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27977144718170166,
|
|
"step": 230,
|
|
"valid_targets_mean": 15119.4,
|
|
"valid_targets_min": 6852
|
|
},
|
|
{
|
|
"epoch": 0.9850746268656716,
|
|
"grad_norm": 0.47899579675299286,
|
|
"learning_rate": 3.8902077474762155e-05,
|
|
"loss": 1.0361413955688477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26833194494247437,
|
|
"step": 231,
|
|
"valid_targets_mean": 14657.4,
|
|
"valid_targets_min": 2488
|
|
},
|
|
{
|
|
"epoch": 0.9893390191897654,
|
|
"grad_norm": 0.3937117036819753,
|
|
"learning_rate": 3.888256963599364e-05,
|
|
"loss": 1.0561443567276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683749496936798,
|
|
"step": 232,
|
|
"valid_targets_mean": 13907.6,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 0.9936034115138592,
|
|
"grad_norm": 0.43393194366644927,
|
|
"learning_rate": 3.886289499165609e-05,
|
|
"loss": 1.0482161045074463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29729658365249634,
|
|
"step": 233,
|
|
"valid_targets_mean": 15261.6,
|
|
"valid_targets_min": 6414
|
|
},
|
|
{
|
|
"epoch": 0.997867803837953,
|
|
"grad_norm": 0.4219118831004821,
|
|
"learning_rate": 3.884305371555215e-05,
|
|
"loss": 1.079267978668213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27207815647125244,
|
|
"step": 234,
|
|
"valid_targets_mean": 14794.5,
|
|
"valid_targets_min": 7270
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.4741774938868457,
|
|
"learning_rate": 3.882304598295643e-05,
|
|
"loss": 1.1089015007019043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5403157472610474,
|
|
"step": 235,
|
|
"valid_targets_mean": 14646.9,
|
|
"valid_targets_min": 4497
|
|
},
|
|
{
|
|
"epoch": 1.004264392324094,
|
|
"grad_norm": 0.4622007921336581,
|
|
"learning_rate": 3.880287197061402e-05,
|
|
"loss": 1.0656170845031738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538582682609558,
|
|
"step": 236,
|
|
"valid_targets_mean": 14099.9,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 1.0085287846481876,
|
|
"grad_norm": 0.5412462216316746,
|
|
"learning_rate": 3.878253185673888e-05,
|
|
"loss": 1.0457720756530762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650813162326813,
|
|
"step": 237,
|
|
"valid_targets_mean": 14961.6,
|
|
"valid_targets_min": 4982
|
|
},
|
|
{
|
|
"epoch": 1.0127931769722816,
|
|
"grad_norm": 0.47263381136910665,
|
|
"learning_rate": 3.876202582101229e-05,
|
|
"loss": 1.0488179922103882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25556907057762146,
|
|
"step": 238,
|
|
"valid_targets_mean": 14361.9,
|
|
"valid_targets_min": 3011
|
|
},
|
|
{
|
|
"epoch": 1.0170575692963753,
|
|
"grad_norm": 0.38997657883095843,
|
|
"learning_rate": 3.874135404458125e-05,
|
|
"loss": 1.0531270503997803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591373920440674,
|
|
"step": 239,
|
|
"valid_targets_mean": 13943.9,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 1.0213219616204692,
|
|
"grad_norm": 0.39461641289761845,
|
|
"learning_rate": 3.8720516710056905e-05,
|
|
"loss": 1.0718858242034912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26196926832199097,
|
|
"step": 240,
|
|
"valid_targets_mean": 14627.2,
|
|
"valid_targets_min": 2267
|
|
},
|
|
{
|
|
"epoch": 1.0255863539445629,
|
|
"grad_norm": 0.7550978916414514,
|
|
"learning_rate": 3.8699514001512885e-05,
|
|
"loss": 1.0579508543014526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687267065048218,
|
|
"step": 241,
|
|
"valid_targets_mean": 15091.4,
|
|
"valid_targets_min": 2326
|
|
},
|
|
{
|
|
"epoch": 1.0298507462686568,
|
|
"grad_norm": 0.5383994900716123,
|
|
"learning_rate": 3.867834610448374e-05,
|
|
"loss": 1.0267250537872314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597755789756775,
|
|
"step": 242,
|
|
"valid_targets_mean": 14510.1,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 1.0341151385927505,
|
|
"grad_norm": 0.5182326725804484,
|
|
"learning_rate": 3.865701320596324e-05,
|
|
"loss": 1.0431740283966064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2277173399925232,
|
|
"step": 243,
|
|
"valid_targets_mean": 13234.3,
|
|
"valid_targets_min": 2697
|
|
},
|
|
{
|
|
"epoch": 1.0383795309168444,
|
|
"grad_norm": 0.4390983742748955,
|
|
"learning_rate": 3.863551549440277e-05,
|
|
"loss": 1.0455431938171387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23365628719329834,
|
|
"step": 244,
|
|
"valid_targets_mean": 13984.1,
|
|
"valid_targets_min": 2184
|
|
},
|
|
{
|
|
"epoch": 1.0426439232409381,
|
|
"grad_norm": 0.4571037150851365,
|
|
"learning_rate": 3.861385315970964e-05,
|
|
"loss": 1.0151028633117676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24773861467838287,
|
|
"step": 245,
|
|
"valid_targets_mean": 13774.0,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 1.046908315565032,
|
|
"grad_norm": 0.5004729362438585,
|
|
"learning_rate": 3.859202639324542e-05,
|
|
"loss": 1.0368274450302124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24919384717941284,
|
|
"step": 246,
|
|
"valid_targets_mean": 13648.0,
|
|
"valid_targets_min": 2810
|
|
},
|
|
{
|
|
"epoch": 1.0511727078891258,
|
|
"grad_norm": 0.5252585561240584,
|
|
"learning_rate": 3.8570035387824214e-05,
|
|
"loss": 1.0297768115997314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419815957546234,
|
|
"step": 247,
|
|
"valid_targets_mean": 14391.8,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 1.0554371002132197,
|
|
"grad_norm": 0.4912267733930261,
|
|
"learning_rate": 3.8547880337711036e-05,
|
|
"loss": 1.0422364473342896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24009577929973602,
|
|
"step": 248,
|
|
"valid_targets_mean": 13562.6,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 1.0597014925373134,
|
|
"grad_norm": 0.49163471430717187,
|
|
"learning_rate": 3.8525561438620016e-05,
|
|
"loss": 1.0926791429519653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3109884262084961,
|
|
"step": 249,
|
|
"valid_targets_mean": 15091.4,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 1.0639658848614073,
|
|
"grad_norm": 0.47094911522426963,
|
|
"learning_rate": 3.850307888771269e-05,
|
|
"loss": 1.0230929851531982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590283453464508,
|
|
"step": 250,
|
|
"valid_targets_mean": 14780.2,
|
|
"valid_targets_min": 8635
|
|
},
|
|
{
|
|
"epoch": 1.068230277185501,
|
|
"grad_norm": 0.5044963985779186,
|
|
"learning_rate": 3.848043288359629e-05,
|
|
"loss": 1.036175012588501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24030742049217224,
|
|
"step": 251,
|
|
"valid_targets_mean": 15008.4,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 1.072494669509595,
|
|
"grad_norm": 0.3761751625602391,
|
|
"learning_rate": 3.8457623626321944e-05,
|
|
"loss": 1.0303536653518677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2748067080974579,
|
|
"step": 252,
|
|
"valid_targets_mean": 15544.8,
|
|
"valid_targets_min": 9842
|
|
},
|
|
{
|
|
"epoch": 1.0767590618336886,
|
|
"grad_norm": 0.46237616560399947,
|
|
"learning_rate": 3.843465131738296e-05,
|
|
"loss": 1.0456784963607788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2787587344646454,
|
|
"step": 253,
|
|
"valid_targets_mean": 15859.3,
|
|
"valid_targets_min": 11105
|
|
},
|
|
{
|
|
"epoch": 1.0810234541577826,
|
|
"grad_norm": 0.526445998429428,
|
|
"learning_rate": 3.8411516159713e-05,
|
|
"loss": 1.0252015590667725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26544487476348877,
|
|
"step": 254,
|
|
"valid_targets_mean": 14621.3,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 1.0852878464818763,
|
|
"grad_norm": 0.45280653386101793,
|
|
"learning_rate": 3.838821835768431e-05,
|
|
"loss": 0.9940991401672363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24375182390213013,
|
|
"step": 255,
|
|
"valid_targets_mean": 14590.2,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 1.0895522388059702,
|
|
"grad_norm": 0.36295386784256867,
|
|
"learning_rate": 3.83647581171059e-05,
|
|
"loss": 1.0359275341033936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22599682211875916,
|
|
"step": 256,
|
|
"valid_targets_mean": 14038.0,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 1.0938166311300639,
|
|
"grad_norm": 0.43034609935829254,
|
|
"learning_rate": 3.8341135645221744e-05,
|
|
"loss": 1.0185537338256836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2548608183860779,
|
|
"step": 257,
|
|
"valid_targets_mean": 14797.3,
|
|
"valid_targets_min": 4346
|
|
},
|
|
{
|
|
"epoch": 1.0980810234541578,
|
|
"grad_norm": 0.4702705765244186,
|
|
"learning_rate": 3.831735115070895e-05,
|
|
"loss": 1.0083509683609009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25162073969841003,
|
|
"step": 258,
|
|
"valid_targets_mean": 14662.9,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 1.1023454157782515,
|
|
"grad_norm": 0.4389974304477306,
|
|
"learning_rate": 3.8293404843675904e-05,
|
|
"loss": 1.0284669399261475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285890281200409,
|
|
"step": 259,
|
|
"valid_targets_mean": 14888.3,
|
|
"valid_targets_min": 6449
|
|
},
|
|
{
|
|
"epoch": 1.1066098081023454,
|
|
"grad_norm": 0.45458157547957195,
|
|
"learning_rate": 3.8269296935660395e-05,
|
|
"loss": 1.033240795135498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23926112055778503,
|
|
"step": 260,
|
|
"valid_targets_mean": 14052.8,
|
|
"valid_targets_min": 4177
|
|
},
|
|
{
|
|
"epoch": 1.1108742004264391,
|
|
"grad_norm": 0.5163212186363417,
|
|
"learning_rate": 3.82450276396278e-05,
|
|
"loss": 1.038989782333374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630407214164734,
|
|
"step": 261,
|
|
"valid_targets_mean": 14497.7,
|
|
"valid_targets_min": 5676
|
|
},
|
|
{
|
|
"epoch": 1.115138592750533,
|
|
"grad_norm": 0.4489525935366427,
|
|
"learning_rate": 3.822059716996916e-05,
|
|
"loss": 1.0076414346694946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547253966331482,
|
|
"step": 262,
|
|
"valid_targets_mean": 14696.9,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 1.1194029850746268,
|
|
"grad_norm": 0.3654576140732117,
|
|
"learning_rate": 3.819600574249929e-05,
|
|
"loss": 0.9889144897460938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25444215536117554,
|
|
"step": 263,
|
|
"valid_targets_mean": 15884.4,
|
|
"valid_targets_min": 11415
|
|
},
|
|
{
|
|
"epoch": 1.1236673773987207,
|
|
"grad_norm": 0.5667279865734862,
|
|
"learning_rate": 3.817125357445489e-05,
|
|
"loss": 1.0545893907546997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661871314048767,
|
|
"step": 264,
|
|
"valid_targets_mean": 14409.9,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 1.1279317697228146,
|
|
"grad_norm": 0.5161974743688891,
|
|
"learning_rate": 3.814634088449261e-05,
|
|
"loss": 1.0141961574554443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23329216241836548,
|
|
"step": 265,
|
|
"valid_targets_mean": 14820.0,
|
|
"valid_targets_min": 5142
|
|
},
|
|
{
|
|
"epoch": 1.1321961620469083,
|
|
"grad_norm": 0.3878474355707623,
|
|
"learning_rate": 3.812126789268712e-05,
|
|
"loss": 1.04948091506958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637542486190796,
|
|
"step": 266,
|
|
"valid_targets_mean": 15534.1,
|
|
"valid_targets_min": 10907
|
|
},
|
|
{
|
|
"epoch": 1.136460554371002,
|
|
"grad_norm": 0.4751415385105907,
|
|
"learning_rate": 3.80960348205292e-05,
|
|
"loss": 1.0847911834716797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24702657759189606,
|
|
"step": 267,
|
|
"valid_targets_mean": 13681.9,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 1.140724946695096,
|
|
"grad_norm": 0.4566792422829083,
|
|
"learning_rate": 3.807064189092372e-05,
|
|
"loss": 1.0932799577713013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27632957696914673,
|
|
"step": 268,
|
|
"valid_targets_mean": 14011.6,
|
|
"valid_targets_min": 3608
|
|
},
|
|
{
|
|
"epoch": 1.1449893390191899,
|
|
"grad_norm": 0.41357758496514435,
|
|
"learning_rate": 3.804508932818771e-05,
|
|
"loss": 0.9902833104133606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24217838048934937,
|
|
"step": 269,
|
|
"valid_targets_mean": 15122.8,
|
|
"valid_targets_min": 8015
|
|
},
|
|
{
|
|
"epoch": 1.1492537313432836,
|
|
"grad_norm": 0.43394680222085963,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 1.0347496271133423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25377392768859863,
|
|
"step": 270,
|
|
"valid_targets_mean": 14167.3,
|
|
"valid_targets_min": 3780
|
|
},
|
|
{
|
|
"epoch": 1.1535181236673775,
|
|
"grad_norm": 0.3989776290799155,
|
|
"learning_rate": 3.799350620764114e-05,
|
|
"loss": 1.0161268711090088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24974288046360016,
|
|
"step": 271,
|
|
"valid_targets_mean": 14951.7,
|
|
"valid_targets_min": 4915
|
|
},
|
|
{
|
|
"epoch": 1.1577825159914712,
|
|
"grad_norm": 0.5056192555570184,
|
|
"learning_rate": 3.7967476105507535e-05,
|
|
"loss": 1.0036935806274414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2507050037384033,
|
|
"step": 272,
|
|
"valid_targets_mean": 14177.5,
|
|
"valid_targets_min": 3884
|
|
},
|
|
{
|
|
"epoch": 1.1620469083155651,
|
|
"grad_norm": 0.41550802051870955,
|
|
"learning_rate": 3.7941287281593284e-05,
|
|
"loss": 1.0130646228790283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885507345199585,
|
|
"step": 273,
|
|
"valid_targets_mean": 15371.1,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 1.1663113006396588,
|
|
"grad_norm": 0.4330835743420702,
|
|
"learning_rate": 3.7914939967246227e-05,
|
|
"loss": 1.050309419631958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25370892882347107,
|
|
"step": 274,
|
|
"valid_targets_mean": 14664.4,
|
|
"valid_targets_min": 5128
|
|
},
|
|
{
|
|
"epoch": 1.1705756929637527,
|
|
"grad_norm": 0.37795495835520443,
|
|
"learning_rate": 3.7888434395214285e-05,
|
|
"loss": 1.0400540828704834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23779457807540894,
|
|
"step": 275,
|
|
"valid_targets_mean": 13460.7,
|
|
"valid_targets_min": 3188
|
|
},
|
|
{
|
|
"epoch": 1.1748400852878464,
|
|
"grad_norm": 0.45769416332354834,
|
|
"learning_rate": 3.786177079964339e-05,
|
|
"loss": 1.039176344871521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263217568397522,
|
|
"step": 276,
|
|
"valid_targets_mean": 14753.7,
|
|
"valid_targets_min": 6232
|
|
},
|
|
{
|
|
"epoch": 1.1791044776119404,
|
|
"grad_norm": 0.3740498605812089,
|
|
"learning_rate": 3.783494941607544e-05,
|
|
"loss": 1.0600132942199707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25907689332962036,
|
|
"step": 277,
|
|
"valid_targets_mean": 13945.5,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 1.183368869936034,
|
|
"grad_norm": 0.406086619075766,
|
|
"learning_rate": 3.780797048144621e-05,
|
|
"loss": 1.0430244207382202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25081393122673035,
|
|
"step": 278,
|
|
"valid_targets_mean": 14387.9,
|
|
"valid_targets_min": 7158
|
|
},
|
|
{
|
|
"epoch": 1.187633262260128,
|
|
"grad_norm": 0.44179147581203665,
|
|
"learning_rate": 3.7780834234083236e-05,
|
|
"loss": 1.0505211353302002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703845798969269,
|
|
"step": 279,
|
|
"valid_targets_mean": 14935.3,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 1.1918976545842217,
|
|
"grad_norm": 0.34772722945368506,
|
|
"learning_rate": 3.775354091370376e-05,
|
|
"loss": 1.0167843103408813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26054152846336365,
|
|
"step": 280,
|
|
"valid_targets_mean": 15610.1,
|
|
"valid_targets_min": 9687
|
|
},
|
|
{
|
|
"epoch": 1.1961620469083156,
|
|
"grad_norm": 0.42499175344396206,
|
|
"learning_rate": 3.772609076141255e-05,
|
|
"loss": 0.9994305968284607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601446509361267,
|
|
"step": 281,
|
|
"valid_targets_mean": 14643.5,
|
|
"valid_targets_min": 6660
|
|
},
|
|
{
|
|
"epoch": 1.2004264392324093,
|
|
"grad_norm": 0.4122491999777202,
|
|
"learning_rate": 3.769848401969982e-05,
|
|
"loss": 1.0300343036651611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25802984833717346,
|
|
"step": 282,
|
|
"valid_targets_mean": 15233.8,
|
|
"valid_targets_min": 9758
|
|
},
|
|
{
|
|
"epoch": 1.2046908315565032,
|
|
"grad_norm": 0.3423896403457907,
|
|
"learning_rate": 3.767072093243907e-05,
|
|
"loss": 1.0937626361846924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2666207551956177,
|
|
"step": 283,
|
|
"valid_targets_mean": 14910.6,
|
|
"valid_targets_min": 4441
|
|
},
|
|
{
|
|
"epoch": 1.208955223880597,
|
|
"grad_norm": 0.43169596246361014,
|
|
"learning_rate": 3.7642801744884915e-05,
|
|
"loss": 1.1199488639831543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26645928621292114,
|
|
"step": 284,
|
|
"valid_targets_mean": 14378.9,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 1.2132196162046909,
|
|
"grad_norm": 0.35407685929099036,
|
|
"learning_rate": 3.761472670367096e-05,
|
|
"loss": 0.9989821910858154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23391807079315186,
|
|
"step": 285,
|
|
"valid_targets_mean": 14860.0,
|
|
"valid_targets_min": 3398
|
|
},
|
|
{
|
|
"epoch": 1.2174840085287846,
|
|
"grad_norm": 0.3762359119106563,
|
|
"learning_rate": 3.758649605680758e-05,
|
|
"loss": 1.0034101009368896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2448192536830902,
|
|
"step": 286,
|
|
"valid_targets_mean": 14400.8,
|
|
"valid_targets_min": 7174
|
|
},
|
|
{
|
|
"epoch": 1.2217484008528785,
|
|
"grad_norm": 0.3704421772248293,
|
|
"learning_rate": 3.755811005367974e-05,
|
|
"loss": 1.0341459512710571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650884985923767,
|
|
"step": 287,
|
|
"valid_targets_mean": 15103.7,
|
|
"valid_targets_min": 8717
|
|
},
|
|
{
|
|
"epoch": 1.2260127931769722,
|
|
"grad_norm": 0.36882767392001164,
|
|
"learning_rate": 3.752956894504481e-05,
|
|
"loss": 1.0364457368850708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2381543070077896,
|
|
"step": 288,
|
|
"valid_targets_mean": 13118.1,
|
|
"valid_targets_min": 4521
|
|
},
|
|
{
|
|
"epoch": 1.2302771855010661,
|
|
"grad_norm": 0.4493535904693946,
|
|
"learning_rate": 3.750087298303033e-05,
|
|
"loss": 1.0223348140716553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26172977685928345,
|
|
"step": 289,
|
|
"valid_targets_mean": 14181.2,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 1.2345415778251598,
|
|
"grad_norm": 0.39935701379581295,
|
|
"learning_rate": 3.7472022421131795e-05,
|
|
"loss": 1.0401383638381958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2792399525642395,
|
|
"step": 290,
|
|
"valid_targets_mean": 15071.4,
|
|
"valid_targets_min": 7071
|
|
},
|
|
{
|
|
"epoch": 1.2388059701492538,
|
|
"grad_norm": 0.4646260025537836,
|
|
"learning_rate": 3.7443017514210406e-05,
|
|
"loss": 1.0574973821640015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24841152131557465,
|
|
"step": 291,
|
|
"valid_targets_mean": 14662.9,
|
|
"valid_targets_min": 3892
|
|
},
|
|
{
|
|
"epoch": 1.2430703624733475,
|
|
"grad_norm": 0.49151129496218726,
|
|
"learning_rate": 3.7413858518490825e-05,
|
|
"loss": 1.0782525539398193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772843837738037,
|
|
"step": 292,
|
|
"valid_targets_mean": 13855.1,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 1.2473347547974414,
|
|
"grad_norm": 0.4568425766987167,
|
|
"learning_rate": 3.7384545691558895e-05,
|
|
"loss": 1.0356574058532715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25376325845718384,
|
|
"step": 293,
|
|
"valid_targets_mean": 14714.8,
|
|
"valid_targets_min": 6099
|
|
},
|
|
{
|
|
"epoch": 1.251599147121535,
|
|
"grad_norm": 0.5054678275792683,
|
|
"learning_rate": 3.735507929235941e-05,
|
|
"loss": 1.0234094858169556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26949718594551086,
|
|
"step": 294,
|
|
"valid_targets_mean": 14589.5,
|
|
"valid_targets_min": 2832
|
|
},
|
|
{
|
|
"epoch": 1.255863539445629,
|
|
"grad_norm": 0.46337299972789686,
|
|
"learning_rate": 3.732545958119378e-05,
|
|
"loss": 1.0196683406829834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528137266635895,
|
|
"step": 295,
|
|
"valid_targets_mean": 14854.3,
|
|
"valid_targets_min": 6387
|
|
},
|
|
{
|
|
"epoch": 1.260127931769723,
|
|
"grad_norm": 0.464708245567803,
|
|
"learning_rate": 3.729568681971774e-05,
|
|
"loss": 1.0523638725280762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24605122208595276,
|
|
"step": 296,
|
|
"valid_targets_mean": 13976.9,
|
|
"valid_targets_min": 4314
|
|
},
|
|
{
|
|
"epoch": 1.2643923240938166,
|
|
"grad_norm": 0.3473961217240767,
|
|
"learning_rate": 3.726576127093905e-05,
|
|
"loss": 1.0757787227630615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618931233882904,
|
|
"step": 297,
|
|
"valid_targets_mean": 15158.6,
|
|
"valid_targets_min": 4461
|
|
},
|
|
{
|
|
"epoch": 1.2686567164179103,
|
|
"grad_norm": 0.42594658697452786,
|
|
"learning_rate": 3.7235683199215177e-05,
|
|
"loss": 1.0215682983398438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23267894983291626,
|
|
"step": 298,
|
|
"valid_targets_mean": 13554.0,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 1.2729211087420043,
|
|
"grad_norm": 0.5129820211225081,
|
|
"learning_rate": 3.7205452870250944e-05,
|
|
"loss": 1.041092038154602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24685560166835785,
|
|
"step": 299,
|
|
"valid_targets_mean": 15081.8,
|
|
"valid_targets_min": 5372
|
|
},
|
|
{
|
|
"epoch": 1.2771855010660982,
|
|
"grad_norm": 0.4436735564383416,
|
|
"learning_rate": 3.7175070551096204e-05,
|
|
"loss": 0.9904603362083435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2435985505580902,
|
|
"step": 300,
|
|
"valid_targets_mean": 14829.8,
|
|
"valid_targets_min": 5064
|
|
},
|
|
{
|
|
"epoch": 1.2814498933901919,
|
|
"grad_norm": 0.4271440597388887,
|
|
"learning_rate": 3.7144536510143436e-05,
|
|
"loss": 1.0471582412719727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27325162291526794,
|
|
"step": 301,
|
|
"valid_targets_mean": 14287.4,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.4403765338098149,
|
|
"learning_rate": 3.711385101712544e-05,
|
|
"loss": 1.0415706634521484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25459718704223633,
|
|
"step": 302,
|
|
"valid_targets_mean": 14111.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.2899786780383795,
|
|
"grad_norm": 0.5454189109172357,
|
|
"learning_rate": 3.708301434311289e-05,
|
|
"loss": 1.0424050092697144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21341678500175476,
|
|
"step": 303,
|
|
"valid_targets_mean": 13497.2,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 1.2942430703624734,
|
|
"grad_norm": 0.47347115350572755,
|
|
"learning_rate": 3.7052026760511996e-05,
|
|
"loss": 1.0039793252944946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27036330103874207,
|
|
"step": 304,
|
|
"valid_targets_mean": 14671.9,
|
|
"valid_targets_min": 4066
|
|
},
|
|
{
|
|
"epoch": 1.2985074626865671,
|
|
"grad_norm": 0.4817211913546334,
|
|
"learning_rate": 3.7020888543062046e-05,
|
|
"loss": 1.004643440246582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28159070014953613,
|
|
"step": 305,
|
|
"valid_targets_mean": 15683.1,
|
|
"valid_targets_min": 6707
|
|
},
|
|
{
|
|
"epoch": 1.302771855010661,
|
|
"grad_norm": 0.4931345243425834,
|
|
"learning_rate": 3.6989599965833024e-05,
|
|
"loss": 1.0794950723648071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30016955733299255,
|
|
"step": 306,
|
|
"valid_targets_mean": 15442.5,
|
|
"valid_targets_min": 7315
|
|
},
|
|
{
|
|
"epoch": 1.3070362473347548,
|
|
"grad_norm": 0.4649670452998378,
|
|
"learning_rate": 3.695816130522317e-05,
|
|
"loss": 1.0607266426086426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25712451338768005,
|
|
"step": 307,
|
|
"valid_targets_mean": 13909.1,
|
|
"valid_targets_min": 3271
|
|
},
|
|
{
|
|
"epoch": 1.3113006396588487,
|
|
"grad_norm": 0.503339331796154,
|
|
"learning_rate": 3.692657283895651e-05,
|
|
"loss": 1.0296015739440918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27607113122940063,
|
|
"step": 308,
|
|
"valid_targets_mean": 14565.0,
|
|
"valid_targets_min": 2334
|
|
},
|
|
{
|
|
"epoch": 1.3155650319829424,
|
|
"grad_norm": 0.49733799334589246,
|
|
"learning_rate": 3.689483484608048e-05,
|
|
"loss": 1.0613747835159302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25526076555252075,
|
|
"step": 309,
|
|
"valid_targets_mean": 14474.2,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 1.3198294243070363,
|
|
"grad_norm": 0.43057377852174317,
|
|
"learning_rate": 3.6862947606963364e-05,
|
|
"loss": 1.0966875553131104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549406886100769,
|
|
"step": 310,
|
|
"valid_targets_mean": 14506.5,
|
|
"valid_targets_min": 2082
|
|
},
|
|
{
|
|
"epoch": 1.32409381663113,
|
|
"grad_norm": 0.3819166377195564,
|
|
"learning_rate": 3.6830911403291885e-05,
|
|
"loss": 1.0385587215423584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25084415078163147,
|
|
"step": 311,
|
|
"valid_targets_mean": 13291.1,
|
|
"valid_targets_min": 3734
|
|
},
|
|
{
|
|
"epoch": 1.328358208955224,
|
|
"grad_norm": 0.42538977991226934,
|
|
"learning_rate": 3.679872651806869e-05,
|
|
"loss": 1.0621037483215332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2692246437072754,
|
|
"step": 312,
|
|
"valid_targets_mean": 14425.4,
|
|
"valid_targets_min": 3203
|
|
},
|
|
{
|
|
"epoch": 1.3326226012793176,
|
|
"grad_norm": 0.41621894746971577,
|
|
"learning_rate": 3.676639323560986e-05,
|
|
"loss": 1.009427785873413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25388848781585693,
|
|
"step": 313,
|
|
"valid_targets_mean": 13922.9,
|
|
"valid_targets_min": 2388
|
|
},
|
|
{
|
|
"epoch": 1.3368869936034116,
|
|
"grad_norm": 0.39346700447344435,
|
|
"learning_rate": 3.6733911841542365e-05,
|
|
"loss": 1.0218225717544556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23701603710651398,
|
|
"step": 314,
|
|
"valid_targets_mean": 14275.7,
|
|
"valid_targets_min": 4836
|
|
},
|
|
{
|
|
"epoch": 1.3411513859275053,
|
|
"grad_norm": 0.4104522564156007,
|
|
"learning_rate": 3.6701282622801626e-05,
|
|
"loss": 1.0223846435546875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658075988292694,
|
|
"step": 315,
|
|
"valid_targets_mean": 14691.5,
|
|
"valid_targets_min": 6289
|
|
},
|
|
{
|
|
"epoch": 1.3454157782515992,
|
|
"grad_norm": 0.4093098269327929,
|
|
"learning_rate": 3.666850586762886e-05,
|
|
"loss": 1.0296084880828857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23127643764019012,
|
|
"step": 316,
|
|
"valid_targets_mean": 14788.2,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 1.349680170575693,
|
|
"grad_norm": 0.3878985502995187,
|
|
"learning_rate": 3.663558186556863e-05,
|
|
"loss": 1.0241369009017944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683104872703552,
|
|
"step": 317,
|
|
"valid_targets_mean": 14430.4,
|
|
"valid_targets_min": 4225
|
|
},
|
|
{
|
|
"epoch": 1.3539445628997868,
|
|
"grad_norm": 0.39633944727125686,
|
|
"learning_rate": 3.660251090746627e-05,
|
|
"loss": 0.994929313659668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697906494140625,
|
|
"step": 318,
|
|
"valid_targets_mean": 14902.6,
|
|
"valid_targets_min": 3428
|
|
},
|
|
{
|
|
"epoch": 1.3582089552238805,
|
|
"grad_norm": 0.456358408659248,
|
|
"learning_rate": 3.656929328546526e-05,
|
|
"loss": 1.0745187997817993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28201112151145935,
|
|
"step": 319,
|
|
"valid_targets_mean": 15383.8,
|
|
"valid_targets_min": 7527
|
|
},
|
|
{
|
|
"epoch": 1.3624733475479744,
|
|
"grad_norm": 0.4377139842281418,
|
|
"learning_rate": 3.653592929300471e-05,
|
|
"loss": 1.0499223470687866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767045199871063,
|
|
"step": 320,
|
|
"valid_targets_mean": 15038.8,
|
|
"valid_targets_min": 3785
|
|
},
|
|
{
|
|
"epoch": 1.3667377398720681,
|
|
"grad_norm": 0.45317897056840667,
|
|
"learning_rate": 3.650241922481675e-05,
|
|
"loss": 1.0534915924072266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23889902234077454,
|
|
"step": 321,
|
|
"valid_targets_mean": 14543.2,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 1.371002132196162,
|
|
"grad_norm": 0.4895128937405734,
|
|
"learning_rate": 3.6468763376923886e-05,
|
|
"loss": 1.0507144927978516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28826040029525757,
|
|
"step": 322,
|
|
"valid_targets_mean": 14838.8,
|
|
"valid_targets_min": 9079
|
|
},
|
|
{
|
|
"epoch": 1.375266524520256,
|
|
"grad_norm": 0.45683872663301633,
|
|
"learning_rate": 3.6434962046636464e-05,
|
|
"loss": 1.0308797359466553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530701458454132,
|
|
"step": 323,
|
|
"valid_targets_mean": 14099.2,
|
|
"valid_targets_min": 5779
|
|
},
|
|
{
|
|
"epoch": 1.3795309168443497,
|
|
"grad_norm": 0.46543377414255654,
|
|
"learning_rate": 3.6401015532549957e-05,
|
|
"loss": 0.9938924312591553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24966439604759216,
|
|
"step": 324,
|
|
"valid_targets_mean": 14335.0,
|
|
"valid_targets_min": 3001
|
|
},
|
|
{
|
|
"epoch": 1.3837953091684434,
|
|
"grad_norm": 0.5073276640006866,
|
|
"learning_rate": 3.6366924134542386e-05,
|
|
"loss": 1.103708028793335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673909664154053,
|
|
"step": 325,
|
|
"valid_targets_mean": 14326.0,
|
|
"valid_targets_min": 3428
|
|
},
|
|
{
|
|
"epoch": 1.3880597014925373,
|
|
"grad_norm": 0.41999997348959983,
|
|
"learning_rate": 3.633268815377166e-05,
|
|
"loss": 1.0235549211502075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27672672271728516,
|
|
"step": 326,
|
|
"valid_targets_mean": 15623.8,
|
|
"valid_targets_min": 12359
|
|
},
|
|
{
|
|
"epoch": 1.3923240938166312,
|
|
"grad_norm": 0.4283930617686111,
|
|
"learning_rate": 3.6298307892672895e-05,
|
|
"loss": 1.060333013534546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24250705540180206,
|
|
"step": 327,
|
|
"valid_targets_mean": 13869.2,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 1.396588486140725,
|
|
"grad_norm": 0.44698391203089444,
|
|
"learning_rate": 3.626378365495577e-05,
|
|
"loss": 1.00943124294281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25098490715026855,
|
|
"step": 328,
|
|
"valid_targets_mean": 14535.7,
|
|
"valid_targets_min": 3330
|
|
},
|
|
{
|
|
"epoch": 1.4008528784648187,
|
|
"grad_norm": 0.44825256007015724,
|
|
"learning_rate": 3.622911574560181e-05,
|
|
"loss": 1.0213543176651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28993093967437744,
|
|
"step": 329,
|
|
"valid_targets_mean": 15023.1,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 1.4051172707889126,
|
|
"grad_norm": 0.42476912523845756,
|
|
"learning_rate": 3.6194304470861744e-05,
|
|
"loss": 1.0365045070648193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24340680241584778,
|
|
"step": 330,
|
|
"valid_targets_mean": 14159.5,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 1.4093816631130065,
|
|
"grad_norm": 0.43958000947111536,
|
|
"learning_rate": 3.615935013825272e-05,
|
|
"loss": 1.033185362815857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23615875840187073,
|
|
"step": 331,
|
|
"valid_targets_mean": 13884.3,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 1.4136460554371002,
|
|
"grad_norm": 0.4204170236418371,
|
|
"learning_rate": 3.612425305655569e-05,
|
|
"loss": 1.0323336124420166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2208036184310913,
|
|
"step": 332,
|
|
"valid_targets_mean": 13598.4,
|
|
"valid_targets_min": 4280
|
|
},
|
|
{
|
|
"epoch": 1.417910447761194,
|
|
"grad_norm": 0.4389873476287673,
|
|
"learning_rate": 3.6089013535812593e-05,
|
|
"loss": 1.0168285369873047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26316508650779724,
|
|
"step": 333,
|
|
"valid_targets_mean": 14173.4,
|
|
"valid_targets_min": 2833
|
|
},
|
|
{
|
|
"epoch": 1.4221748400852878,
|
|
"grad_norm": 0.4556140429089692,
|
|
"learning_rate": 3.6053631887323656e-05,
|
|
"loss": 1.0434997081756592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24023643136024475,
|
|
"step": 334,
|
|
"valid_targets_mean": 13169.0,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 1.4264392324093818,
|
|
"grad_norm": 0.4431371350441099,
|
|
"learning_rate": 3.601810842364465e-05,
|
|
"loss": 1.0581681728363037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675790786743164,
|
|
"step": 335,
|
|
"valid_targets_mean": 14832.1,
|
|
"valid_targets_min": 2603
|
|
},
|
|
{
|
|
"epoch": 1.4307036247334755,
|
|
"grad_norm": 0.3884558218916587,
|
|
"learning_rate": 3.598244345858412e-05,
|
|
"loss": 1.0448601245880127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2664775848388672,
|
|
"step": 336,
|
|
"valid_targets_mean": 14194.0,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 1.4349680170575694,
|
|
"grad_norm": 0.4119396903459026,
|
|
"learning_rate": 3.594663730720059e-05,
|
|
"loss": 1.0551373958587646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26155537366867065,
|
|
"step": 337,
|
|
"valid_targets_mean": 14611.4,
|
|
"valid_targets_min": 4873
|
|
},
|
|
{
|
|
"epoch": 1.439232409381663,
|
|
"grad_norm": 0.4628790996702182,
|
|
"learning_rate": 3.591069028579982e-05,
|
|
"loss": 1.0310451984405518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25116166472435,
|
|
"step": 338,
|
|
"valid_targets_mean": 14504.5,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 1.443496801705757,
|
|
"grad_norm": 0.4096616539119669,
|
|
"learning_rate": 3.5874602711931994e-05,
|
|
"loss": 1.0190770626068115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23682686686515808,
|
|
"step": 339,
|
|
"valid_targets_mean": 14049.9,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 1.4477611940298507,
|
|
"grad_norm": 0.37001190729217803,
|
|
"learning_rate": 3.5838374904388904e-05,
|
|
"loss": 1.0603420734405518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26523780822753906,
|
|
"step": 340,
|
|
"valid_targets_mean": 14519.9,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 1.4520255863539446,
|
|
"grad_norm": 0.40225130050148994,
|
|
"learning_rate": 3.580200718320115e-05,
|
|
"loss": 1.0345293283462524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24541568756103516,
|
|
"step": 341,
|
|
"valid_targets_mean": 14551.1,
|
|
"valid_targets_min": 4057
|
|
},
|
|
{
|
|
"epoch": 1.4562899786780383,
|
|
"grad_norm": 0.46211597569923707,
|
|
"learning_rate": 3.576549986963531e-05,
|
|
"loss": 1.0719603300094604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25376763939857483,
|
|
"step": 342,
|
|
"valid_targets_mean": 14550.2,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 1.4605543710021323,
|
|
"grad_norm": 0.41006193894194776,
|
|
"learning_rate": 3.5728853286191075e-05,
|
|
"loss": 1.0310711860656738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23255914449691772,
|
|
"step": 343,
|
|
"valid_targets_mean": 14847.7,
|
|
"valid_targets_min": 5643
|
|
},
|
|
{
|
|
"epoch": 1.464818763326226,
|
|
"grad_norm": 0.4534968027598099,
|
|
"learning_rate": 3.5692067756598465e-05,
|
|
"loss": 1.0853101015090942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26018351316452026,
|
|
"step": 344,
|
|
"valid_targets_mean": 14242.3,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 1.4690831556503199,
|
|
"grad_norm": 0.5066395628513659,
|
|
"learning_rate": 3.5655143605814885e-05,
|
|
"loss": 1.0884089469909668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29741960763931274,
|
|
"step": 345,
|
|
"valid_targets_mean": 15187.0,
|
|
"valid_targets_min": 5367
|
|
},
|
|
{
|
|
"epoch": 1.4733475479744136,
|
|
"grad_norm": 0.3953808929375593,
|
|
"learning_rate": 3.561808116002232e-05,
|
|
"loss": 1.0500078201293945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634213864803314,
|
|
"step": 346,
|
|
"valid_targets_mean": 13944.7,
|
|
"valid_targets_min": 4459
|
|
},
|
|
{
|
|
"epoch": 1.4776119402985075,
|
|
"grad_norm": 0.5138412293454909,
|
|
"learning_rate": 3.5580880746624444e-05,
|
|
"loss": 1.107835292816162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863835096359253,
|
|
"step": 347,
|
|
"valid_targets_mean": 14733.8,
|
|
"valid_targets_min": 3275
|
|
},
|
|
{
|
|
"epoch": 1.4818763326226012,
|
|
"grad_norm": 0.5456450090129401,
|
|
"learning_rate": 3.5543542694243685e-05,
|
|
"loss": 1.0177596807479858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24999922513961792,
|
|
"step": 348,
|
|
"valid_targets_mean": 14831.7,
|
|
"valid_targets_min": 3693
|
|
},
|
|
{
|
|
"epoch": 1.4861407249466951,
|
|
"grad_norm": 0.4191027859420552,
|
|
"learning_rate": 3.5506067332718355e-05,
|
|
"loss": 1.044450283050537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24729132652282715,
|
|
"step": 349,
|
|
"valid_targets_mean": 14922.2,
|
|
"valid_targets_min": 6830
|
|
},
|
|
{
|
|
"epoch": 1.4904051172707888,
|
|
"grad_norm": 0.5101310827045918,
|
|
"learning_rate": 3.546845499309976e-05,
|
|
"loss": 1.034336805343628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27868491411209106,
|
|
"step": 350,
|
|
"valid_targets_mean": 14642.0,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 1.4946695095948828,
|
|
"grad_norm": 0.45641955247850674,
|
|
"learning_rate": 3.5430706007649225e-05,
|
|
"loss": 1.010349154472351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27026039361953735,
|
|
"step": 351,
|
|
"valid_targets_mean": 15455.7,
|
|
"valid_targets_min": 12148
|
|
},
|
|
{
|
|
"epoch": 1.4989339019189765,
|
|
"grad_norm": 0.4317371718285171,
|
|
"learning_rate": 3.539282070983518e-05,
|
|
"loss": 0.9864077568054199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25268083810806274,
|
|
"step": 352,
|
|
"valid_targets_mean": 14920.8,
|
|
"valid_targets_min": 7864
|
|
},
|
|
{
|
|
"epoch": 1.5031982942430704,
|
|
"grad_norm": 0.3940970557960095,
|
|
"learning_rate": 3.535479943433023e-05,
|
|
"loss": 1.0978214740753174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28414294123649597,
|
|
"step": 353,
|
|
"valid_targets_mean": 15169.3,
|
|
"valid_targets_min": 7521
|
|
},
|
|
{
|
|
"epoch": 1.5074626865671643,
|
|
"grad_norm": 0.4184867810086967,
|
|
"learning_rate": 3.5316642517008184e-05,
|
|
"loss": 1.014693260192871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25302958488464355,
|
|
"step": 354,
|
|
"valid_targets_mean": 14533.0,
|
|
"valid_targets_min": 5862
|
|
},
|
|
{
|
|
"epoch": 1.511727078891258,
|
|
"grad_norm": 0.3998972769817957,
|
|
"learning_rate": 3.5278350294941074e-05,
|
|
"loss": 1.0278730392456055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2474115788936615,
|
|
"step": 355,
|
|
"valid_targets_mean": 15259.2,
|
|
"valid_targets_min": 9655
|
|
},
|
|
{
|
|
"epoch": 1.5159914712153517,
|
|
"grad_norm": 0.40301151478092534,
|
|
"learning_rate": 3.523992310639622e-05,
|
|
"loss": 1.0605480670928955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23796460032463074,
|
|
"step": 356,
|
|
"valid_targets_mean": 14440.0,
|
|
"valid_targets_min": 3563
|
|
},
|
|
{
|
|
"epoch": 1.5202558635394456,
|
|
"grad_norm": 0.3859524038088362,
|
|
"learning_rate": 3.5201361290833165e-05,
|
|
"loss": 1.0404666662216187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695145606994629,
|
|
"step": 357,
|
|
"valid_targets_mean": 14853.6,
|
|
"valid_targets_min": 5910
|
|
},
|
|
{
|
|
"epoch": 1.5245202558635396,
|
|
"grad_norm": 0.4281303213000771,
|
|
"learning_rate": 3.516266518890079e-05,
|
|
"loss": 1.0552990436553955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28470128774642944,
|
|
"step": 358,
|
|
"valid_targets_mean": 15617.7,
|
|
"valid_targets_min": 7355
|
|
},
|
|
{
|
|
"epoch": 1.5287846481876333,
|
|
"grad_norm": 0.4081716926973645,
|
|
"learning_rate": 3.512383514243419e-05,
|
|
"loss": 0.9892454147338867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25441765785217285,
|
|
"step": 359,
|
|
"valid_targets_mean": 14814.4,
|
|
"valid_targets_min": 5326
|
|
},
|
|
{
|
|
"epoch": 1.533049040511727,
|
|
"grad_norm": 0.4199861739745087,
|
|
"learning_rate": 3.5084871494451716e-05,
|
|
"loss": 1.0410898923873901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637255787849426,
|
|
"step": 360,
|
|
"valid_targets_mean": 14399.5,
|
|
"valid_targets_min": 4828
|
|
},
|
|
{
|
|
"epoch": 1.537313432835821,
|
|
"grad_norm": 0.4824609567794354,
|
|
"learning_rate": 3.5045774589151955e-05,
|
|
"loss": 1.080854892730713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708434760570526,
|
|
"step": 361,
|
|
"valid_targets_mean": 14051.8,
|
|
"valid_targets_min": 5008
|
|
},
|
|
{
|
|
"epoch": 1.5415778251599148,
|
|
"grad_norm": 0.4393530556935025,
|
|
"learning_rate": 3.500654477191064e-05,
|
|
"loss": 1.0591765642166138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25505468249320984,
|
|
"step": 362,
|
|
"valid_targets_mean": 15092.4,
|
|
"valid_targets_min": 8853
|
|
},
|
|
{
|
|
"epoch": 1.5458422174840085,
|
|
"grad_norm": 0.4496939073542294,
|
|
"learning_rate": 3.496718238927764e-05,
|
|
"loss": 1.0314967632293701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28049999475479126,
|
|
"step": 363,
|
|
"valid_targets_mean": 14726.6,
|
|
"valid_targets_min": 4844
|
|
},
|
|
{
|
|
"epoch": 1.5501066098081022,
|
|
"grad_norm": 0.3767967164078189,
|
|
"learning_rate": 3.492768778897388e-05,
|
|
"loss": 1.032379150390625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25079959630966187,
|
|
"step": 364,
|
|
"valid_targets_mean": 14377.2,
|
|
"valid_targets_min": 4826
|
|
},
|
|
{
|
|
"epoch": 1.5543710021321961,
|
|
"grad_norm": 0.41093299325839383,
|
|
"learning_rate": 3.4888061319888276e-05,
|
|
"loss": 1.0461351871490479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677087187767029,
|
|
"step": 365,
|
|
"valid_targets_mean": 14739.1,
|
|
"valid_targets_min": 5957
|
|
},
|
|
{
|
|
"epoch": 1.55863539445629,
|
|
"grad_norm": 0.4718148390197053,
|
|
"learning_rate": 3.484830333207466e-05,
|
|
"loss": 1.0037732124328613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2366221845149994,
|
|
"step": 366,
|
|
"valid_targets_mean": 14270.9,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 1.5628997867803838,
|
|
"grad_norm": 0.3683309665447676,
|
|
"learning_rate": 3.4808414176748666e-05,
|
|
"loss": 1.0115573406219482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25723186135292053,
|
|
"step": 367,
|
|
"valid_targets_mean": 14459.8,
|
|
"valid_targets_min": 3202
|
|
},
|
|
{
|
|
"epoch": 1.5671641791044775,
|
|
"grad_norm": 0.4668561871728462,
|
|
"learning_rate": 3.476839420628466e-05,
|
|
"loss": 1.0734195709228516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682706117630005,
|
|
"step": 368,
|
|
"valid_targets_mean": 15118.6,
|
|
"valid_targets_min": 4725
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.4046671633019538,
|
|
"learning_rate": 3.472824377421257e-05,
|
|
"loss": 1.048018217086792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25999540090560913,
|
|
"step": 369,
|
|
"valid_targets_mean": 14105.8,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 1.5756929637526653,
|
|
"grad_norm": 0.39652203500910616,
|
|
"learning_rate": 3.4687963235214845e-05,
|
|
"loss": 1.048305630683899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27277982234954834,
|
|
"step": 370,
|
|
"valid_targets_mean": 14638.0,
|
|
"valid_targets_min": 5709
|
|
},
|
|
{
|
|
"epoch": 1.579957356076759,
|
|
"grad_norm": 0.36720607936715915,
|
|
"learning_rate": 3.464755294512325e-05,
|
|
"loss": 1.0167198181152344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27033379673957825,
|
|
"step": 371,
|
|
"valid_targets_mean": 14126.0,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 1.5842217484008527,
|
|
"grad_norm": 0.38456810792937096,
|
|
"learning_rate": 3.4607013260915765e-05,
|
|
"loss": 1.0565170049667358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720809578895569,
|
|
"step": 372,
|
|
"valid_targets_mean": 15196.1,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 1.5884861407249466,
|
|
"grad_norm": 0.3637216404609028,
|
|
"learning_rate": 3.4566344540713404e-05,
|
|
"loss": 0.9865789413452148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26180344820022583,
|
|
"step": 373,
|
|
"valid_targets_mean": 15523.9,
|
|
"valid_targets_min": 3813
|
|
},
|
|
{
|
|
"epoch": 1.5927505330490406,
|
|
"grad_norm": 0.35889766290625613,
|
|
"learning_rate": 3.452554714377706e-05,
|
|
"loss": 1.0232092142105103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.277890145778656,
|
|
"step": 374,
|
|
"valid_targets_mean": 15356.0,
|
|
"valid_targets_min": 7797
|
|
},
|
|
{
|
|
"epoch": 1.5970149253731343,
|
|
"grad_norm": 0.3739375213114592,
|
|
"learning_rate": 3.448462143050436e-05,
|
|
"loss": 1.0132498741149902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24722163379192352,
|
|
"step": 375,
|
|
"valid_targets_mean": 14254.1,
|
|
"valid_targets_min": 4122
|
|
},
|
|
{
|
|
"epoch": 1.6012793176972282,
|
|
"grad_norm": 0.3818455869331284,
|
|
"learning_rate": 3.4443567762426444e-05,
|
|
"loss": 1.0428814888000488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24534392356872559,
|
|
"step": 376,
|
|
"valid_targets_mean": 14382.5,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 1.6055437100213221,
|
|
"grad_norm": 0.34495022809821796,
|
|
"learning_rate": 3.440238650220477e-05,
|
|
"loss": 1.042824387550354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24345073103904724,
|
|
"step": 377,
|
|
"valid_targets_mean": 14360.2,
|
|
"valid_targets_min": 2659
|
|
},
|
|
{
|
|
"epoch": 1.6098081023454158,
|
|
"grad_norm": 0.40488304701018185,
|
|
"learning_rate": 3.4361078013627945e-05,
|
|
"loss": 1.0216984748840332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2486225813627243,
|
|
"step": 378,
|
|
"valid_targets_mean": 14909.9,
|
|
"valid_targets_min": 5877
|
|
},
|
|
{
|
|
"epoch": 1.6140724946695095,
|
|
"grad_norm": 0.38958667063252717,
|
|
"learning_rate": 3.4319642661608474e-05,
|
|
"loss": 1.041244387626648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563459575176239,
|
|
"step": 379,
|
|
"valid_targets_mean": 14180.1,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 1.6183368869936035,
|
|
"grad_norm": 0.3879718703987766,
|
|
"learning_rate": 3.427808081217957e-05,
|
|
"loss": 1.0834057331085205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27810734510421753,
|
|
"step": 380,
|
|
"valid_targets_mean": 14983.0,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 1.6226012793176974,
|
|
"grad_norm": 0.3678724820098558,
|
|
"learning_rate": 3.423639283249189e-05,
|
|
"loss": 0.9997847676277161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26151639223098755,
|
|
"step": 381,
|
|
"valid_targets_mean": 14317.3,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 1.626865671641791,
|
|
"grad_norm": 0.37550172602496895,
|
|
"learning_rate": 3.419457909081032e-05,
|
|
"loss": 1.012846827507019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638561427593231,
|
|
"step": 382,
|
|
"valid_targets_mean": 14404.6,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 1.6311300639658848,
|
|
"grad_norm": 0.35458756530617497,
|
|
"learning_rate": 3.415263995651069e-05,
|
|
"loss": 1.0155653953552246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24735401570796967,
|
|
"step": 383,
|
|
"valid_targets_mean": 14532.0,
|
|
"valid_targets_min": 3217
|
|
},
|
|
{
|
|
"epoch": 1.6353944562899787,
|
|
"grad_norm": 0.371044569830832,
|
|
"learning_rate": 3.411057580007653e-05,
|
|
"loss": 1.0519380569458008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267182320356369,
|
|
"step": 384,
|
|
"valid_targets_mean": 14626.3,
|
|
"valid_targets_min": 3330
|
|
},
|
|
{
|
|
"epoch": 1.6396588486140726,
|
|
"grad_norm": 0.3920256427266323,
|
|
"learning_rate": 3.4068386993095806e-05,
|
|
"loss": 1.0342265367507935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27065855264663696,
|
|
"step": 385,
|
|
"valid_targets_mean": 14689.8,
|
|
"valid_targets_min": 4055
|
|
},
|
|
{
|
|
"epoch": 1.6439232409381663,
|
|
"grad_norm": 0.38140062987311946,
|
|
"learning_rate": 3.402607390825762e-05,
|
|
"loss": 1.0449402332305908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26704907417297363,
|
|
"step": 386,
|
|
"valid_targets_mean": 14720.9,
|
|
"valid_targets_min": 5646
|
|
},
|
|
{
|
|
"epoch": 1.64818763326226,
|
|
"grad_norm": 0.325240540507986,
|
|
"learning_rate": 3.398363691934894e-05,
|
|
"loss": 1.0454330444335938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23542875051498413,
|
|
"step": 387,
|
|
"valid_targets_mean": 13642.0,
|
|
"valid_targets_min": 2066
|
|
},
|
|
{
|
|
"epoch": 1.652452025586354,
|
|
"grad_norm": 0.3626863683121964,
|
|
"learning_rate": 3.3941076401251244e-05,
|
|
"loss": 1.0002706050872803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23347462713718414,
|
|
"step": 388,
|
|
"valid_targets_mean": 14856.9,
|
|
"valid_targets_min": 2551
|
|
},
|
|
{
|
|
"epoch": 1.6567164179104479,
|
|
"grad_norm": 0.37236846545638524,
|
|
"learning_rate": 3.3898392729937295e-05,
|
|
"loss": 1.023524522781372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577016353607178,
|
|
"step": 389,
|
|
"valid_targets_mean": 14729.4,
|
|
"valid_targets_min": 5174
|
|
},
|
|
{
|
|
"epoch": 1.6609808102345416,
|
|
"grad_norm": 0.3380396992436281,
|
|
"learning_rate": 3.385558628246774e-05,
|
|
"loss": 1.0359125137329102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26393139362335205,
|
|
"step": 390,
|
|
"valid_targets_mean": 15467.5,
|
|
"valid_targets_min": 2531
|
|
},
|
|
{
|
|
"epoch": 1.6652452025586353,
|
|
"grad_norm": 0.419548563472602,
|
|
"learning_rate": 3.381265743698781e-05,
|
|
"loss": 1.0573993921279907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728646695613861,
|
|
"step": 391,
|
|
"valid_targets_mean": 14620.8,
|
|
"valid_targets_min": 5987
|
|
},
|
|
{
|
|
"epoch": 1.6695095948827292,
|
|
"grad_norm": 0.38082962717153934,
|
|
"learning_rate": 3.3769606572724e-05,
|
|
"loss": 1.0520474910736084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29153943061828613,
|
|
"step": 392,
|
|
"valid_targets_mean": 14701.7,
|
|
"valid_targets_min": 5351
|
|
},
|
|
{
|
|
"epoch": 1.6737739872068231,
|
|
"grad_norm": 0.35635117823907536,
|
|
"learning_rate": 3.3726434069980686e-05,
|
|
"loss": 1.032330870628357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26902976632118225,
|
|
"step": 393,
|
|
"valid_targets_mean": 14448.6,
|
|
"valid_targets_min": 7360
|
|
},
|
|
{
|
|
"epoch": 1.6780383795309168,
|
|
"grad_norm": 0.3155471753283903,
|
|
"learning_rate": 3.368314031013678e-05,
|
|
"loss": 1.020857810974121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23675858974456787,
|
|
"step": 394,
|
|
"valid_targets_mean": 14592.0,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 1.6823027718550105,
|
|
"grad_norm": 0.3491475504248631,
|
|
"learning_rate": 3.363972567564236e-05,
|
|
"loss": 0.9919549226760864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549368143081665,
|
|
"step": 395,
|
|
"valid_targets_mean": 15144.4,
|
|
"valid_targets_min": 3992
|
|
},
|
|
{
|
|
"epoch": 1.6865671641791045,
|
|
"grad_norm": 0.3500452748812154,
|
|
"learning_rate": 3.35961905500153e-05,
|
|
"loss": 1.008888840675354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24310478568077087,
|
|
"step": 396,
|
|
"valid_targets_mean": 13942.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 1.6908315565031984,
|
|
"grad_norm": 0.31629611985482453,
|
|
"learning_rate": 3.3552535317837855e-05,
|
|
"loss": 0.9874848127365112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575531601905823,
|
|
"step": 397,
|
|
"valid_targets_mean": 14985.8,
|
|
"valid_targets_min": 7044
|
|
},
|
|
{
|
|
"epoch": 1.695095948827292,
|
|
"grad_norm": 0.3522325586451902,
|
|
"learning_rate": 3.35087603647533e-05,
|
|
"loss": 1.0086314678192139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608971893787384,
|
|
"step": 398,
|
|
"valid_targets_mean": 14448.2,
|
|
"valid_targets_min": 4964
|
|
},
|
|
{
|
|
"epoch": 1.6993603411513858,
|
|
"grad_norm": 0.31301861452612695,
|
|
"learning_rate": 3.346486607746249e-05,
|
|
"loss": 1.0424044132232666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2408139407634735,
|
|
"step": 399,
|
|
"valid_targets_mean": 14033.4,
|
|
"valid_targets_min": 2426
|
|
},
|
|
{
|
|
"epoch": 1.7036247334754797,
|
|
"grad_norm": 0.3544159865720153,
|
|
"learning_rate": 3.342085284372047e-05,
|
|
"loss": 1.019155740737915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26168614625930786,
|
|
"step": 400,
|
|
"valid_targets_mean": 14583.3,
|
|
"valid_targets_min": 7331
|
|
},
|
|
{
|
|
"epoch": 1.7078891257995736,
|
|
"grad_norm": 0.35830992575233017,
|
|
"learning_rate": 3.337672105233303e-05,
|
|
"loss": 1.0167568922042847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275235116481781,
|
|
"step": 401,
|
|
"valid_targets_mean": 14993.9,
|
|
"valid_targets_min": 7835
|
|
},
|
|
{
|
|
"epoch": 1.7121535181236673,
|
|
"grad_norm": 0.35902792428341734,
|
|
"learning_rate": 3.3332471093153296e-05,
|
|
"loss": 1.0180636644363403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24872978031635284,
|
|
"step": 402,
|
|
"valid_targets_mean": 13492.8,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 1.716417910447761,
|
|
"grad_norm": 0.36051092598711465,
|
|
"learning_rate": 3.3288103357078244e-05,
|
|
"loss": 1.0330071449279785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677076458930969,
|
|
"step": 403,
|
|
"valid_targets_mean": 14617.1,
|
|
"valid_targets_min": 2467
|
|
},
|
|
{
|
|
"epoch": 1.720682302771855,
|
|
"grad_norm": 0.3434771928982728,
|
|
"learning_rate": 3.324361823604529e-05,
|
|
"loss": 0.9663311243057251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2340817153453827,
|
|
"step": 404,
|
|
"valid_targets_mean": 14467.1,
|
|
"valid_targets_min": 5017
|
|
},
|
|
{
|
|
"epoch": 1.724946695095949,
|
|
"grad_norm": 0.367031298129254,
|
|
"learning_rate": 3.319901612302881e-05,
|
|
"loss": 1.0393340587615967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640244960784912,
|
|
"step": 405,
|
|
"valid_targets_mean": 14536.2,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 1.7292110874200426,
|
|
"grad_norm": 0.32160124688862785,
|
|
"learning_rate": 3.315429741203666e-05,
|
|
"loss": 1.0534725189208984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27018362283706665,
|
|
"step": 406,
|
|
"valid_targets_mean": 14123.1,
|
|
"valid_targets_min": 4839
|
|
},
|
|
{
|
|
"epoch": 1.7334754797441365,
|
|
"grad_norm": 0.3975915078305391,
|
|
"learning_rate": 3.3109462498106705e-05,
|
|
"loss": 1.0199532508850098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2474980354309082,
|
|
"step": 407,
|
|
"valid_targets_mean": 14131.3,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 1.7377398720682304,
|
|
"grad_norm": 0.404549535847172,
|
|
"learning_rate": 3.306451177730333e-05,
|
|
"loss": 1.0455772876739502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24461336433887482,
|
|
"step": 408,
|
|
"valid_targets_mean": 13883.8,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 1.7420042643923241,
|
|
"grad_norm": 0.4204078706751115,
|
|
"learning_rate": 3.301944564671394e-05,
|
|
"loss": 1.0095460414886475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24603058397769928,
|
|
"step": 409,
|
|
"valid_targets_mean": 14259.0,
|
|
"valid_targets_min": 6646
|
|
},
|
|
{
|
|
"epoch": 1.7462686567164178,
|
|
"grad_norm": 0.4103230098581747,
|
|
"learning_rate": 3.297426450444546e-05,
|
|
"loss": 1.0763204097747803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25960424542427063,
|
|
"step": 410,
|
|
"valid_targets_mean": 14020.4,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 1.7505330490405118,
|
|
"grad_norm": 0.37328009003092316,
|
|
"learning_rate": 3.292896874962078e-05,
|
|
"loss": 1.0004827976226807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24652372300624847,
|
|
"step": 411,
|
|
"valid_targets_mean": 15418.4,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 1.7547974413646057,
|
|
"grad_norm": 0.42837498755659514,
|
|
"learning_rate": 3.2883558782375294e-05,
|
|
"loss": 1.060294270515442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28447169065475464,
|
|
"step": 412,
|
|
"valid_targets_mean": 15048.8,
|
|
"valid_targets_min": 4661
|
|
},
|
|
{
|
|
"epoch": 1.7590618336886994,
|
|
"grad_norm": 0.3847348504680295,
|
|
"learning_rate": 3.283803500385332e-05,
|
|
"loss": 1.043828010559082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256874144077301,
|
|
"step": 413,
|
|
"valid_targets_mean": 14366.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 1.763326226012793,
|
|
"grad_norm": 0.41750303322446036,
|
|
"learning_rate": 3.2792397816204546e-05,
|
|
"loss": 1.03984534740448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27797019481658936,
|
|
"step": 414,
|
|
"valid_targets_mean": 14281.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 1.767590618336887,
|
|
"grad_norm": 0.3658966470677723,
|
|
"learning_rate": 3.2746647622580524e-05,
|
|
"loss": 0.9981644153594971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24130582809448242,
|
|
"step": 415,
|
|
"valid_targets_mean": 14948.2,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 1.771855010660981,
|
|
"grad_norm": 0.4012962747445172,
|
|
"learning_rate": 3.270078482713106e-05,
|
|
"loss": 1.0081028938293457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592736482620239,
|
|
"step": 416,
|
|
"valid_targets_mean": 14538.5,
|
|
"valid_targets_min": 6985
|
|
},
|
|
{
|
|
"epoch": 1.7761194029850746,
|
|
"grad_norm": 0.3895351711980436,
|
|
"learning_rate": 3.265480983500069e-05,
|
|
"loss": 1.0340535640716553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25202107429504395,
|
|
"step": 417,
|
|
"valid_targets_mean": 13763.7,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 1.7803837953091683,
|
|
"grad_norm": 0.3767664187708881,
|
|
"learning_rate": 3.260872305232507e-05,
|
|
"loss": 1.0008809566497803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24762430787086487,
|
|
"step": 418,
|
|
"valid_targets_mean": 14314.2,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 1.7846481876332623,
|
|
"grad_norm": 0.3542086724966207,
|
|
"learning_rate": 3.256252488622738e-05,
|
|
"loss": 1.0124982595443726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24885782599449158,
|
|
"step": 419,
|
|
"valid_targets_mean": 15054.8,
|
|
"valid_targets_min": 10156
|
|
},
|
|
{
|
|
"epoch": 1.7889125799573562,
|
|
"grad_norm": 0.3651565881315884,
|
|
"learning_rate": 3.251621574481475e-05,
|
|
"loss": 1.06607985496521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821624279022217,
|
|
"step": 420,
|
|
"valid_targets_mean": 14622.4,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 1.79317697228145,
|
|
"grad_norm": 0.38151203172544684,
|
|
"learning_rate": 3.246979603717467e-05,
|
|
"loss": 0.9925919771194458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2352856695652008,
|
|
"step": 421,
|
|
"valid_targets_mean": 15146.0,
|
|
"valid_targets_min": 5987
|
|
},
|
|
{
|
|
"epoch": 1.7974413646055436,
|
|
"grad_norm": 0.40169767615850255,
|
|
"learning_rate": 3.242326617337133e-05,
|
|
"loss": 1.019084095954895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23864883184432983,
|
|
"step": 422,
|
|
"valid_targets_mean": 13824.1,
|
|
"valid_targets_min": 2070
|
|
},
|
|
{
|
|
"epoch": 1.8017057569296375,
|
|
"grad_norm": 0.37718034825937,
|
|
"learning_rate": 3.2376626564442016e-05,
|
|
"loss": 1.0439693927764893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585396468639374,
|
|
"step": 423,
|
|
"valid_targets_mean": 15041.2,
|
|
"valid_targets_min": 7090
|
|
},
|
|
{
|
|
"epoch": 1.8059701492537314,
|
|
"grad_norm": 0.39849923877777926,
|
|
"learning_rate": 3.2329877622393515e-05,
|
|
"loss": 1.0550795793533325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689545452594757,
|
|
"step": 424,
|
|
"valid_targets_mean": 14683.2,
|
|
"valid_targets_min": 7228
|
|
},
|
|
{
|
|
"epoch": 1.8102345415778252,
|
|
"grad_norm": 0.3789705571852078,
|
|
"learning_rate": 3.228301976019841e-05,
|
|
"loss": 0.9771716594696045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23471301794052124,
|
|
"step": 425,
|
|
"valid_targets_mean": 14624.9,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 1.8144989339019189,
|
|
"grad_norm": 0.3358199972987029,
|
|
"learning_rate": 3.22360533917915e-05,
|
|
"loss": 0.9905974268913269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24455487728118896,
|
|
"step": 426,
|
|
"valid_targets_mean": 14685.0,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 1.8187633262260128,
|
|
"grad_norm": 0.3346905952610327,
|
|
"learning_rate": 3.218897893206608e-05,
|
|
"loss": 1.0660550594329834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28042614459991455,
|
|
"step": 427,
|
|
"valid_targets_mean": 15406.8,
|
|
"valid_targets_min": 10790
|
|
},
|
|
{
|
|
"epoch": 1.8230277185501067,
|
|
"grad_norm": 0.38346051127421305,
|
|
"learning_rate": 3.2141796796870335e-05,
|
|
"loss": 0.9783576130867004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634841501712799,
|
|
"step": 428,
|
|
"valid_targets_mean": 15478.9,
|
|
"valid_targets_min": 5018
|
|
},
|
|
{
|
|
"epoch": 1.8272921108742004,
|
|
"grad_norm": 0.32279452910281287,
|
|
"learning_rate": 3.2094507403003614e-05,
|
|
"loss": 1.0031273365020752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24373270571231842,
|
|
"step": 429,
|
|
"valid_targets_mean": 14231.6,
|
|
"valid_targets_min": 5631
|
|
},
|
|
{
|
|
"epoch": 1.831556503198294,
|
|
"grad_norm": 0.3487932271401693,
|
|
"learning_rate": 3.2047111168212785e-05,
|
|
"loss": 0.9881079196929932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23340928554534912,
|
|
"step": 430,
|
|
"valid_targets_mean": 14735.6,
|
|
"valid_targets_min": 4105
|
|
},
|
|
{
|
|
"epoch": 1.835820895522388,
|
|
"grad_norm": 0.38587694343346535,
|
|
"learning_rate": 3.1999608511188524e-05,
|
|
"loss": 1.0298049449920654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23570841550827026,
|
|
"step": 431,
|
|
"valid_targets_mean": 14458.9,
|
|
"valid_targets_min": 4385
|
|
},
|
|
{
|
|
"epoch": 1.840085287846482,
|
|
"grad_norm": 0.3042002863554915,
|
|
"learning_rate": 3.1951999851561625e-05,
|
|
"loss": 1.0437463521957397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2505001127719879,
|
|
"step": 432,
|
|
"valid_targets_mean": 13971.0,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 1.8443496801705757,
|
|
"grad_norm": 0.3714940578880021,
|
|
"learning_rate": 3.190428560989931e-05,
|
|
"loss": 1.0183122158050537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25220775604248047,
|
|
"step": 433,
|
|
"valid_targets_mean": 14063.9,
|
|
"valid_targets_min": 4039
|
|
},
|
|
{
|
|
"epoch": 1.8486140724946694,
|
|
"grad_norm": 0.3488097430914349,
|
|
"learning_rate": 3.185646620770146e-05,
|
|
"loss": 1.0450854301452637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619399428367615,
|
|
"step": 434,
|
|
"valid_targets_mean": 14315.4,
|
|
"valid_targets_min": 5088
|
|
},
|
|
{
|
|
"epoch": 1.8528784648187633,
|
|
"grad_norm": 0.3730159752184596,
|
|
"learning_rate": 3.180854206739696e-05,
|
|
"loss": 1.0416991710662842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26735377311706543,
|
|
"step": 435,
|
|
"valid_targets_mean": 14091.8,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.3922883995719241,
|
|
"learning_rate": 3.176051361233991e-05,
|
|
"loss": 1.0079805850982666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26462459564208984,
|
|
"step": 436,
|
|
"valid_targets_mean": 15415.9,
|
|
"valid_targets_min": 9952
|
|
},
|
|
{
|
|
"epoch": 1.861407249466951,
|
|
"grad_norm": 0.36324697909671044,
|
|
"learning_rate": 3.171238126680594e-05,
|
|
"loss": 1.0385366678237915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24491502344608307,
|
|
"step": 437,
|
|
"valid_targets_mean": 13899.1,
|
|
"valid_targets_min": 3160
|
|
},
|
|
{
|
|
"epoch": 1.8656716417910446,
|
|
"grad_norm": 0.3532207732723003,
|
|
"learning_rate": 3.166414545598839e-05,
|
|
"loss": 1.0549908876419067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25254905223846436,
|
|
"step": 438,
|
|
"valid_targets_mean": 14838.1,
|
|
"valid_targets_min": 6803
|
|
},
|
|
{
|
|
"epoch": 1.8699360341151388,
|
|
"grad_norm": 0.345430167807002,
|
|
"learning_rate": 3.161580660599464e-05,
|
|
"loss": 1.0517973899841309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26978904008865356,
|
|
"step": 439,
|
|
"valid_targets_mean": 15549.5,
|
|
"valid_targets_min": 9668
|
|
},
|
|
{
|
|
"epoch": 1.8742004264392325,
|
|
"grad_norm": 0.32859356953631097,
|
|
"learning_rate": 3.1567365143842264e-05,
|
|
"loss": 1.0060558319091797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25652962923049927,
|
|
"step": 440,
|
|
"valid_targets_mean": 14757.1,
|
|
"valid_targets_min": 5741
|
|
},
|
|
{
|
|
"epoch": 1.8784648187633262,
|
|
"grad_norm": 0.3611230558411743,
|
|
"learning_rate": 3.1518821497455326e-05,
|
|
"loss": 1.019770622253418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24030046164989471,
|
|
"step": 441,
|
|
"valid_targets_mean": 13502.8,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 1.88272921108742,
|
|
"grad_norm": 0.34932481184333747,
|
|
"learning_rate": 3.147017609566054e-05,
|
|
"loss": 1.0181442499160767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2621876299381256,
|
|
"step": 442,
|
|
"valid_targets_mean": 15241.9,
|
|
"valid_targets_min": 8209
|
|
},
|
|
{
|
|
"epoch": 1.886993603411514,
|
|
"grad_norm": 0.3798979014834382,
|
|
"learning_rate": 3.142142936818353e-05,
|
|
"loss": 1.0328328609466553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619326710700989,
|
|
"step": 443,
|
|
"valid_targets_mean": 14370.1,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 1.8912579957356077,
|
|
"grad_norm": 0.31616579194746375,
|
|
"learning_rate": 3.137258174564501e-05,
|
|
"loss": 0.9993445873260498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2385060340166092,
|
|
"step": 444,
|
|
"valid_targets_mean": 14367.9,
|
|
"valid_targets_min": 2805
|
|
},
|
|
{
|
|
"epoch": 1.8955223880597014,
|
|
"grad_norm": 0.4153759008157176,
|
|
"learning_rate": 3.1323633659556986e-05,
|
|
"loss": 1.0545213222503662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26555371284484863,
|
|
"step": 445,
|
|
"valid_targets_mean": 15161.8,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 1.8997867803837953,
|
|
"grad_norm": 0.322498070011591,
|
|
"learning_rate": 3.127458554231894e-05,
|
|
"loss": 1.0254485607147217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22738614678382874,
|
|
"step": 446,
|
|
"valid_targets_mean": 14012.2,
|
|
"valid_targets_min": 5255
|
|
},
|
|
{
|
|
"epoch": 1.9040511727078893,
|
|
"grad_norm": 0.3686719239421431,
|
|
"learning_rate": 3.122543782721402e-05,
|
|
"loss": 1.01595139503479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25128450989723206,
|
|
"step": 447,
|
|
"valid_targets_mean": 13560.5,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 1.908315565031983,
|
|
"grad_norm": 0.3142450837420514,
|
|
"learning_rate": 3.1176190948405194e-05,
|
|
"loss": 1.069765567779541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2478228360414505,
|
|
"step": 448,
|
|
"valid_targets_mean": 14122.2,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 1.9125799573560767,
|
|
"grad_norm": 0.3259602966126672,
|
|
"learning_rate": 3.112684534093142e-05,
|
|
"loss": 1.0053722858428955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23693761229515076,
|
|
"step": 449,
|
|
"valid_targets_mean": 14771.7,
|
|
"valid_targets_min": 7360
|
|
},
|
|
{
|
|
"epoch": 1.9168443496801706,
|
|
"grad_norm": 0.3998294156093436,
|
|
"learning_rate": 3.107740144070385e-05,
|
|
"loss": 1.0424801111221313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25787419080734253,
|
|
"step": 450,
|
|
"valid_targets_mean": 14059.9,
|
|
"valid_targets_min": 2691
|
|
},
|
|
{
|
|
"epoch": 1.9211087420042645,
|
|
"grad_norm": 0.37809686416661686,
|
|
"learning_rate": 3.102785968450188e-05,
|
|
"loss": 1.0229108333587646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25710922479629517,
|
|
"step": 451,
|
|
"valid_targets_mean": 15053.9,
|
|
"valid_targets_min": 4019
|
|
},
|
|
{
|
|
"epoch": 1.9253731343283582,
|
|
"grad_norm": 0.3658869516959057,
|
|
"learning_rate": 3.09782205099694e-05,
|
|
"loss": 1.017193078994751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24409866333007812,
|
|
"step": 452,
|
|
"valid_targets_mean": 14496.3,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 1.929637526652452,
|
|
"grad_norm": 0.40770451564229465,
|
|
"learning_rate": 3.092848435561084e-05,
|
|
"loss": 1.0102355480194092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771579623222351,
|
|
"step": 453,
|
|
"valid_targets_mean": 14780.3,
|
|
"valid_targets_min": 4633
|
|
},
|
|
{
|
|
"epoch": 1.9339019189765458,
|
|
"grad_norm": 0.4249002626346894,
|
|
"learning_rate": 3.0878651660787376e-05,
|
|
"loss": 1.0602593421936035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704622447490692,
|
|
"step": 454,
|
|
"valid_targets_mean": 15116.1,
|
|
"valid_targets_min": 7740
|
|
},
|
|
{
|
|
"epoch": 1.9381663113006398,
|
|
"grad_norm": 0.41710557170487544,
|
|
"learning_rate": 3.082872286571295e-05,
|
|
"loss": 1.0430220365524292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716583013534546,
|
|
"step": 455,
|
|
"valid_targets_mean": 15225.5,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 1.9424307036247335,
|
|
"grad_norm": 0.5134510384316797,
|
|
"learning_rate": 3.077869841145049e-05,
|
|
"loss": 1.0674012899398804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270738810300827,
|
|
"step": 456,
|
|
"valid_targets_mean": 14779.5,
|
|
"valid_targets_min": 6332
|
|
},
|
|
{
|
|
"epoch": 1.9466950959488272,
|
|
"grad_norm": 0.3511244172574389,
|
|
"learning_rate": 3.0728578739907934e-05,
|
|
"loss": 1.0135924816131592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25227487087249756,
|
|
"step": 457,
|
|
"valid_targets_mean": 14540.5,
|
|
"valid_targets_min": 4167
|
|
},
|
|
{
|
|
"epoch": 1.950959488272921,
|
|
"grad_norm": 0.4837760234158933,
|
|
"learning_rate": 3.067836429383437e-05,
|
|
"loss": 1.0494167804718018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25580132007598877,
|
|
"step": 458,
|
|
"valid_targets_mean": 14053.8,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 1.955223880597015,
|
|
"grad_norm": 0.40629715687940127,
|
|
"learning_rate": 3.062805551681609e-05,
|
|
"loss": 1.0064584016799927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26788318157196045,
|
|
"step": 459,
|
|
"valid_targets_mean": 15165.4,
|
|
"valid_targets_min": 8024
|
|
},
|
|
{
|
|
"epoch": 1.9594882729211087,
|
|
"grad_norm": 0.39073735497922735,
|
|
"learning_rate": 3.057765285327271e-05,
|
|
"loss": 1.0187773704528809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2431391328573227,
|
|
"step": 460,
|
|
"valid_targets_mean": 14963.2,
|
|
"valid_targets_min": 6194
|
|
},
|
|
{
|
|
"epoch": 1.9637526652452024,
|
|
"grad_norm": 0.3914267615569126,
|
|
"learning_rate": 3.0527156748453214e-05,
|
|
"loss": 1.0559675693511963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25785666704177856,
|
|
"step": 461,
|
|
"valid_targets_mean": 14774.2,
|
|
"valid_targets_min": 3990
|
|
},
|
|
{
|
|
"epoch": 1.9680170575692963,
|
|
"grad_norm": 0.3025895464918562,
|
|
"learning_rate": 3.047656764843203e-05,
|
|
"loss": 0.9742813110351562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2464253008365631,
|
|
"step": 462,
|
|
"valid_targets_mean": 15133.5,
|
|
"valid_targets_min": 4797
|
|
},
|
|
{
|
|
"epoch": 1.9722814498933903,
|
|
"grad_norm": 0.36633771205393245,
|
|
"learning_rate": 3.0425886000105094e-05,
|
|
"loss": 1.0286858081817627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24696555733680725,
|
|
"step": 463,
|
|
"valid_targets_mean": 14326.2,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 1.976545842217484,
|
|
"grad_norm": 0.3150709552268554,
|
|
"learning_rate": 3.0375112251185892e-05,
|
|
"loss": 1.0114942789077759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2749716341495514,
|
|
"step": 464,
|
|
"valid_targets_mean": 14633.4,
|
|
"valid_targets_min": 4870
|
|
},
|
|
{
|
|
"epoch": 1.9808102345415777,
|
|
"grad_norm": 0.394379772504802,
|
|
"learning_rate": 3.0324246850201527e-05,
|
|
"loss": 1.0314910411834717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604653537273407,
|
|
"step": 465,
|
|
"valid_targets_mean": 14508.2,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 1.9850746268656716,
|
|
"grad_norm": 0.3032793670356622,
|
|
"learning_rate": 3.0273290246488732e-05,
|
|
"loss": 1.0470383167266846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582100033760071,
|
|
"step": 466,
|
|
"valid_targets_mean": 14294.9,
|
|
"valid_targets_min": 2956
|
|
},
|
|
{
|
|
"epoch": 1.9893390191897655,
|
|
"grad_norm": 0.3837673187665111,
|
|
"learning_rate": 3.0222242890189904e-05,
|
|
"loss": 1.022276759147644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25700968503952026,
|
|
"step": 467,
|
|
"valid_targets_mean": 14991.4,
|
|
"valid_targets_min": 7709
|
|
},
|
|
{
|
|
"epoch": 1.9936034115138592,
|
|
"grad_norm": 0.3278734779287702,
|
|
"learning_rate": 3.017110523224914e-05,
|
|
"loss": 0.9854072332382202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594117522239685,
|
|
"step": 468,
|
|
"valid_targets_mean": 15114.2,
|
|
"valid_targets_min": 4733
|
|
},
|
|
{
|
|
"epoch": 1.997867803837953,
|
|
"grad_norm": 0.3634319350213615,
|
|
"learning_rate": 3.011987772440825e-05,
|
|
"loss": 1.0170570611953735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552461326122284,
|
|
"step": 469,
|
|
"valid_targets_mean": 13891.4,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.413256975308945,
|
|
"learning_rate": 3.006856081920277e-05,
|
|
"loss": 1.0269068479537964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5216063857078552,
|
|
"step": 470,
|
|
"valid_targets_mean": 15169.9,
|
|
"valid_targets_min": 9901
|
|
},
|
|
{
|
|
"epoch": 2.0042643923240937,
|
|
"grad_norm": 0.4619453229739822,
|
|
"learning_rate": 3.001715496995793e-05,
|
|
"loss": 0.9987869262695312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25192755460739136,
|
|
"step": 471,
|
|
"valid_targets_mean": 14206.0,
|
|
"valid_targets_min": 6976
|
|
},
|
|
{
|
|
"epoch": 2.008528784648188,
|
|
"grad_norm": 0.5167331243841146,
|
|
"learning_rate": 2.9965660630784715e-05,
|
|
"loss": 1.017713189125061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26012545824050903,
|
|
"step": 472,
|
|
"valid_targets_mean": 15563.6,
|
|
"valid_targets_min": 10378
|
|
},
|
|
{
|
|
"epoch": 2.0127931769722816,
|
|
"grad_norm": 0.35584995260565605,
|
|
"learning_rate": 2.9914078256575782e-05,
|
|
"loss": 1.0301311016082764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502261996269226,
|
|
"step": 473,
|
|
"valid_targets_mean": 14867.3,
|
|
"valid_targets_min": 6280
|
|
},
|
|
{
|
|
"epoch": 2.0170575692963753,
|
|
"grad_norm": 0.45252543912658216,
|
|
"learning_rate": 2.9862408303001492e-05,
|
|
"loss": 1.07619047164917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920687794685364,
|
|
"step": 474,
|
|
"valid_targets_mean": 14555.3,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 2.021321961620469,
|
|
"grad_norm": 0.39046640391844367,
|
|
"learning_rate": 2.9810651226505875e-05,
|
|
"loss": 1.036402702331543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27445799112319946,
|
|
"step": 475,
|
|
"valid_targets_mean": 14317.6,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 2.025586353944563,
|
|
"grad_norm": 0.41901154506208516,
|
|
"learning_rate": 2.9758807484302566e-05,
|
|
"loss": 1.0324018001556396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24932430684566498,
|
|
"step": 476,
|
|
"valid_targets_mean": 13864.8,
|
|
"valid_targets_min": 2151
|
|
},
|
|
{
|
|
"epoch": 2.029850746268657,
|
|
"grad_norm": 0.4842255167391339,
|
|
"learning_rate": 2.9706877534370822e-05,
|
|
"loss": 1.0386693477630615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522968351840973,
|
|
"step": 477,
|
|
"valid_targets_mean": 14256.4,
|
|
"valid_targets_min": 2066
|
|
},
|
|
{
|
|
"epoch": 2.0341151385927505,
|
|
"grad_norm": 0.4640468268987125,
|
|
"learning_rate": 2.965486183545142e-05,
|
|
"loss": 1.0114332437515259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.248225599527359,
|
|
"step": 478,
|
|
"valid_targets_mean": 14081.0,
|
|
"valid_targets_min": 2909
|
|
},
|
|
{
|
|
"epoch": 2.038379530916844,
|
|
"grad_norm": 0.42418965622022137,
|
|
"learning_rate": 2.9602760847042645e-05,
|
|
"loss": 0.9994747042655945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25438761711120605,
|
|
"step": 479,
|
|
"valid_targets_mean": 15119.9,
|
|
"valid_targets_min": 4874
|
|
},
|
|
{
|
|
"epoch": 2.0426439232409384,
|
|
"grad_norm": 0.4785686956964631,
|
|
"learning_rate": 2.955057502939621e-05,
|
|
"loss": 0.9988273978233337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23822565376758575,
|
|
"step": 480,
|
|
"valid_targets_mean": 13312.1,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 2.046908315565032,
|
|
"grad_norm": 0.4105882000722668,
|
|
"learning_rate": 2.9498304843513193e-05,
|
|
"loss": 1.0198397636413574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25080275535583496,
|
|
"step": 481,
|
|
"valid_targets_mean": 14688.1,
|
|
"valid_targets_min": 2511
|
|
},
|
|
{
|
|
"epoch": 2.0511727078891258,
|
|
"grad_norm": 0.5300415459250852,
|
|
"learning_rate": 2.9445950751139957e-05,
|
|
"loss": 0.9783532023429871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24423807859420776,
|
|
"step": 482,
|
|
"valid_targets_mean": 14867.3,
|
|
"valid_targets_min": 7621
|
|
},
|
|
{
|
|
"epoch": 2.0554371002132195,
|
|
"grad_norm": 0.4238362638262537,
|
|
"learning_rate": 2.939351321476412e-05,
|
|
"loss": 0.988930344581604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24535754323005676,
|
|
"step": 483,
|
|
"valid_targets_mean": 15264.8,
|
|
"valid_targets_min": 9523
|
|
},
|
|
{
|
|
"epoch": 2.0597014925373136,
|
|
"grad_norm": 0.4115894914881983,
|
|
"learning_rate": 2.9340992697610393e-05,
|
|
"loss": 0.9587956666946411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23480558395385742,
|
|
"step": 484,
|
|
"valid_targets_mean": 14411.7,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 2.0639658848614073,
|
|
"grad_norm": 0.3520050876205193,
|
|
"learning_rate": 2.9288389663636537e-05,
|
|
"loss": 0.943556010723114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2204040288925171,
|
|
"step": 485,
|
|
"valid_targets_mean": 14179.5,
|
|
"valid_targets_min": 4288
|
|
},
|
|
{
|
|
"epoch": 2.068230277185501,
|
|
"grad_norm": 0.42290922947817755,
|
|
"learning_rate": 2.923570457752925e-05,
|
|
"loss": 0.9887169599533081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22086945176124573,
|
|
"step": 486,
|
|
"valid_targets_mean": 13013.9,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 2.0724946695095947,
|
|
"grad_norm": 0.4078997322913125,
|
|
"learning_rate": 2.9182937904700078e-05,
|
|
"loss": 0.9575746655464172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23732571303844452,
|
|
"step": 487,
|
|
"valid_targets_mean": 14603.0,
|
|
"valid_targets_min": 5807
|
|
},
|
|
{
|
|
"epoch": 2.076759061833689,
|
|
"grad_norm": 0.33844116633131804,
|
|
"learning_rate": 2.9130090111281278e-05,
|
|
"loss": 1.036177158355713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2664754092693329,
|
|
"step": 488,
|
|
"valid_targets_mean": 14712.9,
|
|
"valid_targets_min": 5686
|
|
},
|
|
{
|
|
"epoch": 2.0810234541577826,
|
|
"grad_norm": 0.3501994979988469,
|
|
"learning_rate": 2.9077161664121722e-05,
|
|
"loss": 0.9981597661972046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26289716362953186,
|
|
"step": 489,
|
|
"valid_targets_mean": 14764.6,
|
|
"valid_targets_min": 8317
|
|
},
|
|
{
|
|
"epoch": 2.0852878464818763,
|
|
"grad_norm": 0.3287483356110035,
|
|
"learning_rate": 2.902415303078275e-05,
|
|
"loss": 1.001094102859497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24368131160736084,
|
|
"step": 490,
|
|
"valid_targets_mean": 14034.1,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 2.08955223880597,
|
|
"grad_norm": 0.31960244008645244,
|
|
"learning_rate": 2.8971064679534072e-05,
|
|
"loss": 0.9861602187156677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26052600145339966,
|
|
"step": 491,
|
|
"valid_targets_mean": 14437.6,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 2.093816631130064,
|
|
"grad_norm": 0.35021423547748054,
|
|
"learning_rate": 2.8917897079349604e-05,
|
|
"loss": 1.0067569017410278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23735103011131287,
|
|
"step": 492,
|
|
"valid_targets_mean": 14412.9,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 2.098081023454158,
|
|
"grad_norm": 0.349818040748087,
|
|
"learning_rate": 2.8864650699903336e-05,
|
|
"loss": 0.9881401062011719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25098878145217896,
|
|
"step": 493,
|
|
"valid_targets_mean": 14247.2,
|
|
"valid_targets_min": 6013
|
|
},
|
|
{
|
|
"epoch": 2.1023454157782515,
|
|
"grad_norm": 0.30251701981824775,
|
|
"learning_rate": 2.881132601156518e-05,
|
|
"loss": 0.9695771932601929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23273074626922607,
|
|
"step": 494,
|
|
"valid_targets_mean": 14427.5,
|
|
"valid_targets_min": 4288
|
|
},
|
|
{
|
|
"epoch": 2.106609808102345,
|
|
"grad_norm": 0.2753078687925536,
|
|
"learning_rate": 2.8757923485396805e-05,
|
|
"loss": 0.9951672554016113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23935125768184662,
|
|
"step": 495,
|
|
"valid_targets_mean": 14137.9,
|
|
"valid_targets_min": 2994
|
|
},
|
|
{
|
|
"epoch": 2.1108742004264394,
|
|
"grad_norm": 0.35974801443669885,
|
|
"learning_rate": 2.8704443593147517e-05,
|
|
"loss": 0.9793565273284912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518188953399658,
|
|
"step": 496,
|
|
"valid_targets_mean": 14064.2,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 2.115138592750533,
|
|
"grad_norm": 0.29602530674455757,
|
|
"learning_rate": 2.8650886807250024e-05,
|
|
"loss": 1.01462984085083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24598625302314758,
|
|
"step": 497,
|
|
"valid_targets_mean": 14743.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 2.1194029850746268,
|
|
"grad_norm": 0.3128733508884366,
|
|
"learning_rate": 2.8597253600816332e-05,
|
|
"loss": 0.9523487687110901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23442687094211578,
|
|
"step": 498,
|
|
"valid_targets_mean": 14247.1,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 2.1236673773987205,
|
|
"grad_norm": 0.3007440081893074,
|
|
"learning_rate": 2.8543544447633517e-05,
|
|
"loss": 1.0122535228729248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2612100839614868,
|
|
"step": 499,
|
|
"valid_targets_mean": 15464.2,
|
|
"valid_targets_min": 8312
|
|
},
|
|
{
|
|
"epoch": 2.1279317697228146,
|
|
"grad_norm": 0.33321054631335517,
|
|
"learning_rate": 2.8489759822159558e-05,
|
|
"loss": 1.0259451866149902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563559412956238,
|
|
"step": 500,
|
|
"valid_targets_mean": 14635.7,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 2.1321961620469083,
|
|
"grad_norm": 0.301153111906036,
|
|
"learning_rate": 2.843590019951914e-05,
|
|
"loss": 0.9697072505950928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2423160970211029,
|
|
"step": 501,
|
|
"valid_targets_mean": 15021.9,
|
|
"valid_targets_min": 5862
|
|
},
|
|
{
|
|
"epoch": 2.136460554371002,
|
|
"grad_norm": 0.3366031436844582,
|
|
"learning_rate": 2.838196605549948e-05,
|
|
"loss": 1.0216162204742432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600550651550293,
|
|
"step": 502,
|
|
"valid_targets_mean": 14611.5,
|
|
"valid_targets_min": 3847
|
|
},
|
|
{
|
|
"epoch": 2.140724946695096,
|
|
"grad_norm": 0.37089688104026236,
|
|
"learning_rate": 2.8327957866546082e-05,
|
|
"loss": 1.0324145555496216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24159306287765503,
|
|
"step": 503,
|
|
"valid_targets_mean": 14911.7,
|
|
"valid_targets_min": 4490
|
|
},
|
|
{
|
|
"epoch": 2.14498933901919,
|
|
"grad_norm": 0.28338169557739873,
|
|
"learning_rate": 2.8273876109758568e-05,
|
|
"loss": 0.9820647239685059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24781620502471924,
|
|
"step": 504,
|
|
"valid_targets_mean": 14725.2,
|
|
"valid_targets_min": 3428
|
|
},
|
|
{
|
|
"epoch": 2.1492537313432836,
|
|
"grad_norm": 0.3791074528718515,
|
|
"learning_rate": 2.8219721262886427e-05,
|
|
"loss": 0.9940178990364075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254575252532959,
|
|
"step": 505,
|
|
"valid_targets_mean": 14990.6,
|
|
"valid_targets_min": 6710
|
|
},
|
|
{
|
|
"epoch": 2.1535181236673773,
|
|
"grad_norm": 0.32241998632745894,
|
|
"learning_rate": 2.816549380432483e-05,
|
|
"loss": 0.9699217081069946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2379181683063507,
|
|
"step": 506,
|
|
"valid_targets_mean": 15332.5,
|
|
"valid_targets_min": 5308
|
|
},
|
|
{
|
|
"epoch": 2.1577825159914714,
|
|
"grad_norm": 0.36418783517251346,
|
|
"learning_rate": 2.8111194213110386e-05,
|
|
"loss": 0.9914301633834839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2490634024143219,
|
|
"step": 507,
|
|
"valid_targets_mean": 14937.6,
|
|
"valid_targets_min": 6979
|
|
},
|
|
{
|
|
"epoch": 2.162046908315565,
|
|
"grad_norm": 0.3481151636024591,
|
|
"learning_rate": 2.805682296891691e-05,
|
|
"loss": 1.034879207611084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2467014044523239,
|
|
"step": 508,
|
|
"valid_targets_mean": 14618.8,
|
|
"valid_targets_min": 2698
|
|
},
|
|
{
|
|
"epoch": 2.166311300639659,
|
|
"grad_norm": 0.3210905496194597,
|
|
"learning_rate": 2.8002380552051186e-05,
|
|
"loss": 1.0013360977172852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24771901965141296,
|
|
"step": 509,
|
|
"valid_targets_mean": 14498.8,
|
|
"valid_targets_min": 4776
|
|
},
|
|
{
|
|
"epoch": 2.1705756929637525,
|
|
"grad_norm": 0.366021222168054,
|
|
"learning_rate": 2.7947867443448728e-05,
|
|
"loss": 1.050952672958374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681233882904053,
|
|
"step": 510,
|
|
"valid_targets_mean": 15571.0,
|
|
"valid_targets_min": 7360
|
|
},
|
|
{
|
|
"epoch": 2.1748400852878467,
|
|
"grad_norm": 0.3069334481320911,
|
|
"learning_rate": 2.789328412466953e-05,
|
|
"loss": 0.9996070265769958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2375372052192688,
|
|
"step": 511,
|
|
"valid_targets_mean": 14751.3,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 2.1791044776119404,
|
|
"grad_norm": 0.35998981409036335,
|
|
"learning_rate": 2.7838631077893813e-05,
|
|
"loss": 1.026340126991272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25492531061172485,
|
|
"step": 512,
|
|
"valid_targets_mean": 14273.8,
|
|
"valid_targets_min": 2266
|
|
},
|
|
{
|
|
"epoch": 2.183368869936034,
|
|
"grad_norm": 0.3298626819432099,
|
|
"learning_rate": 2.7783908785917753e-05,
|
|
"loss": 1.010487675666809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26713818311691284,
|
|
"step": 513,
|
|
"valid_targets_mean": 14390.0,
|
|
"valid_targets_min": 4092
|
|
},
|
|
{
|
|
"epoch": 2.1876332622601278,
|
|
"grad_norm": 0.3646210342560063,
|
|
"learning_rate": 2.7729117732149244e-05,
|
|
"loss": 1.008512258529663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513628602027893,
|
|
"step": 514,
|
|
"valid_targets_mean": 14906.8,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 2.191897654584222,
|
|
"grad_norm": 0.36795022151544593,
|
|
"learning_rate": 2.7674258400603587e-05,
|
|
"loss": 1.0182232856750488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28085383772850037,
|
|
"step": 515,
|
|
"valid_targets_mean": 15310.5,
|
|
"valid_targets_min": 3271
|
|
},
|
|
{
|
|
"epoch": 2.1961620469083156,
|
|
"grad_norm": 0.3463845633061672,
|
|
"learning_rate": 2.761933127589927e-05,
|
|
"loss": 0.9878981709480286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540169954299927,
|
|
"step": 516,
|
|
"valid_targets_mean": 15095.4,
|
|
"valid_targets_min": 7989
|
|
},
|
|
{
|
|
"epoch": 2.2004264392324093,
|
|
"grad_norm": 0.32342978014510676,
|
|
"learning_rate": 2.7564336843253633e-05,
|
|
"loss": 1.009298324584961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23696599900722504,
|
|
"step": 517,
|
|
"valid_targets_mean": 14574.7,
|
|
"valid_targets_min": 4055
|
|
},
|
|
{
|
|
"epoch": 2.204690831556503,
|
|
"grad_norm": 0.35246191421570006,
|
|
"learning_rate": 2.7509275588478606e-05,
|
|
"loss": 0.9518311023712158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22795039415359497,
|
|
"step": 518,
|
|
"valid_targets_mean": 14171.1,
|
|
"valid_targets_min": 2066
|
|
},
|
|
{
|
|
"epoch": 2.208955223880597,
|
|
"grad_norm": 0.31379457120732324,
|
|
"learning_rate": 2.7454147997976404e-05,
|
|
"loss": 1.0131120681762695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25702476501464844,
|
|
"step": 519,
|
|
"valid_targets_mean": 14889.4,
|
|
"valid_targets_min": 7756
|
|
},
|
|
{
|
|
"epoch": 2.213219616204691,
|
|
"grad_norm": 0.3644697573362402,
|
|
"learning_rate": 2.7398954558735272e-05,
|
|
"loss": 1.0273219347000122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27117037773132324,
|
|
"step": 520,
|
|
"valid_targets_mean": 14467.1,
|
|
"valid_targets_min": 4204
|
|
},
|
|
{
|
|
"epoch": 2.2174840085287846,
|
|
"grad_norm": 0.31378876156098,
|
|
"learning_rate": 2.7343695758325125e-05,
|
|
"loss": 1.034562587738037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25118693709373474,
|
|
"step": 521,
|
|
"valid_targets_mean": 13929.1,
|
|
"valid_targets_min": 2697
|
|
},
|
|
{
|
|
"epoch": 2.2217484008528783,
|
|
"grad_norm": 0.36544381721525326,
|
|
"learning_rate": 2.7288372084893282e-05,
|
|
"loss": 0.9665502309799194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2441106140613556,
|
|
"step": 522,
|
|
"valid_targets_mean": 15248.3,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 2.2260127931769724,
|
|
"grad_norm": 0.32504579275074885,
|
|
"learning_rate": 2.7232984027160126e-05,
|
|
"loss": 0.990398645401001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21704241633415222,
|
|
"step": 523,
|
|
"valid_targets_mean": 14821.8,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 2.230277185501066,
|
|
"grad_norm": 0.33419036753063336,
|
|
"learning_rate": 2.7177532074414822e-05,
|
|
"loss": 1.001986026763916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518696188926697,
|
|
"step": 524,
|
|
"valid_targets_mean": 13669.2,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 2.23454157782516,
|
|
"grad_norm": 0.3246390584972087,
|
|
"learning_rate": 2.712201671651094e-05,
|
|
"loss": 1.0094494819641113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25980284810066223,
|
|
"step": 525,
|
|
"valid_targets_mean": 14686.9,
|
|
"valid_targets_min": 3001
|
|
},
|
|
{
|
|
"epoch": 2.2388059701492535,
|
|
"grad_norm": 0.38617899781151854,
|
|
"learning_rate": 2.7066438443862205e-05,
|
|
"loss": 0.9911002516746521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23355315625667572,
|
|
"step": 526,
|
|
"valid_targets_mean": 14217.2,
|
|
"valid_targets_min": 3674
|
|
},
|
|
{
|
|
"epoch": 2.2430703624733477,
|
|
"grad_norm": 0.31967579088660597,
|
|
"learning_rate": 2.701079774743808e-05,
|
|
"loss": 0.9487998485565186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23282305896282196,
|
|
"step": 527,
|
|
"valid_targets_mean": 14927.9,
|
|
"valid_targets_min": 6321
|
|
},
|
|
{
|
|
"epoch": 2.2473347547974414,
|
|
"grad_norm": 0.35481854930783874,
|
|
"learning_rate": 2.6955095118759496e-05,
|
|
"loss": 1.0225987434387207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256896436214447,
|
|
"step": 528,
|
|
"valid_targets_mean": 14094.9,
|
|
"valid_targets_min": 3640
|
|
},
|
|
{
|
|
"epoch": 2.251599147121535,
|
|
"grad_norm": 0.2998115044858296,
|
|
"learning_rate": 2.689933104989447e-05,
|
|
"loss": 1.0039013624191284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24659347534179688,
|
|
"step": 529,
|
|
"valid_targets_mean": 14749.3,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 2.2558635394456292,
|
|
"grad_norm": 0.3422539188877669,
|
|
"learning_rate": 2.6843506033453777e-05,
|
|
"loss": 0.9687084555625916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2278328239917755,
|
|
"step": 530,
|
|
"valid_targets_mean": 15208.4,
|
|
"valid_targets_min": 6099
|
|
},
|
|
{
|
|
"epoch": 2.260127931769723,
|
|
"grad_norm": 0.29376736895245537,
|
|
"learning_rate": 2.6787620562586587e-05,
|
|
"loss": 0.9959279298782349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26383495330810547,
|
|
"step": 531,
|
|
"valid_targets_mean": 15470.2,
|
|
"valid_targets_min": 8826
|
|
},
|
|
{
|
|
"epoch": 2.2643923240938166,
|
|
"grad_norm": 0.331149278206732,
|
|
"learning_rate": 2.673167513097613e-05,
|
|
"loss": 0.978769838809967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24497835338115692,
|
|
"step": 532,
|
|
"valid_targets_mean": 14315.0,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 2.2686567164179103,
|
|
"grad_norm": 0.30721831968666274,
|
|
"learning_rate": 2.6675670232835297e-05,
|
|
"loss": 0.9460316300392151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22342230379581451,
|
|
"step": 533,
|
|
"valid_targets_mean": 13737.3,
|
|
"valid_targets_min": 1778
|
|
},
|
|
{
|
|
"epoch": 2.272921108742004,
|
|
"grad_norm": 0.32322528315486937,
|
|
"learning_rate": 2.661960636290231e-05,
|
|
"loss": 0.9993689060211182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2589959502220154,
|
|
"step": 534,
|
|
"valid_targets_mean": 14841.2,
|
|
"valid_targets_min": 3402
|
|
},
|
|
{
|
|
"epoch": 2.277185501066098,
|
|
"grad_norm": 0.3266621431968494,
|
|
"learning_rate": 2.6563484016436346e-05,
|
|
"loss": 1.0264701843261719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553436756134033,
|
|
"step": 535,
|
|
"valid_targets_mean": 14679.2,
|
|
"valid_targets_min": 5570
|
|
},
|
|
{
|
|
"epoch": 2.281449893390192,
|
|
"grad_norm": 0.33169741508912115,
|
|
"learning_rate": 2.6507303689213143e-05,
|
|
"loss": 1.0258426666259766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054558336734772,
|
|
"step": 536,
|
|
"valid_targets_mean": 15405.3,
|
|
"valid_targets_min": 11021
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.32019496475602877,
|
|
"learning_rate": 2.6451065877520634e-05,
|
|
"loss": 0.9969546794891357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543153762817383,
|
|
"step": 537,
|
|
"valid_targets_mean": 15113.9,
|
|
"valid_targets_min": 7736
|
|
},
|
|
{
|
|
"epoch": 2.2899786780383797,
|
|
"grad_norm": 0.3414632048178366,
|
|
"learning_rate": 2.639477107815455e-05,
|
|
"loss": 0.9487636089324951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25016891956329346,
|
|
"step": 538,
|
|
"valid_targets_mean": 14803.9,
|
|
"valid_targets_min": 6681
|
|
},
|
|
{
|
|
"epoch": 2.2942430703624734,
|
|
"grad_norm": 0.34430281403856633,
|
|
"learning_rate": 2.633841978841406e-05,
|
|
"loss": 0.9727267622947693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23209461569786072,
|
|
"step": 539,
|
|
"valid_targets_mean": 14362.3,
|
|
"valid_targets_min": 7984
|
|
},
|
|
{
|
|
"epoch": 2.298507462686567,
|
|
"grad_norm": 0.32936335566423325,
|
|
"learning_rate": 2.6282012506097347e-05,
|
|
"loss": 1.0068211555480957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509459853172302,
|
|
"step": 540,
|
|
"valid_targets_mean": 14533.2,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 2.302771855010661,
|
|
"grad_norm": 0.33712612093459954,
|
|
"learning_rate": 2.622554972949724e-05,
|
|
"loss": 1.0443518161773682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2472529262304306,
|
|
"step": 541,
|
|
"valid_targets_mean": 13808.7,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 2.307036247334755,
|
|
"grad_norm": 0.35149539199610075,
|
|
"learning_rate": 2.6169031957396778e-05,
|
|
"loss": 1.0184926986694336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640667259693146,
|
|
"step": 542,
|
|
"valid_targets_mean": 14197.4,
|
|
"valid_targets_min": 1999
|
|
},
|
|
{
|
|
"epoch": 2.3113006396588487,
|
|
"grad_norm": 0.337678553086505,
|
|
"learning_rate": 2.611245968906482e-05,
|
|
"loss": 0.9821817874908447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25984886288642883,
|
|
"step": 543,
|
|
"valid_targets_mean": 14675.0,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 2.3155650319829424,
|
|
"grad_norm": 0.3850534778421881,
|
|
"learning_rate": 2.605583342425165e-05,
|
|
"loss": 0.9992858171463013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23248092830181122,
|
|
"step": 544,
|
|
"valid_targets_mean": 13720.2,
|
|
"valid_targets_min": 1863
|
|
},
|
|
{
|
|
"epoch": 2.319829424307036,
|
|
"grad_norm": 0.35296894817904273,
|
|
"learning_rate": 2.5999153663184546e-05,
|
|
"loss": 1.0641541481018066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29142460227012634,
|
|
"step": 545,
|
|
"valid_targets_mean": 14533.0,
|
|
"valid_targets_min": 3852
|
|
},
|
|
{
|
|
"epoch": 2.3240938166311302,
|
|
"grad_norm": 0.36708025020001983,
|
|
"learning_rate": 2.594242090656335e-05,
|
|
"loss": 0.9965769052505493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27358609437942505,
|
|
"step": 546,
|
|
"valid_targets_mean": 15236.0,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 2.328358208955224,
|
|
"grad_norm": 0.3991980593444986,
|
|
"learning_rate": 2.5885635655556075e-05,
|
|
"loss": 0.9888902306556702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735585868358612,
|
|
"step": 547,
|
|
"valid_targets_mean": 15963.0,
|
|
"valid_targets_min": 11813
|
|
},
|
|
{
|
|
"epoch": 2.3326226012793176,
|
|
"grad_norm": 0.3840739541693589,
|
|
"learning_rate": 2.5828798411794443e-05,
|
|
"loss": 1.0172688961029053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22582058608531952,
|
|
"step": 548,
|
|
"valid_targets_mean": 13778.4,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 2.3368869936034113,
|
|
"grad_norm": 0.34536658759940725,
|
|
"learning_rate": 2.5771909677369484e-05,
|
|
"loss": 0.9836137294769287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616347670555115,
|
|
"step": 549,
|
|
"valid_targets_mean": 15342.6,
|
|
"valid_targets_min": 9001
|
|
},
|
|
{
|
|
"epoch": 2.3411513859275055,
|
|
"grad_norm": 0.4400968625127286,
|
|
"learning_rate": 2.571496995482709e-05,
|
|
"loss": 1.0368796586990356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2504378855228424,
|
|
"step": 550,
|
|
"valid_targets_mean": 14808.0,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 2.345415778251599,
|
|
"grad_norm": 0.36026556994961684,
|
|
"learning_rate": 2.565797974716357e-05,
|
|
"loss": 1.0212513208389282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23313161730766296,
|
|
"step": 551,
|
|
"valid_targets_mean": 14403.5,
|
|
"valid_targets_min": 5487
|
|
},
|
|
{
|
|
"epoch": 2.349680170575693,
|
|
"grad_norm": 0.40420036599953035,
|
|
"learning_rate": 2.5600939557821205e-05,
|
|
"loss": 0.9742347002029419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24898028373718262,
|
|
"step": 552,
|
|
"valid_targets_mean": 14613.9,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 2.3539445628997866,
|
|
"grad_norm": 0.30665172704865445,
|
|
"learning_rate": 2.5543849890683813e-05,
|
|
"loss": 0.9749837517738342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259993314743042,
|
|
"step": 553,
|
|
"valid_targets_mean": 15261.2,
|
|
"valid_targets_min": 7622
|
|
},
|
|
{
|
|
"epoch": 2.3582089552238807,
|
|
"grad_norm": 0.3480237734194922,
|
|
"learning_rate": 2.548671125007229e-05,
|
|
"loss": 0.9855685830116272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25016504526138306,
|
|
"step": 554,
|
|
"valid_targets_mean": 13247.3,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 2.3624733475479744,
|
|
"grad_norm": 0.34928331758443754,
|
|
"learning_rate": 2.5429524140740155e-05,
|
|
"loss": 1.0194594860076904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653951644897461,
|
|
"step": 555,
|
|
"valid_targets_mean": 14490.5,
|
|
"valid_targets_min": 3899
|
|
},
|
|
{
|
|
"epoch": 2.366737739872068,
|
|
"grad_norm": 0.2943086599844404,
|
|
"learning_rate": 2.537228906786908e-05,
|
|
"loss": 1.0033053159713745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2457573562860489,
|
|
"step": 556,
|
|
"valid_targets_mean": 14929.9,
|
|
"valid_targets_min": 7722
|
|
},
|
|
{
|
|
"epoch": 2.3710021321961623,
|
|
"grad_norm": 0.36446725875942176,
|
|
"learning_rate": 2.5315006537064473e-05,
|
|
"loss": 0.9970245361328125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23409779369831085,
|
|
"step": 557,
|
|
"valid_targets_mean": 14046.7,
|
|
"valid_targets_min": 4710
|
|
},
|
|
{
|
|
"epoch": 2.375266524520256,
|
|
"grad_norm": 0.3392391142630678,
|
|
"learning_rate": 2.5257677054350927e-05,
|
|
"loss": 0.9785336852073669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23554298281669617,
|
|
"step": 558,
|
|
"valid_targets_mean": 14559.5,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 2.3795309168443497,
|
|
"grad_norm": 0.3468364558136946,
|
|
"learning_rate": 2.5200301126167857e-05,
|
|
"loss": 0.9779709577560425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23112671077251434,
|
|
"step": 559,
|
|
"valid_targets_mean": 14566.1,
|
|
"valid_targets_min": 7120
|
|
},
|
|
{
|
|
"epoch": 2.3837953091684434,
|
|
"grad_norm": 0.36895865748323503,
|
|
"learning_rate": 2.514287925936492e-05,
|
|
"loss": 1.02138090133667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661632001399994,
|
|
"step": 560,
|
|
"valid_targets_mean": 15004.2,
|
|
"valid_targets_min": 4432
|
|
},
|
|
{
|
|
"epoch": 2.388059701492537,
|
|
"grad_norm": 0.3505546777942912,
|
|
"learning_rate": 2.5085411961197626e-05,
|
|
"loss": 1.0374722480773926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24316981434822083,
|
|
"step": 561,
|
|
"valid_targets_mean": 14641.2,
|
|
"valid_targets_min": 5183
|
|
},
|
|
{
|
|
"epoch": 2.3923240938166312,
|
|
"grad_norm": 0.33399708012353235,
|
|
"learning_rate": 2.502789973932278e-05,
|
|
"loss": 1.000402569770813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25113219022750854,
|
|
"step": 562,
|
|
"valid_targets_mean": 14444.3,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 2.396588486140725,
|
|
"grad_norm": 0.32813402511679207,
|
|
"learning_rate": 2.4970343101794073e-05,
|
|
"loss": 1.0033669471740723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23444540798664093,
|
|
"step": 563,
|
|
"valid_targets_mean": 13773.7,
|
|
"valid_targets_min": 2585
|
|
},
|
|
{
|
|
"epoch": 2.4008528784648187,
|
|
"grad_norm": 0.33686647091329164,
|
|
"learning_rate": 2.4912742557057538e-05,
|
|
"loss": 0.9911777973175049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2385525405406952,
|
|
"step": 564,
|
|
"valid_targets_mean": 14328.3,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 2.405117270788913,
|
|
"grad_norm": 0.28929126841193425,
|
|
"learning_rate": 2.485509861394708e-05,
|
|
"loss": 1.0348443984985352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2621164917945862,
|
|
"step": 565,
|
|
"valid_targets_mean": 14846.8,
|
|
"valid_targets_min": 2791
|
|
},
|
|
{
|
|
"epoch": 2.4093816631130065,
|
|
"grad_norm": 0.2962657350974411,
|
|
"learning_rate": 2.4797411781679975e-05,
|
|
"loss": 0.9520940780639648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27280616760253906,
|
|
"step": 566,
|
|
"valid_targets_mean": 14597.2,
|
|
"valid_targets_min": 3095
|
|
},
|
|
{
|
|
"epoch": 2.4136460554371,
|
|
"grad_norm": 0.34091539321857006,
|
|
"learning_rate": 2.473968256985238e-05,
|
|
"loss": 1.0032382011413574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27264654636383057,
|
|
"step": 567,
|
|
"valid_targets_mean": 14863.2,
|
|
"valid_targets_min": 5634
|
|
},
|
|
{
|
|
"epoch": 2.417910447761194,
|
|
"grad_norm": 0.30547855046084316,
|
|
"learning_rate": 2.4681911488434825e-05,
|
|
"loss": 1.034733533859253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23930537700653076,
|
|
"step": 568,
|
|
"valid_targets_mean": 14235.1,
|
|
"valid_targets_min": 3758
|
|
},
|
|
{
|
|
"epoch": 2.4221748400852876,
|
|
"grad_norm": 0.33398207103009264,
|
|
"learning_rate": 2.4624099047767702e-05,
|
|
"loss": 1.0115569829940796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657352089881897,
|
|
"step": 569,
|
|
"valid_targets_mean": 15144.8,
|
|
"valid_targets_min": 6786
|
|
},
|
|
{
|
|
"epoch": 2.4264392324093818,
|
|
"grad_norm": 0.33741120813986736,
|
|
"learning_rate": 2.4566245758556787e-05,
|
|
"loss": 0.9684585928916931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23303334414958954,
|
|
"step": 570,
|
|
"valid_targets_mean": 14613.6,
|
|
"valid_targets_min": 2792
|
|
},
|
|
{
|
|
"epoch": 2.4307036247334755,
|
|
"grad_norm": 0.36889301409479364,
|
|
"learning_rate": 2.4508352131868664e-05,
|
|
"loss": 1.007417917251587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26629167795181274,
|
|
"step": 571,
|
|
"valid_targets_mean": 14827.8,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 2.434968017057569,
|
|
"grad_norm": 0.3405631534435157,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 0.9651800394058228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25025099515914917,
|
|
"step": 572,
|
|
"valid_targets_mean": 14830.1,
|
|
"valid_targets_min": 4156
|
|
},
|
|
{
|
|
"epoch": 2.4392324093816633,
|
|
"grad_norm": 0.37368194721116355,
|
|
"learning_rate": 2.439244591210443e-05,
|
|
"loss": 0.9608060717582703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526610493659973,
|
|
"step": 573,
|
|
"valid_targets_mean": 15141.2,
|
|
"valid_targets_min": 7503
|
|
},
|
|
{
|
|
"epoch": 2.443496801705757,
|
|
"grad_norm": 0.3519589117733432,
|
|
"learning_rate": 2.4334434342925133e-05,
|
|
"loss": 0.9872702360153198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24747343361377716,
|
|
"step": 574,
|
|
"valid_targets_mean": 14546.2,
|
|
"valid_targets_min": 3910
|
|
},
|
|
{
|
|
"epoch": 2.4477611940298507,
|
|
"grad_norm": 0.3915005609343368,
|
|
"learning_rate": 2.4276384484053227e-05,
|
|
"loss": 0.973964512348175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2625986933708191,
|
|
"step": 575,
|
|
"valid_targets_mean": 15743.1,
|
|
"valid_targets_min": 5391
|
|
},
|
|
{
|
|
"epoch": 2.4520255863539444,
|
|
"grad_norm": 0.322431185837138,
|
|
"learning_rate": 2.4218296848291795e-05,
|
|
"loss": 1.0351622104644775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23967526853084564,
|
|
"step": 576,
|
|
"valid_targets_mean": 14141.6,
|
|
"valid_targets_min": 6849
|
|
},
|
|
{
|
|
"epoch": 2.4562899786780386,
|
|
"grad_norm": 0.38071734743587365,
|
|
"learning_rate": 2.4160171948777603e-05,
|
|
"loss": 0.990182638168335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26047706604003906,
|
|
"step": 577,
|
|
"valid_targets_mean": 15361.8,
|
|
"valid_targets_min": 5801
|
|
},
|
|
{
|
|
"epoch": 2.4605543710021323,
|
|
"grad_norm": 0.3373916772700385,
|
|
"learning_rate": 2.410201029897665e-05,
|
|
"loss": 1.0633823871612549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27211618423461914,
|
|
"step": 578,
|
|
"valid_targets_mean": 15740.7,
|
|
"valid_targets_min": 11262
|
|
},
|
|
{
|
|
"epoch": 2.464818763326226,
|
|
"grad_norm": 0.30142243918163714,
|
|
"learning_rate": 2.4043812412679532e-05,
|
|
"loss": 0.9817286729812622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24626925587654114,
|
|
"step": 579,
|
|
"valid_targets_mean": 14815.9,
|
|
"valid_targets_min": 6065
|
|
},
|
|
{
|
|
"epoch": 2.4690831556503197,
|
|
"grad_norm": 0.38633305520724576,
|
|
"learning_rate": 2.3985578803996985e-05,
|
|
"loss": 1.0299785137176514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24435535073280334,
|
|
"step": 580,
|
|
"valid_targets_mean": 13771.2,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 2.473347547974414,
|
|
"grad_norm": 0.27832550827920893,
|
|
"learning_rate": 2.392730998735529e-05,
|
|
"loss": 1.0362637042999268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24985581636428833,
|
|
"step": 581,
|
|
"valid_targets_mean": 15384.0,
|
|
"valid_targets_min": 3886
|
|
},
|
|
{
|
|
"epoch": 2.4776119402985075,
|
|
"grad_norm": 0.3960988980880983,
|
|
"learning_rate": 2.3869006477491755e-05,
|
|
"loss": 1.0011943578720093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2445807009935379,
|
|
"step": 582,
|
|
"valid_targets_mean": 15580.9,
|
|
"valid_targets_min": 4483
|
|
},
|
|
{
|
|
"epoch": 2.481876332622601,
|
|
"grad_norm": 0.30437402842242794,
|
|
"learning_rate": 2.381066878945017e-05,
|
|
"loss": 0.9859625101089478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25368452072143555,
|
|
"step": 583,
|
|
"valid_targets_mean": 14908.8,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 2.486140724946695,
|
|
"grad_norm": 0.4323644697650072,
|
|
"learning_rate": 2.3752297438576257e-05,
|
|
"loss": 0.9823155403137207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2465706169605255,
|
|
"step": 584,
|
|
"valid_targets_mean": 15053.9,
|
|
"valid_targets_min": 5225
|
|
},
|
|
{
|
|
"epoch": 2.490405117270789,
|
|
"grad_norm": 0.358452006527221,
|
|
"learning_rate": 2.3693892940513074e-05,
|
|
"loss": 1.051724910736084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570017874240875,
|
|
"step": 585,
|
|
"valid_targets_mean": 14092.7,
|
|
"valid_targets_min": 4352
|
|
},
|
|
{
|
|
"epoch": 2.4946695095948828,
|
|
"grad_norm": 0.39022028589448476,
|
|
"learning_rate": 2.3635455811196536e-05,
|
|
"loss": 1.0154004096984863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25719571113586426,
|
|
"step": 586,
|
|
"valid_targets_mean": 14364.0,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 2.4989339019189765,
|
|
"grad_norm": 0.3490781892258707,
|
|
"learning_rate": 2.3576986566850796e-05,
|
|
"loss": 1.0354974269866943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22510012984275818,
|
|
"step": 587,
|
|
"valid_targets_mean": 13014.9,
|
|
"valid_targets_min": 3563
|
|
},
|
|
{
|
|
"epoch": 2.50319829424307,
|
|
"grad_norm": 0.41511633357102923,
|
|
"learning_rate": 2.351848572398371e-05,
|
|
"loss": 0.9651156663894653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2315250039100647,
|
|
"step": 588,
|
|
"valid_targets_mean": 13630.2,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 2.5074626865671643,
|
|
"grad_norm": 0.2987791039277017,
|
|
"learning_rate": 2.3459953799382276e-05,
|
|
"loss": 0.9886672496795654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25824683904647827,
|
|
"step": 589,
|
|
"valid_targets_mean": 14645.2,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 2.511727078891258,
|
|
"grad_norm": 0.3886434319439524,
|
|
"learning_rate": 2.3401391310108054e-05,
|
|
"loss": 1.002341866493225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27559715509414673,
|
|
"step": 590,
|
|
"valid_targets_mean": 15001.2,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 2.5159914712153517,
|
|
"grad_norm": 0.30742030402343073,
|
|
"learning_rate": 2.3342798773492602e-05,
|
|
"loss": 0.9635980129241943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24406534433364868,
|
|
"step": 591,
|
|
"valid_targets_mean": 13855.1,
|
|
"valid_targets_min": 5690
|
|
},
|
|
{
|
|
"epoch": 2.520255863539446,
|
|
"grad_norm": 0.35203229753820214,
|
|
"learning_rate": 2.328417670713294e-05,
|
|
"loss": 1.0279370546340942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23842567205429077,
|
|
"step": 592,
|
|
"valid_targets_mean": 13533.6,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 2.5245202558635396,
|
|
"grad_norm": 0.32721363622942806,
|
|
"learning_rate": 2.3225525628886918e-05,
|
|
"loss": 1.007649540901184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23621892929077148,
|
|
"step": 593,
|
|
"valid_targets_mean": 14168.2,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 2.5287846481876333,
|
|
"grad_norm": 0.3236736295525048,
|
|
"learning_rate": 2.3166846056868687e-05,
|
|
"loss": 1.058523416519165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25125598907470703,
|
|
"step": 594,
|
|
"valid_targets_mean": 14296.9,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 2.533049040511727,
|
|
"grad_norm": 0.37216898751375604,
|
|
"learning_rate": 2.31081385094441e-05,
|
|
"loss": 1.0509214401245117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27076196670532227,
|
|
"step": 595,
|
|
"valid_targets_mean": 14749.0,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 2.5373134328358207,
|
|
"grad_norm": 0.3169846488569876,
|
|
"learning_rate": 2.304940350522615e-05,
|
|
"loss": 0.9791812896728516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24198110401630402,
|
|
"step": 596,
|
|
"valid_targets_mean": 15197.2,
|
|
"valid_targets_min": 5355
|
|
},
|
|
{
|
|
"epoch": 2.541577825159915,
|
|
"grad_norm": 0.3655497542521435,
|
|
"learning_rate": 2.299064156307037e-05,
|
|
"loss": 0.9640069603919983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25089704990386963,
|
|
"step": 597,
|
|
"valid_targets_mean": 15269.8,
|
|
"valid_targets_min": 6535
|
|
},
|
|
{
|
|
"epoch": 2.5458422174840085,
|
|
"grad_norm": 0.33398322226694305,
|
|
"learning_rate": 2.2931853202070275e-05,
|
|
"loss": 1.0128238201141357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24849393963813782,
|
|
"step": 598,
|
|
"valid_targets_mean": 14430.6,
|
|
"valid_targets_min": 5331
|
|
},
|
|
{
|
|
"epoch": 2.550106609808102,
|
|
"grad_norm": 0.35036961035660963,
|
|
"learning_rate": 2.2873038941552724e-05,
|
|
"loss": 0.9940244555473328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2424945831298828,
|
|
"step": 599,
|
|
"valid_targets_mean": 14965.9,
|
|
"valid_targets_min": 5019
|
|
},
|
|
{
|
|
"epoch": 2.5543710021321964,
|
|
"grad_norm": 0.35909573928226096,
|
|
"learning_rate": 2.2814199301073412e-05,
|
|
"loss": 0.9789392948150635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.230018749833107,
|
|
"step": 600,
|
|
"valid_targets_mean": 14331.5,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 2.55863539445629,
|
|
"grad_norm": 0.4549872487932826,
|
|
"learning_rate": 2.27553348004122e-05,
|
|
"loss": 1.0233306884765625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27509790658950806,
|
|
"step": 601,
|
|
"valid_targets_mean": 14395.6,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 2.5628997867803838,
|
|
"grad_norm": 0.33761284764961597,
|
|
"learning_rate": 2.2696445959568577e-05,
|
|
"loss": 0.9874115586280823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23882831633090973,
|
|
"step": 602,
|
|
"valid_targets_mean": 14543.6,
|
|
"valid_targets_min": 4364
|
|
},
|
|
{
|
|
"epoch": 2.5671641791044775,
|
|
"grad_norm": 0.35401685293843677,
|
|
"learning_rate": 2.2637533298757064e-05,
|
|
"loss": 1.02969229221344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575541138648987,
|
|
"step": 603,
|
|
"valid_targets_mean": 14556.6,
|
|
"valid_targets_min": 5755
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.31392668806341634,
|
|
"learning_rate": 2.2578597338402567e-05,
|
|
"loss": 1.0304715633392334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26533687114715576,
|
|
"step": 604,
|
|
"valid_targets_mean": 14233.3,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 2.5756929637526653,
|
|
"grad_norm": 0.3423833897137569,
|
|
"learning_rate": 2.2519638599135844e-05,
|
|
"loss": 0.9955989122390747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24507801234722137,
|
|
"step": 605,
|
|
"valid_targets_mean": 14312.9,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 2.579957356076759,
|
|
"grad_norm": 0.29274617852947354,
|
|
"learning_rate": 2.2460657601788875e-05,
|
|
"loss": 1.0015254020690918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25418350100517273,
|
|
"step": 606,
|
|
"valid_targets_mean": 14795.7,
|
|
"valid_targets_min": 4826
|
|
},
|
|
{
|
|
"epoch": 2.5842217484008527,
|
|
"grad_norm": 0.35114149837713304,
|
|
"learning_rate": 2.2401654867390256e-05,
|
|
"loss": 1.0049234628677368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25163301825523376,
|
|
"step": 607,
|
|
"valid_targets_mean": 14743.6,
|
|
"valid_targets_min": 3783
|
|
},
|
|
{
|
|
"epoch": 2.588486140724947,
|
|
"grad_norm": 0.29439065023601596,
|
|
"learning_rate": 2.2342630917160605e-05,
|
|
"loss": 0.9980335831642151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268460750579834,
|
|
"step": 608,
|
|
"valid_targets_mean": 15170.2,
|
|
"valid_targets_min": 10692
|
|
},
|
|
{
|
|
"epoch": 2.5927505330490406,
|
|
"grad_norm": 0.3918348447282248,
|
|
"learning_rate": 2.2283586272507975e-05,
|
|
"loss": 1.0358186960220337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26268818974494934,
|
|
"step": 609,
|
|
"valid_targets_mean": 14811.3,
|
|
"valid_targets_min": 3785
|
|
},
|
|
{
|
|
"epoch": 2.5970149253731343,
|
|
"grad_norm": 0.29722435327418545,
|
|
"learning_rate": 2.2224521455023193e-05,
|
|
"loss": 1.0231435298919678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27298492193222046,
|
|
"step": 610,
|
|
"valid_targets_mean": 14908.2,
|
|
"valid_targets_min": 5562
|
|
},
|
|
{
|
|
"epoch": 2.6012793176972284,
|
|
"grad_norm": 0.3706282963267535,
|
|
"learning_rate": 2.216543698647534e-05,
|
|
"loss": 1.0015376806259155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23678603768348694,
|
|
"step": 611,
|
|
"valid_targets_mean": 14785.9,
|
|
"valid_targets_min": 7223
|
|
},
|
|
{
|
|
"epoch": 2.605543710021322,
|
|
"grad_norm": 0.28997351658302517,
|
|
"learning_rate": 2.210633338880704e-05,
|
|
"loss": 0.9889682531356812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501472532749176,
|
|
"step": 612,
|
|
"valid_targets_mean": 14344.6,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 2.609808102345416,
|
|
"grad_norm": 0.3463457632817264,
|
|
"learning_rate": 2.204721118412994e-05,
|
|
"loss": 0.9636353254318237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23523736000061035,
|
|
"step": 613,
|
|
"valid_targets_mean": 15641.1,
|
|
"valid_targets_min": 6636
|
|
},
|
|
{
|
|
"epoch": 2.6140724946695095,
|
|
"grad_norm": 0.27747934116807216,
|
|
"learning_rate": 2.1988070894720037e-05,
|
|
"loss": 0.9970721006393433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269124299287796,
|
|
"step": 614,
|
|
"valid_targets_mean": 14540.5,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 2.6183368869936032,
|
|
"grad_norm": 0.3511925155288994,
|
|
"learning_rate": 2.192891304301309e-05,
|
|
"loss": 1.0155706405639648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538289427757263,
|
|
"step": 615,
|
|
"valid_targets_mean": 15039.4,
|
|
"valid_targets_min": 6067
|
|
},
|
|
{
|
|
"epoch": 2.6226012793176974,
|
|
"grad_norm": 0.29011844045310653,
|
|
"learning_rate": 2.18697381516e-05,
|
|
"loss": 0.9873601198196411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24696794152259827,
|
|
"step": 616,
|
|
"valid_targets_mean": 15058.1,
|
|
"valid_targets_min": 2443
|
|
},
|
|
{
|
|
"epoch": 2.626865671641791,
|
|
"grad_norm": 0.3238909783787976,
|
|
"learning_rate": 2.181054674322221e-05,
|
|
"loss": 1.0182379484176636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2516877353191376,
|
|
"step": 617,
|
|
"valid_targets_mean": 14824.7,
|
|
"valid_targets_min": 3184
|
|
},
|
|
{
|
|
"epoch": 2.631130063965885,
|
|
"grad_norm": 0.3429422878810307,
|
|
"learning_rate": 2.1751339340767043e-05,
|
|
"loss": 0.9708336591720581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568596601486206,
|
|
"step": 618,
|
|
"valid_targets_mean": 14443.2,
|
|
"valid_targets_min": 4836
|
|
},
|
|
{
|
|
"epoch": 2.635394456289979,
|
|
"grad_norm": 0.2908515376112425,
|
|
"learning_rate": 2.169211646726313e-05,
|
|
"loss": 1.016640305519104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24657776951789856,
|
|
"step": 619,
|
|
"valid_targets_mean": 14274.7,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 2.6396588486140726,
|
|
"grad_norm": 0.3853554609431464,
|
|
"learning_rate": 2.163287864587576e-05,
|
|
"loss": 1.0126334428787231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27196288108825684,
|
|
"step": 620,
|
|
"valid_targets_mean": 15074.4,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 2.6439232409381663,
|
|
"grad_norm": 0.2942337811975356,
|
|
"learning_rate": 2.157362639990229e-05,
|
|
"loss": 0.9722011089324951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530434727668762,
|
|
"step": 621,
|
|
"valid_targets_mean": 14954.9,
|
|
"valid_targets_min": 4040
|
|
},
|
|
{
|
|
"epoch": 2.64818763326226,
|
|
"grad_norm": 0.32573988623048067,
|
|
"learning_rate": 2.151436025276747e-05,
|
|
"loss": 1.019566297531128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25579628348350525,
|
|
"step": 622,
|
|
"valid_targets_mean": 14778.2,
|
|
"valid_targets_min": 6227
|
|
},
|
|
{
|
|
"epoch": 2.6524520255863537,
|
|
"grad_norm": 0.3138634089608162,
|
|
"learning_rate": 2.145508072801888e-05,
|
|
"loss": 1.0274235010147095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799724340438843,
|
|
"step": 623,
|
|
"valid_targets_mean": 15236.8,
|
|
"valid_targets_min": 8987
|
|
},
|
|
{
|
|
"epoch": 2.656716417910448,
|
|
"grad_norm": 0.3026669199564441,
|
|
"learning_rate": 2.1395788349322256e-05,
|
|
"loss": 1.0347164869308472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27171123027801514,
|
|
"step": 624,
|
|
"valid_targets_mean": 14910.9,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 2.6609808102345416,
|
|
"grad_norm": 0.33452478891840515,
|
|
"learning_rate": 2.133648364045689e-05,
|
|
"loss": 0.9720939993858337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21387939155101776,
|
|
"step": 625,
|
|
"valid_targets_mean": 13264.8,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 2.6652452025586353,
|
|
"grad_norm": 0.3096653460973755,
|
|
"learning_rate": 2.1277167125310996e-05,
|
|
"loss": 0.9728649854660034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24355927109718323,
|
|
"step": 626,
|
|
"valid_targets_mean": 14669.8,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 2.6695095948827294,
|
|
"grad_norm": 0.31675455298818617,
|
|
"learning_rate": 2.1217839327877098e-05,
|
|
"loss": 0.9938322305679321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275725781917572,
|
|
"step": 627,
|
|
"valid_targets_mean": 14886.6,
|
|
"valid_targets_min": 3979
|
|
},
|
|
{
|
|
"epoch": 2.673773987206823,
|
|
"grad_norm": 0.32447364838102843,
|
|
"learning_rate": 2.1158500772247352e-05,
|
|
"loss": 1.0284380912780762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2407890409231186,
|
|
"step": 628,
|
|
"valid_targets_mean": 15458.4,
|
|
"valid_targets_min": 9633
|
|
},
|
|
{
|
|
"epoch": 2.678038379530917,
|
|
"grad_norm": 0.3053615292021208,
|
|
"learning_rate": 2.1099151982608985e-05,
|
|
"loss": 0.9960503578186035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23685085773468018,
|
|
"step": 629,
|
|
"valid_targets_mean": 13504.6,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 2.6823027718550105,
|
|
"grad_norm": 0.25634970031380244,
|
|
"learning_rate": 2.1039793483239607e-05,
|
|
"loss": 1.0126702785491943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531462013721466,
|
|
"step": 630,
|
|
"valid_targets_mean": 15036.3,
|
|
"valid_targets_min": 4520
|
|
},
|
|
{
|
|
"epoch": 2.6865671641791042,
|
|
"grad_norm": 0.3067523475864039,
|
|
"learning_rate": 2.0980425798502616e-05,
|
|
"loss": 0.9994568228721619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587946355342865,
|
|
"step": 631,
|
|
"valid_targets_mean": 14533.3,
|
|
"valid_targets_min": 4140
|
|
},
|
|
{
|
|
"epoch": 2.6908315565031984,
|
|
"grad_norm": 0.25389346155205395,
|
|
"learning_rate": 2.092104945284255e-05,
|
|
"loss": 0.9778339862823486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528277039527893,
|
|
"step": 632,
|
|
"valid_targets_mean": 14683.9,
|
|
"valid_targets_min": 3664
|
|
},
|
|
{
|
|
"epoch": 2.695095948827292,
|
|
"grad_norm": 0.2778701673178365,
|
|
"learning_rate": 2.0861664970780434e-05,
|
|
"loss": 1.016330599784851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2787415385246277,
|
|
"step": 633,
|
|
"valid_targets_mean": 14680.2,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 2.699360341151386,
|
|
"grad_norm": 0.2775598140493888,
|
|
"learning_rate": 2.08022728769092e-05,
|
|
"loss": 1.007723093032837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2617778778076172,
|
|
"step": 634,
|
|
"valid_targets_mean": 14868.3,
|
|
"valid_targets_min": 8834
|
|
},
|
|
{
|
|
"epoch": 2.70362473347548,
|
|
"grad_norm": 0.27380802983068325,
|
|
"learning_rate": 2.0742873695889005e-05,
|
|
"loss": 0.9847657680511475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2117314636707306,
|
|
"step": 635,
|
|
"valid_targets_mean": 13743.9,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 2.7078891257995736,
|
|
"grad_norm": 0.2840238835125812,
|
|
"learning_rate": 2.0683467952442626e-05,
|
|
"loss": 1.0043309926986694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552762031555176,
|
|
"step": 636,
|
|
"valid_targets_mean": 14205.1,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 2.7121535181236673,
|
|
"grad_norm": 0.30845217995459634,
|
|
"learning_rate": 2.0624056171350785e-05,
|
|
"loss": 1.0011863708496094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538048028945923,
|
|
"step": 637,
|
|
"valid_targets_mean": 14988.6,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 2.716417910447761,
|
|
"grad_norm": 0.30436094910838707,
|
|
"learning_rate": 2.0564638877447566e-05,
|
|
"loss": 1.0156689882278442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511076033115387,
|
|
"step": 638,
|
|
"valid_targets_mean": 14630.3,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 2.7206823027718547,
|
|
"grad_norm": 0.3512096016591476,
|
|
"learning_rate": 2.0505216595615742e-05,
|
|
"loss": 1.0246028900146484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568962574005127,
|
|
"step": 639,
|
|
"valid_targets_mean": 15198.6,
|
|
"valid_targets_min": 7587
|
|
},
|
|
{
|
|
"epoch": 2.724946695095949,
|
|
"grad_norm": 0.29008263960254876,
|
|
"learning_rate": 2.044578985078215e-05,
|
|
"loss": 1.0243080854415894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24314099550247192,
|
|
"step": 640,
|
|
"valid_targets_mean": 13845.4,
|
|
"valid_targets_min": 2933
|
|
},
|
|
{
|
|
"epoch": 2.7292110874200426,
|
|
"grad_norm": 0.31678542997916903,
|
|
"learning_rate": 2.0386359167913046e-05,
|
|
"loss": 0.9641498923301697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24840395152568817,
|
|
"step": 641,
|
|
"valid_targets_mean": 15607.5,
|
|
"valid_targets_min": 12085
|
|
},
|
|
{
|
|
"epoch": 2.7334754797441363,
|
|
"grad_norm": 0.2724744113968817,
|
|
"learning_rate": 2.0326925072009485e-05,
|
|
"loss": 0.9665191173553467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23087330162525177,
|
|
"step": 642,
|
|
"valid_targets_mean": 14645.0,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 2.7377398720682304,
|
|
"grad_norm": 0.30852280956353484,
|
|
"learning_rate": 2.0267488088102657e-05,
|
|
"loss": 1.0037891864776611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23327767848968506,
|
|
"step": 643,
|
|
"valid_targets_mean": 15101.0,
|
|
"valid_targets_min": 6402
|
|
},
|
|
{
|
|
"epoch": 2.742004264392324,
|
|
"grad_norm": 0.27599319253618093,
|
|
"learning_rate": 2.0208048741249288e-05,
|
|
"loss": 0.9691023230552673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526437044143677,
|
|
"step": 644,
|
|
"valid_targets_mean": 15077.2,
|
|
"valid_targets_min": 9108
|
|
},
|
|
{
|
|
"epoch": 2.746268656716418,
|
|
"grad_norm": 0.3188137031502205,
|
|
"learning_rate": 2.014860755652695e-05,
|
|
"loss": 1.047473669052124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28787750005722046,
|
|
"step": 645,
|
|
"valid_targets_mean": 15099.8,
|
|
"valid_targets_min": 6838
|
|
},
|
|
{
|
|
"epoch": 2.750533049040512,
|
|
"grad_norm": 0.2782840678901553,
|
|
"learning_rate": 2.0089165059029477e-05,
|
|
"loss": 1.0118706226348877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2625814974308014,
|
|
"step": 646,
|
|
"valid_targets_mean": 15174.9,
|
|
"valid_targets_min": 4873
|
|
},
|
|
{
|
|
"epoch": 2.7547974413646057,
|
|
"grad_norm": 0.2956993212549637,
|
|
"learning_rate": 2.0029721773862277e-05,
|
|
"loss": 0.9921021461486816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24903784692287445,
|
|
"step": 647,
|
|
"valid_targets_mean": 13627.9,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 2.7590618336886994,
|
|
"grad_norm": 0.299103789669543,
|
|
"learning_rate": 1.997027822613773e-05,
|
|
"loss": 0.9913533926010132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2218574434518814,
|
|
"step": 648,
|
|
"valid_targets_mean": 13473.2,
|
|
"valid_targets_min": 5255
|
|
},
|
|
{
|
|
"epoch": 2.763326226012793,
|
|
"grad_norm": 0.31067609340389424,
|
|
"learning_rate": 1.9910834940970533e-05,
|
|
"loss": 1.014221429824829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568957805633545,
|
|
"step": 649,
|
|
"valid_targets_mean": 13940.1,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 2.767590618336887,
|
|
"grad_norm": 0.29701382950189886,
|
|
"learning_rate": 1.985139244347305e-05,
|
|
"loss": 1.0247479677200317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2506386339664459,
|
|
"step": 650,
|
|
"valid_targets_mean": 14478.3,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 2.771855010660981,
|
|
"grad_norm": 0.3248203492140254,
|
|
"learning_rate": 1.979195125875072e-05,
|
|
"loss": 1.024149775505066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758370637893677,
|
|
"step": 651,
|
|
"valid_targets_mean": 14935.5,
|
|
"valid_targets_min": 6628
|
|
},
|
|
{
|
|
"epoch": 2.7761194029850746,
|
|
"grad_norm": 0.2959759535941552,
|
|
"learning_rate": 1.9732511911897353e-05,
|
|
"loss": 0.9888455867767334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24844518303871155,
|
|
"step": 652,
|
|
"valid_targets_mean": 14193.0,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 2.7803837953091683,
|
|
"grad_norm": 0.25820938046737335,
|
|
"learning_rate": 1.9673074927990525e-05,
|
|
"loss": 0.9766222238540649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21590156853199005,
|
|
"step": 653,
|
|
"valid_targets_mean": 13946.5,
|
|
"valid_targets_min": 2227
|
|
},
|
|
{
|
|
"epoch": 2.7846481876332625,
|
|
"grad_norm": 0.31264287091815257,
|
|
"learning_rate": 1.9613640832086957e-05,
|
|
"loss": 1.0178096294403076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2458515167236328,
|
|
"step": 654,
|
|
"valid_targets_mean": 14395.0,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 2.788912579957356,
|
|
"grad_norm": 0.2772505249390298,
|
|
"learning_rate": 1.9554210149217855e-05,
|
|
"loss": 0.9603986144065857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2310628890991211,
|
|
"step": 655,
|
|
"valid_targets_mean": 14670.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 2.79317697228145,
|
|
"grad_norm": 0.2900845659415462,
|
|
"learning_rate": 1.9494783404384265e-05,
|
|
"loss": 0.9861732721328735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25131934881210327,
|
|
"step": 656,
|
|
"valid_targets_mean": 13690.4,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 2.7974413646055436,
|
|
"grad_norm": 0.2810150617194057,
|
|
"learning_rate": 1.9435361122552437e-05,
|
|
"loss": 1.0003434419631958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670168876647949,
|
|
"step": 657,
|
|
"valid_targets_mean": 15698.2,
|
|
"valid_targets_min": 11218
|
|
},
|
|
{
|
|
"epoch": 2.8017057569296373,
|
|
"grad_norm": 0.3169069658778849,
|
|
"learning_rate": 1.9375943828649215e-05,
|
|
"loss": 1.0127019882202148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24696844816207886,
|
|
"step": 658,
|
|
"valid_targets_mean": 13997.6,
|
|
"valid_targets_min": 4737
|
|
},
|
|
{
|
|
"epoch": 2.8059701492537314,
|
|
"grad_norm": 0.3012460761714894,
|
|
"learning_rate": 1.9316532047557378e-05,
|
|
"loss": 0.9938580393791199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25326305627822876,
|
|
"step": 659,
|
|
"valid_targets_mean": 13795.0,
|
|
"valid_targets_min": 3402
|
|
},
|
|
{
|
|
"epoch": 2.810234541577825,
|
|
"grad_norm": 0.2771715770946969,
|
|
"learning_rate": 1.9257126304110998e-05,
|
|
"loss": 0.9841893315315247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24963247776031494,
|
|
"step": 660,
|
|
"valid_targets_mean": 14952.9,
|
|
"valid_targets_min": 6734
|
|
},
|
|
{
|
|
"epoch": 2.814498933901919,
|
|
"grad_norm": 0.32517295371005556,
|
|
"learning_rate": 1.919772712309081e-05,
|
|
"loss": 1.0039091110229492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22606849670410156,
|
|
"step": 661,
|
|
"valid_targets_mean": 14572.7,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 2.818763326226013,
|
|
"grad_norm": 0.2717742099194539,
|
|
"learning_rate": 1.9138335029219572e-05,
|
|
"loss": 0.9993230104446411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25343042612075806,
|
|
"step": 662,
|
|
"valid_targets_mean": 15381.4,
|
|
"valid_targets_min": 4878
|
|
},
|
|
{
|
|
"epoch": 2.8230277185501067,
|
|
"grad_norm": 0.3154265837446081,
|
|
"learning_rate": 1.9078950547157458e-05,
|
|
"loss": 1.046058177947998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25102484226226807,
|
|
"step": 663,
|
|
"valid_targets_mean": 14729.2,
|
|
"valid_targets_min": 5173
|
|
},
|
|
{
|
|
"epoch": 2.8272921108742004,
|
|
"grad_norm": 0.2965769646358737,
|
|
"learning_rate": 1.9019574201497387e-05,
|
|
"loss": 0.9924590587615967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25624752044677734,
|
|
"step": 664,
|
|
"valid_targets_mean": 14750.4,
|
|
"valid_targets_min": 5714
|
|
},
|
|
{
|
|
"epoch": 2.831556503198294,
|
|
"grad_norm": 0.3257989013347485,
|
|
"learning_rate": 1.8960206516760396e-05,
|
|
"loss": 1.040102481842041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822834849357605,
|
|
"step": 665,
|
|
"valid_targets_mean": 15179.4,
|
|
"valid_targets_min": 9622
|
|
},
|
|
{
|
|
"epoch": 2.835820895522388,
|
|
"grad_norm": 0.27309089503879674,
|
|
"learning_rate": 1.890084801739102e-05,
|
|
"loss": 0.9552997350692749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2162771373987198,
|
|
"step": 666,
|
|
"valid_targets_mean": 14456.8,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 2.840085287846482,
|
|
"grad_norm": 0.3263504241600361,
|
|
"learning_rate": 1.884149922775265e-05,
|
|
"loss": 0.9848830699920654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24017439782619476,
|
|
"step": 667,
|
|
"valid_targets_mean": 14748.1,
|
|
"valid_targets_min": 7099
|
|
},
|
|
{
|
|
"epoch": 2.8443496801705757,
|
|
"grad_norm": 0.29144203848094297,
|
|
"learning_rate": 1.878216067212291e-05,
|
|
"loss": 1.000566005706787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23389485478401184,
|
|
"step": 668,
|
|
"valid_targets_mean": 14425.2,
|
|
"valid_targets_min": 5268
|
|
},
|
|
{
|
|
"epoch": 2.8486140724946694,
|
|
"grad_norm": 0.3420988990094064,
|
|
"learning_rate": 1.8722832874689007e-05,
|
|
"loss": 1.0311607122421265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266502320766449,
|
|
"step": 669,
|
|
"valid_targets_mean": 15614.3,
|
|
"valid_targets_min": 11742
|
|
},
|
|
{
|
|
"epoch": 2.8528784648187635,
|
|
"grad_norm": 0.27487794999318355,
|
|
"learning_rate": 1.8663516359543123e-05,
|
|
"loss": 0.9967955350875854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24374626576900482,
|
|
"step": 670,
|
|
"valid_targets_mean": 13797.7,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.3345790554116216,
|
|
"learning_rate": 1.860421165067775e-05,
|
|
"loss": 1.0099653005599976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24598859250545502,
|
|
"step": 671,
|
|
"valid_targets_mean": 15301.4,
|
|
"valid_targets_min": 3307
|
|
},
|
|
{
|
|
"epoch": 2.861407249466951,
|
|
"grad_norm": 0.26069829861481697,
|
|
"learning_rate": 1.8544919271981125e-05,
|
|
"loss": 0.9845088124275208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23169393837451935,
|
|
"step": 672,
|
|
"valid_targets_mean": 14041.2,
|
|
"valid_targets_min": 2267
|
|
},
|
|
{
|
|
"epoch": 2.8656716417910446,
|
|
"grad_norm": 0.32263193512720817,
|
|
"learning_rate": 1.8485639747232535e-05,
|
|
"loss": 1.0051112174987793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25941702723503113,
|
|
"step": 673,
|
|
"valid_targets_mean": 15096.3,
|
|
"valid_targets_min": 9399
|
|
},
|
|
{
|
|
"epoch": 2.8699360341151388,
|
|
"grad_norm": 0.2776411277412439,
|
|
"learning_rate": 1.8426373600097723e-05,
|
|
"loss": 1.0177416801452637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25789016485214233,
|
|
"step": 674,
|
|
"valid_targets_mean": 15325.4,
|
|
"valid_targets_min": 8396
|
|
},
|
|
{
|
|
"epoch": 2.8742004264392325,
|
|
"grad_norm": 0.30995118118169,
|
|
"learning_rate": 1.836712135412424e-05,
|
|
"loss": 1.0024807453155518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25216683745384216,
|
|
"step": 675,
|
|
"valid_targets_mean": 15016.1,
|
|
"valid_targets_min": 7838
|
|
},
|
|
{
|
|
"epoch": 2.878464818763326,
|
|
"grad_norm": 0.28300865829754157,
|
|
"learning_rate": 1.8307883532736878e-05,
|
|
"loss": 1.0101498365402222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24708959460258484,
|
|
"step": 676,
|
|
"valid_targets_mean": 15054.6,
|
|
"valid_targets_min": 3859
|
|
},
|
|
{
|
|
"epoch": 2.88272921108742,
|
|
"grad_norm": 0.28062598741042477,
|
|
"learning_rate": 1.8248660659232964e-05,
|
|
"loss": 0.9696506857872009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2315896451473236,
|
|
"step": 677,
|
|
"valid_targets_mean": 14770.5,
|
|
"valid_targets_min": 5402
|
|
},
|
|
{
|
|
"epoch": 2.886993603411514,
|
|
"grad_norm": 0.2667965349211097,
|
|
"learning_rate": 1.8189453256777798e-05,
|
|
"loss": 1.0135741233825684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599356174468994,
|
|
"step": 678,
|
|
"valid_targets_mean": 14648.9,
|
|
"valid_targets_min": 4446
|
|
},
|
|
{
|
|
"epoch": 2.8912579957356077,
|
|
"grad_norm": 0.2811475995406021,
|
|
"learning_rate": 1.8130261848399996e-05,
|
|
"loss": 0.9844275712966919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23622454702854156,
|
|
"step": 679,
|
|
"valid_targets_mean": 14444.9,
|
|
"valid_targets_min": 3093
|
|
},
|
|
{
|
|
"epoch": 2.8955223880597014,
|
|
"grad_norm": 0.2811261928082959,
|
|
"learning_rate": 1.8071086956986916e-05,
|
|
"loss": 0.9562913179397583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241518795490265,
|
|
"step": 680,
|
|
"valid_targets_mean": 13660.1,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 2.8997867803837956,
|
|
"grad_norm": 0.28398691496428397,
|
|
"learning_rate": 1.8011929105279967e-05,
|
|
"loss": 0.9911601543426514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23510421812534332,
|
|
"step": 681,
|
|
"valid_targets_mean": 14456.2,
|
|
"valid_targets_min": 5121
|
|
},
|
|
{
|
|
"epoch": 2.9040511727078893,
|
|
"grad_norm": 0.275681186423059,
|
|
"learning_rate": 1.795278881587007e-05,
|
|
"loss": 0.9530655145645142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24053296446800232,
|
|
"step": 682,
|
|
"valid_targets_mean": 14905.6,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 2.908315565031983,
|
|
"grad_norm": 0.2934651366406735,
|
|
"learning_rate": 1.7893666611192962e-05,
|
|
"loss": 1.001355767250061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26369741559028625,
|
|
"step": 683,
|
|
"valid_targets_mean": 14420.5,
|
|
"valid_targets_min": 6043
|
|
},
|
|
{
|
|
"epoch": 2.9125799573560767,
|
|
"grad_norm": 0.2622226770181641,
|
|
"learning_rate": 1.783456301352467e-05,
|
|
"loss": 1.0324132442474365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23967042565345764,
|
|
"step": 684,
|
|
"valid_targets_mean": 14147.8,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 2.9168443496801704,
|
|
"grad_norm": 0.29139717898499357,
|
|
"learning_rate": 1.7775478544976813e-05,
|
|
"loss": 0.9583367705345154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24220289289951324,
|
|
"step": 685,
|
|
"valid_targets_mean": 14339.9,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 2.9211087420042645,
|
|
"grad_norm": 0.3131197857024419,
|
|
"learning_rate": 1.7716413727492035e-05,
|
|
"loss": 1.0138423442840576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26877832412719727,
|
|
"step": 686,
|
|
"valid_targets_mean": 14603.4,
|
|
"valid_targets_min": 4177
|
|
},
|
|
{
|
|
"epoch": 2.925373134328358,
|
|
"grad_norm": 0.2837835935312025,
|
|
"learning_rate": 1.7657369082839392e-05,
|
|
"loss": 1.0201077461242676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24502786993980408,
|
|
"step": 687,
|
|
"valid_targets_mean": 14408.7,
|
|
"valid_targets_min": 6466
|
|
},
|
|
{
|
|
"epoch": 2.929637526652452,
|
|
"grad_norm": 0.2778569113568313,
|
|
"learning_rate": 1.7598345132609747e-05,
|
|
"loss": 1.0258088111877441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24969694018363953,
|
|
"step": 688,
|
|
"valid_targets_mean": 14399.9,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 2.933901918976546,
|
|
"grad_norm": 0.28646587599984696,
|
|
"learning_rate": 1.7539342398211132e-05,
|
|
"loss": 1.0020571947097778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26965343952178955,
|
|
"step": 689,
|
|
"valid_targets_mean": 14672.0,
|
|
"valid_targets_min": 2071
|
|
},
|
|
{
|
|
"epoch": 2.9381663113006398,
|
|
"grad_norm": 0.272819667811924,
|
|
"learning_rate": 1.748036140086416e-05,
|
|
"loss": 1.014739990234375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268605500459671,
|
|
"step": 690,
|
|
"valid_targets_mean": 15134.9,
|
|
"valid_targets_min": 2612
|
|
},
|
|
{
|
|
"epoch": 2.9424307036247335,
|
|
"grad_norm": 0.2893868740697082,
|
|
"learning_rate": 1.742140266159744e-05,
|
|
"loss": 0.9970999956130981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24844536185264587,
|
|
"step": 691,
|
|
"valid_targets_mean": 14269.2,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 2.946695095948827,
|
|
"grad_norm": 0.2636192165484143,
|
|
"learning_rate": 1.7362466701242943e-05,
|
|
"loss": 1.0036377906799316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2315756380558014,
|
|
"step": 692,
|
|
"valid_targets_mean": 13665.4,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 2.950959488272921,
|
|
"grad_norm": 0.2916182260459237,
|
|
"learning_rate": 1.7303554040431426e-05,
|
|
"loss": 1.001044750213623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23404213786125183,
|
|
"step": 693,
|
|
"valid_targets_mean": 14589.4,
|
|
"valid_targets_min": 7691
|
|
},
|
|
{
|
|
"epoch": 2.955223880597015,
|
|
"grad_norm": 0.26335873037754964,
|
|
"learning_rate": 1.7244665199587812e-05,
|
|
"loss": 0.9943997859954834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25878506898880005,
|
|
"step": 694,
|
|
"valid_targets_mean": 15091.8,
|
|
"valid_targets_min": 7829
|
|
},
|
|
{
|
|
"epoch": 2.9594882729211087,
|
|
"grad_norm": 0.26994974250503256,
|
|
"learning_rate": 1.7185800698926594e-05,
|
|
"loss": 1.0259474515914917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2739965319633484,
|
|
"step": 695,
|
|
"valid_targets_mean": 14661.2,
|
|
"valid_targets_min": 10498
|
|
},
|
|
{
|
|
"epoch": 2.9637526652452024,
|
|
"grad_norm": 0.2543023824148054,
|
|
"learning_rate": 1.7126961058447276e-05,
|
|
"loss": 0.9935315251350403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2325773537158966,
|
|
"step": 696,
|
|
"valid_targets_mean": 15374.6,
|
|
"valid_targets_min": 7090
|
|
},
|
|
{
|
|
"epoch": 2.9680170575692966,
|
|
"grad_norm": 0.2786843052322958,
|
|
"learning_rate": 1.706814679792973e-05,
|
|
"loss": 1.0332754850387573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570928931236267,
|
|
"step": 697,
|
|
"valid_targets_mean": 14630.6,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 2.9722814498933903,
|
|
"grad_norm": 0.24935686351831798,
|
|
"learning_rate": 1.7009358436929632e-05,
|
|
"loss": 0.9843980669975281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23867692053318024,
|
|
"step": 698,
|
|
"valid_targets_mean": 14091.2,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 2.976545842217484,
|
|
"grad_norm": 0.29857611799903405,
|
|
"learning_rate": 1.6950596494773855e-05,
|
|
"loss": 1.0147424936294556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726747989654541,
|
|
"step": 699,
|
|
"valid_targets_mean": 15212.6,
|
|
"valid_targets_min": 3210
|
|
},
|
|
{
|
|
"epoch": 2.9808102345415777,
|
|
"grad_norm": 0.2658442395716486,
|
|
"learning_rate": 1.6891861490555906e-05,
|
|
"loss": 1.0428764820098877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25429844856262207,
|
|
"step": 700,
|
|
"valid_targets_mean": 13402.4,
|
|
"valid_targets_min": 4405
|
|
},
|
|
{
|
|
"epoch": 2.9850746268656714,
|
|
"grad_norm": 0.3083935330698825,
|
|
"learning_rate": 1.683315394313132e-05,
|
|
"loss": 0.9809987545013428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24431899189949036,
|
|
"step": 701,
|
|
"valid_targets_mean": 14898.9,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 2.9893390191897655,
|
|
"grad_norm": 0.28417123865137395,
|
|
"learning_rate": 1.677447437111309e-05,
|
|
"loss": 0.9754353761672974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2438005954027176,
|
|
"step": 702,
|
|
"valid_targets_mean": 13953.7,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 2.9936034115138592,
|
|
"grad_norm": 0.2726311414376801,
|
|
"learning_rate": 1.671582329286707e-05,
|
|
"loss": 1.025224208831787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757064700126648,
|
|
"step": 703,
|
|
"valid_targets_mean": 14422.6,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 2.997867803837953,
|
|
"grad_norm": 0.28984949537399207,
|
|
"learning_rate": 1.66572012265074e-05,
|
|
"loss": 1.017598032951355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25678616762161255,
|
|
"step": 704,
|
|
"valid_targets_mean": 14873.5,
|
|
"valid_targets_min": 8982
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.3288839621393955,
|
|
"learning_rate": 1.6598608689891953e-05,
|
|
"loss": 0.9181628227233887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4064050614833832,
|
|
"step": 705,
|
|
"valid_targets_mean": 14252.5,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 3.0042643923240937,
|
|
"grad_norm": 0.34922824511443973,
|
|
"learning_rate": 1.654004620061773e-05,
|
|
"loss": 0.9820230007171631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537866234779358,
|
|
"step": 706,
|
|
"valid_targets_mean": 14935.4,
|
|
"valid_targets_min": 2227
|
|
},
|
|
{
|
|
"epoch": 3.008528784648188,
|
|
"grad_norm": 0.29598563683514045,
|
|
"learning_rate": 1.6481514276016297e-05,
|
|
"loss": 0.9564608931541443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2232985496520996,
|
|
"step": 707,
|
|
"valid_targets_mean": 13370.4,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 3.0127931769722816,
|
|
"grad_norm": 0.3593652287552824,
|
|
"learning_rate": 1.6423013433149207e-05,
|
|
"loss": 0.9883919954299927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23936152458190918,
|
|
"step": 708,
|
|
"valid_targets_mean": 14046.8,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 3.0170575692963753,
|
|
"grad_norm": 0.2750802098849725,
|
|
"learning_rate": 1.636454418880347e-05,
|
|
"loss": 0.9924779534339905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24471265077590942,
|
|
"step": 709,
|
|
"valid_targets_mean": 14270.6,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 3.021321961620469,
|
|
"grad_norm": 0.344767333676187,
|
|
"learning_rate": 1.630610705948693e-05,
|
|
"loss": 0.9875859618186951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2499075084924698,
|
|
"step": 710,
|
|
"valid_targets_mean": 15469.6,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 3.025586353944563,
|
|
"grad_norm": 0.2805256371149405,
|
|
"learning_rate": 1.6247702561423753e-05,
|
|
"loss": 1.0232622623443604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24893423914909363,
|
|
"step": 711,
|
|
"valid_targets_mean": 14785.2,
|
|
"valid_targets_min": 2659
|
|
},
|
|
{
|
|
"epoch": 3.029850746268657,
|
|
"grad_norm": 0.37845316967674014,
|
|
"learning_rate": 1.6189331210549828e-05,
|
|
"loss": 0.9841704368591309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25965526700019836,
|
|
"step": 712,
|
|
"valid_targets_mean": 14634.5,
|
|
"valid_targets_min": 6716
|
|
},
|
|
{
|
|
"epoch": 3.0341151385927505,
|
|
"grad_norm": 0.2959929243789834,
|
|
"learning_rate": 1.613099352250825e-05,
|
|
"loss": 0.9825332164764404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24293263256549835,
|
|
"step": 713,
|
|
"valid_targets_mean": 14157.4,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 3.038379530916844,
|
|
"grad_norm": 0.31439851592503215,
|
|
"learning_rate": 1.6072690012644717e-05,
|
|
"loss": 0.9862507581710815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24176084995269775,
|
|
"step": 714,
|
|
"valid_targets_mean": 15151.0,
|
|
"valid_targets_min": 8214
|
|
},
|
|
{
|
|
"epoch": 3.0426439232409384,
|
|
"grad_norm": 0.32476794911745244,
|
|
"learning_rate": 1.6014421196003022e-05,
|
|
"loss": 0.9499908685684204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27254170179367065,
|
|
"step": 715,
|
|
"valid_targets_mean": 15267.2,
|
|
"valid_targets_min": 6651
|
|
},
|
|
{
|
|
"epoch": 3.046908315565032,
|
|
"grad_norm": 0.3397965883165207,
|
|
"learning_rate": 1.5956187587320468e-05,
|
|
"loss": 0.9644094705581665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24056094884872437,
|
|
"step": 716,
|
|
"valid_targets_mean": 15007.3,
|
|
"valid_targets_min": 6623
|
|
},
|
|
{
|
|
"epoch": 3.0511727078891258,
|
|
"grad_norm": 0.329588267878902,
|
|
"learning_rate": 1.5897989701023355e-05,
|
|
"loss": 0.9658697843551636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24570460617542267,
|
|
"step": 717,
|
|
"valid_targets_mean": 14385.4,
|
|
"valid_targets_min": 2839
|
|
},
|
|
{
|
|
"epoch": 3.0554371002132195,
|
|
"grad_norm": 0.3181757371404445,
|
|
"learning_rate": 1.58398280512224e-05,
|
|
"loss": 0.9798915386199951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25375425815582275,
|
|
"step": 718,
|
|
"valid_targets_mean": 14743.2,
|
|
"valid_targets_min": 3428
|
|
},
|
|
{
|
|
"epoch": 3.0597014925373136,
|
|
"grad_norm": 0.3489996125347138,
|
|
"learning_rate": 1.5781703151708215e-05,
|
|
"loss": 0.9843848943710327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23181578516960144,
|
|
"step": 719,
|
|
"valid_targets_mean": 13908.6,
|
|
"valid_targets_min": 2140
|
|
},
|
|
{
|
|
"epoch": 3.0639658848614073,
|
|
"grad_norm": 0.30839312580880845,
|
|
"learning_rate": 1.5723615515946773e-05,
|
|
"loss": 0.9665852785110474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2349701225757599,
|
|
"step": 720,
|
|
"valid_targets_mean": 13845.8,
|
|
"valid_targets_min": 2723
|
|
},
|
|
{
|
|
"epoch": 3.068230277185501,
|
|
"grad_norm": 0.37281135623140366,
|
|
"learning_rate": 1.5665565657074874e-05,
|
|
"loss": 0.9475533962249756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25182247161865234,
|
|
"step": 721,
|
|
"valid_targets_mean": 14853.0,
|
|
"valid_targets_min": 4328
|
|
},
|
|
{
|
|
"epoch": 3.0724946695095947,
|
|
"grad_norm": 0.3290575979442888,
|
|
"learning_rate": 1.560755408789558e-05,
|
|
"loss": 0.9628059267997742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21820151805877686,
|
|
"step": 722,
|
|
"valid_targets_mean": 12729.1,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 3.076759061833689,
|
|
"grad_norm": 0.3939057108966705,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 1.009767770767212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575645446777344,
|
|
"step": 723,
|
|
"valid_targets_mean": 15057.9,
|
|
"valid_targets_min": 7488
|
|
},
|
|
{
|
|
"epoch": 3.0810234541577826,
|
|
"grad_norm": 0.2934807765977991,
|
|
"learning_rate": 1.5491647868131343e-05,
|
|
"loss": 1.0204286575317383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26170748472213745,
|
|
"step": 724,
|
|
"valid_targets_mean": 15596.0,
|
|
"valid_targets_min": 7283
|
|
},
|
|
{
|
|
"epoch": 3.0852878464818763,
|
|
"grad_norm": 0.3504047540512624,
|
|
"learning_rate": 1.5433754241443223e-05,
|
|
"loss": 0.9512866735458374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.236918643116951,
|
|
"step": 725,
|
|
"valid_targets_mean": 14835.4,
|
|
"valid_targets_min": 6151
|
|
},
|
|
{
|
|
"epoch": 3.08955223880597,
|
|
"grad_norm": 0.33756079574746695,
|
|
"learning_rate": 1.53759009522323e-05,
|
|
"loss": 1.012444019317627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753005027770996,
|
|
"step": 726,
|
|
"valid_targets_mean": 15197.3,
|
|
"valid_targets_min": 8987
|
|
},
|
|
{
|
|
"epoch": 3.093816631130064,
|
|
"grad_norm": 0.32839056381195336,
|
|
"learning_rate": 1.5318088511565185e-05,
|
|
"loss": 1.000159740447998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2198394387960434,
|
|
"step": 727,
|
|
"valid_targets_mean": 14557.7,
|
|
"valid_targets_min": 4288
|
|
},
|
|
{
|
|
"epoch": 3.098081023454158,
|
|
"grad_norm": 0.33679437734381656,
|
|
"learning_rate": 1.5260317430147627e-05,
|
|
"loss": 0.9853755831718445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25518810749053955,
|
|
"step": 728,
|
|
"valid_targets_mean": 15146.8,
|
|
"valid_targets_min": 6993
|
|
},
|
|
{
|
|
"epoch": 3.1023454157782515,
|
|
"grad_norm": 0.3481475307752767,
|
|
"learning_rate": 1.5202588218320024e-05,
|
|
"loss": 1.0070561170578003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594500780105591,
|
|
"step": 729,
|
|
"valid_targets_mean": 14658.7,
|
|
"valid_targets_min": 9374
|
|
},
|
|
{
|
|
"epoch": 3.106609808102345,
|
|
"grad_norm": 0.3389630204611024,
|
|
"learning_rate": 1.5144901386052924e-05,
|
|
"loss": 0.9886671304702759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25142475962638855,
|
|
"step": 730,
|
|
"valid_targets_mean": 14178.2,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 3.1108742004264394,
|
|
"grad_norm": 0.33002191809714815,
|
|
"learning_rate": 1.5087257442942467e-05,
|
|
"loss": 0.995284914970398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2407272607088089,
|
|
"step": 731,
|
|
"valid_targets_mean": 14035.7,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 3.115138592750533,
|
|
"grad_norm": 0.32318762096188497,
|
|
"learning_rate": 1.502965689820593e-05,
|
|
"loss": 0.994063138961792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2572033405303955,
|
|
"step": 732,
|
|
"valid_targets_mean": 14857.7,
|
|
"valid_targets_min": 5823
|
|
},
|
|
{
|
|
"epoch": 3.1194029850746268,
|
|
"grad_norm": 0.31029081022647914,
|
|
"learning_rate": 1.4972100260677222e-05,
|
|
"loss": 0.9742772579193115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24965216219425201,
|
|
"step": 733,
|
|
"valid_targets_mean": 15568.8,
|
|
"valid_targets_min": 9668
|
|
},
|
|
{
|
|
"epoch": 3.1236673773987205,
|
|
"grad_norm": 0.30185660335583814,
|
|
"learning_rate": 1.4914588038802383e-05,
|
|
"loss": 0.953723132610321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591814696788788,
|
|
"step": 734,
|
|
"valid_targets_mean": 15243.9,
|
|
"valid_targets_min": 7651
|
|
},
|
|
{
|
|
"epoch": 3.1279317697228146,
|
|
"grad_norm": 0.3003692126372566,
|
|
"learning_rate": 1.4857120740635084e-05,
|
|
"loss": 0.9636922478675842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21446675062179565,
|
|
"step": 735,
|
|
"valid_targets_mean": 14190.0,
|
|
"valid_targets_min": 3093
|
|
},
|
|
{
|
|
"epoch": 3.1321961620469083,
|
|
"grad_norm": 0.28087954840149193,
|
|
"learning_rate": 1.4799698873832153e-05,
|
|
"loss": 0.970176100730896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24540992081165314,
|
|
"step": 736,
|
|
"valid_targets_mean": 14513.7,
|
|
"valid_targets_min": 4489
|
|
},
|
|
{
|
|
"epoch": 3.136460554371002,
|
|
"grad_norm": 0.30565479425564773,
|
|
"learning_rate": 1.4742322945649073e-05,
|
|
"loss": 0.9687221050262451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23796163499355316,
|
|
"step": 737,
|
|
"valid_targets_mean": 15115.2,
|
|
"valid_targets_min": 3630
|
|
},
|
|
{
|
|
"epoch": 3.140724946695096,
|
|
"grad_norm": 0.2588393721294529,
|
|
"learning_rate": 1.4684993462935532e-05,
|
|
"loss": 1.0161759853363037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24519619345664978,
|
|
"step": 738,
|
|
"valid_targets_mean": 13897.5,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 3.14498933901919,
|
|
"grad_norm": 0.30569469186051346,
|
|
"learning_rate": 1.462771093213092e-05,
|
|
"loss": 0.9936240911483765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2576857805252075,
|
|
"step": 739,
|
|
"valid_targets_mean": 14842.8,
|
|
"valid_targets_min": 4455
|
|
},
|
|
{
|
|
"epoch": 3.1492537313432836,
|
|
"grad_norm": 0.2831997746933687,
|
|
"learning_rate": 1.4570475859259856e-05,
|
|
"loss": 1.0184738636016846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25552698969841003,
|
|
"step": 740,
|
|
"valid_targets_mean": 14303.8,
|
|
"valid_targets_min": 3999
|
|
},
|
|
{
|
|
"epoch": 3.1535181236673773,
|
|
"grad_norm": 0.302681488910356,
|
|
"learning_rate": 1.4513288749927714e-05,
|
|
"loss": 0.9626487493515015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2463638335466385,
|
|
"step": 741,
|
|
"valid_targets_mean": 14189.3,
|
|
"valid_targets_min": 7031
|
|
},
|
|
{
|
|
"epoch": 3.1577825159914714,
|
|
"grad_norm": 0.25531380287898403,
|
|
"learning_rate": 1.4456150109316192e-05,
|
|
"loss": 1.0105798244476318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2492905706167221,
|
|
"step": 742,
|
|
"valid_targets_mean": 15032.7,
|
|
"valid_targets_min": 6873
|
|
},
|
|
{
|
|
"epoch": 3.162046908315565,
|
|
"grad_norm": 0.2904297036538883,
|
|
"learning_rate": 1.4399060442178798e-05,
|
|
"loss": 1.0012047290802002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24875105917453766,
|
|
"step": 743,
|
|
"valid_targets_mean": 14197.8,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 3.166311300639659,
|
|
"grad_norm": 0.26657849685492374,
|
|
"learning_rate": 1.4342020252836437e-05,
|
|
"loss": 0.9481156468391418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2375640720129013,
|
|
"step": 744,
|
|
"valid_targets_mean": 14842.0,
|
|
"valid_targets_min": 4959
|
|
},
|
|
{
|
|
"epoch": 3.1705756929637525,
|
|
"grad_norm": 0.2577855220560588,
|
|
"learning_rate": 1.4285030045172913e-05,
|
|
"loss": 0.9632688760757446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551208436489105,
|
|
"step": 745,
|
|
"valid_targets_mean": 15101.4,
|
|
"valid_targets_min": 7179
|
|
},
|
|
{
|
|
"epoch": 3.1748400852878467,
|
|
"grad_norm": 0.3148277600441966,
|
|
"learning_rate": 1.422809032263052e-05,
|
|
"loss": 0.9745003581047058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2595515549182892,
|
|
"step": 746,
|
|
"valid_targets_mean": 15064.9,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 3.1791044776119404,
|
|
"grad_norm": 0.25790496551944986,
|
|
"learning_rate": 1.4171201588205566e-05,
|
|
"loss": 0.9516160488128662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24452434480190277,
|
|
"step": 747,
|
|
"valid_targets_mean": 14460.3,
|
|
"valid_targets_min": 3546
|
|
},
|
|
{
|
|
"epoch": 3.183368869936034,
|
|
"grad_norm": 0.28200650649992354,
|
|
"learning_rate": 1.4114364344443935e-05,
|
|
"loss": 0.9666727781295776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23246654868125916,
|
|
"step": 748,
|
|
"valid_targets_mean": 15058.3,
|
|
"valid_targets_min": 5619
|
|
},
|
|
{
|
|
"epoch": 3.1876332622601278,
|
|
"grad_norm": 0.2818554192764713,
|
|
"learning_rate": 1.4057579093436653e-05,
|
|
"loss": 0.9781205058097839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23165199160575867,
|
|
"step": 749,
|
|
"valid_targets_mean": 14682.0,
|
|
"valid_targets_min": 4993
|
|
},
|
|
{
|
|
"epoch": 3.191897654584222,
|
|
"grad_norm": 0.2710172411364092,
|
|
"learning_rate": 1.400084633681546e-05,
|
|
"loss": 0.9583656191825867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21231116354465485,
|
|
"step": 750,
|
|
"valid_targets_mean": 14937.3,
|
|
"valid_targets_min": 6130
|
|
},
|
|
{
|
|
"epoch": 3.1961620469083156,
|
|
"grad_norm": 0.26522661525722746,
|
|
"learning_rate": 1.3944166575748355e-05,
|
|
"loss": 1.0064042806625366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24048569798469543,
|
|
"step": 751,
|
|
"valid_targets_mean": 15015.8,
|
|
"valid_targets_min": 6065
|
|
},
|
|
{
|
|
"epoch": 3.2004264392324093,
|
|
"grad_norm": 0.24819705493992156,
|
|
"learning_rate": 1.3887540310935187e-05,
|
|
"loss": 0.9470230937004089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24713729321956635,
|
|
"step": 752,
|
|
"valid_targets_mean": 14598.2,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 3.204690831556503,
|
|
"grad_norm": 0.3034194256896165,
|
|
"learning_rate": 1.3830968042603226e-05,
|
|
"loss": 0.9944400191307068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23861563205718994,
|
|
"step": 753,
|
|
"valid_targets_mean": 13820.2,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 3.208955223880597,
|
|
"grad_norm": 0.24162596167233502,
|
|
"learning_rate": 1.3774450270502762e-05,
|
|
"loss": 0.935854971408844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22209787368774414,
|
|
"step": 754,
|
|
"valid_targets_mean": 14340.8,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.213219616204691,
|
|
"grad_norm": 0.27924012419974925,
|
|
"learning_rate": 1.3717987493902656e-05,
|
|
"loss": 0.9580754041671753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23207560181617737,
|
|
"step": 755,
|
|
"valid_targets_mean": 14867.7,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 3.2174840085287846,
|
|
"grad_norm": 0.2631565886944658,
|
|
"learning_rate": 1.3661580211585947e-05,
|
|
"loss": 1.0013339519500732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24636045098304749,
|
|
"step": 756,
|
|
"valid_targets_mean": 14753.9,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 3.2217484008528783,
|
|
"grad_norm": 0.2546538981456892,
|
|
"learning_rate": 1.3605228921845457e-05,
|
|
"loss": 0.9701914191246033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535368502140045,
|
|
"step": 757,
|
|
"valid_targets_mean": 15058.9,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 3.2260127931769724,
|
|
"grad_norm": 0.2651658327817212,
|
|
"learning_rate": 1.3548934122479373e-05,
|
|
"loss": 0.9925574064254761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25720131397247314,
|
|
"step": 758,
|
|
"valid_targets_mean": 14217.5,
|
|
"valid_targets_min": 4859
|
|
},
|
|
{
|
|
"epoch": 3.230277185501066,
|
|
"grad_norm": 0.27027939963101194,
|
|
"learning_rate": 1.349269631078686e-05,
|
|
"loss": 0.970876932144165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24859249591827393,
|
|
"step": 759,
|
|
"valid_targets_mean": 15206.1,
|
|
"valid_targets_min": 5725
|
|
},
|
|
{
|
|
"epoch": 3.23454157782516,
|
|
"grad_norm": 0.2587721094089808,
|
|
"learning_rate": 1.3436515983563659e-05,
|
|
"loss": 1.0265617370605469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2438148409128189,
|
|
"step": 760,
|
|
"valid_targets_mean": 14201.3,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 3.2388059701492535,
|
|
"grad_norm": 0.24350275199980534,
|
|
"learning_rate": 1.3380393637097692e-05,
|
|
"loss": 0.9264395236968994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22638459503650665,
|
|
"step": 761,
|
|
"valid_targets_mean": 14595.2,
|
|
"valid_targets_min": 4471
|
|
},
|
|
{
|
|
"epoch": 3.2430703624733477,
|
|
"grad_norm": 0.24332680720792152,
|
|
"learning_rate": 1.3324329767164708e-05,
|
|
"loss": 0.9605574607849121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23158195614814758,
|
|
"step": 762,
|
|
"valid_targets_mean": 14623.3,
|
|
"valid_targets_min": 6527
|
|
},
|
|
{
|
|
"epoch": 3.2473347547974414,
|
|
"grad_norm": 0.2621574375311258,
|
|
"learning_rate": 1.3268324869023878e-05,
|
|
"loss": 0.976763129234314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2242659330368042,
|
|
"step": 763,
|
|
"valid_targets_mean": 13988.0,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 3.251599147121535,
|
|
"grad_norm": 0.25520291478182605,
|
|
"learning_rate": 1.3212379437413421e-05,
|
|
"loss": 1.008936882019043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539985179901123,
|
|
"step": 764,
|
|
"valid_targets_mean": 13731.8,
|
|
"valid_targets_min": 2767
|
|
},
|
|
{
|
|
"epoch": 3.2558635394456292,
|
|
"grad_norm": 0.25495156828681614,
|
|
"learning_rate": 1.3156493966546236e-05,
|
|
"loss": 0.9604157209396362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2437647134065628,
|
|
"step": 765,
|
|
"valid_targets_mean": 15253.4,
|
|
"valid_targets_min": 10283
|
|
},
|
|
{
|
|
"epoch": 3.260127931769723,
|
|
"grad_norm": 0.24326176721237994,
|
|
"learning_rate": 1.3100668950105534e-05,
|
|
"loss": 0.9801679849624634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2403479367494583,
|
|
"step": 766,
|
|
"valid_targets_mean": 13939.0,
|
|
"valid_targets_min": 2151
|
|
},
|
|
{
|
|
"epoch": 3.2643923240938166,
|
|
"grad_norm": 0.2648362448952545,
|
|
"learning_rate": 1.3044904881240507e-05,
|
|
"loss": 0.9167968034744263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22533074021339417,
|
|
"step": 767,
|
|
"valid_targets_mean": 14715.1,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 3.2686567164179103,
|
|
"grad_norm": 0.3084645052131096,
|
|
"learning_rate": 1.2989202252561926e-05,
|
|
"loss": 0.9942984580993652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27038753032684326,
|
|
"step": 768,
|
|
"valid_targets_mean": 15047.2,
|
|
"valid_targets_min": 9284
|
|
},
|
|
{
|
|
"epoch": 3.272921108742004,
|
|
"grad_norm": 0.2678905199646818,
|
|
"learning_rate": 1.2933561556137806e-05,
|
|
"loss": 0.9890280961990356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533625662326813,
|
|
"step": 769,
|
|
"valid_targets_mean": 15175.8,
|
|
"valid_targets_min": 8491
|
|
},
|
|
{
|
|
"epoch": 3.277185501066098,
|
|
"grad_norm": 0.24179677513976292,
|
|
"learning_rate": 1.2877983283489062e-05,
|
|
"loss": 0.9520249366760254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25359901785850525,
|
|
"step": 770,
|
|
"valid_targets_mean": 15184.2,
|
|
"valid_targets_min": 7985
|
|
},
|
|
{
|
|
"epoch": 3.281449893390192,
|
|
"grad_norm": 0.2682741858077797,
|
|
"learning_rate": 1.2822467925585186e-05,
|
|
"loss": 0.997904360294342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26393425464630127,
|
|
"step": 771,
|
|
"valid_targets_mean": 14969.1,
|
|
"valid_targets_min": 4675
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.26184038573544066,
|
|
"learning_rate": 1.2767015972839879e-05,
|
|
"loss": 0.9394642114639282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24244745075702667,
|
|
"step": 772,
|
|
"valid_targets_mean": 14638.4,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 3.2899786780383797,
|
|
"grad_norm": 0.25424206106322134,
|
|
"learning_rate": 1.2711627915106728e-05,
|
|
"loss": 0.9754142165184021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2096511572599411,
|
|
"step": 773,
|
|
"valid_targets_mean": 14076.6,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 3.2942430703624734,
|
|
"grad_norm": 0.24477837860761717,
|
|
"learning_rate": 1.2656304241674877e-05,
|
|
"loss": 0.9717386364936829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20806467533111572,
|
|
"step": 774,
|
|
"valid_targets_mean": 13653.0,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 3.298507462686567,
|
|
"grad_norm": 0.3033027878630011,
|
|
"learning_rate": 1.2601045441264734e-05,
|
|
"loss": 0.9691619873046875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24015672504901886,
|
|
"step": 775,
|
|
"valid_targets_mean": 14468.7,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 3.302771855010661,
|
|
"grad_norm": 0.2346707213963981,
|
|
"learning_rate": 1.2545852002023599e-05,
|
|
"loss": 0.9814898371696472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2222505807876587,
|
|
"step": 776,
|
|
"valid_targets_mean": 13769.2,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 3.307036247334755,
|
|
"grad_norm": 0.3281397941672066,
|
|
"learning_rate": 1.2490724411521406e-05,
|
|
"loss": 1.0499846935272217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25558245182037354,
|
|
"step": 777,
|
|
"valid_targets_mean": 14771.3,
|
|
"valid_targets_min": 2909
|
|
},
|
|
{
|
|
"epoch": 3.3113006396588487,
|
|
"grad_norm": 0.24244783914669535,
|
|
"learning_rate": 1.243566315674637e-05,
|
|
"loss": 0.9678056240081787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553331255912781,
|
|
"step": 778,
|
|
"valid_targets_mean": 14883.9,
|
|
"valid_targets_min": 3976
|
|
},
|
|
{
|
|
"epoch": 3.3155650319829424,
|
|
"grad_norm": 0.305062300907714,
|
|
"learning_rate": 1.238066872410073e-05,
|
|
"loss": 1.0484224557876587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569788098335266,
|
|
"step": 779,
|
|
"valid_targets_mean": 13571.6,
|
|
"valid_targets_min": 2936
|
|
},
|
|
{
|
|
"epoch": 3.319829424307036,
|
|
"grad_norm": 0.26775027006551666,
|
|
"learning_rate": 1.2325741599396418e-05,
|
|
"loss": 0.9865918159484863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2306908220052719,
|
|
"step": 780,
|
|
"valid_targets_mean": 13806.5,
|
|
"valid_targets_min": 2177
|
|
},
|
|
{
|
|
"epoch": 3.3240938166311302,
|
|
"grad_norm": 0.27014121957072873,
|
|
"learning_rate": 1.2270882267850765e-05,
|
|
"loss": 0.9287204146385193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.237704336643219,
|
|
"step": 781,
|
|
"valid_targets_mean": 15658.2,
|
|
"valid_targets_min": 6294
|
|
},
|
|
{
|
|
"epoch": 3.328358208955224,
|
|
"grad_norm": 0.287754845834019,
|
|
"learning_rate": 1.2216091214082248e-05,
|
|
"loss": 0.9274210929870605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23108148574829102,
|
|
"step": 782,
|
|
"valid_targets_mean": 14416.9,
|
|
"valid_targets_min": 3910
|
|
},
|
|
{
|
|
"epoch": 3.3326226012793176,
|
|
"grad_norm": 0.25663526801340314,
|
|
"learning_rate": 1.2161368922106192e-05,
|
|
"loss": 0.9926104545593262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26447635889053345,
|
|
"step": 783,
|
|
"valid_targets_mean": 15729.0,
|
|
"valid_targets_min": 10634
|
|
},
|
|
{
|
|
"epoch": 3.3368869936034113,
|
|
"grad_norm": 0.2931257583598174,
|
|
"learning_rate": 1.2106715875330475e-05,
|
|
"loss": 0.9968762397766113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680075168609619,
|
|
"step": 784,
|
|
"valid_targets_mean": 14808.2,
|
|
"valid_targets_min": 3531
|
|
},
|
|
{
|
|
"epoch": 3.3411513859275055,
|
|
"grad_norm": 0.2768075222387483,
|
|
"learning_rate": 1.2052132556551275e-05,
|
|
"loss": 1.0421247482299805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23852446675300598,
|
|
"step": 785,
|
|
"valid_targets_mean": 13779.9,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 3.345415778251599,
|
|
"grad_norm": 0.26922113045089796,
|
|
"learning_rate": 1.1997619447948814e-05,
|
|
"loss": 0.9955551624298096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2484271377325058,
|
|
"step": 786,
|
|
"valid_targets_mean": 14286.3,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 3.349680170575693,
|
|
"grad_norm": 0.2908311586215251,
|
|
"learning_rate": 1.1943177031083094e-05,
|
|
"loss": 1.0184731483459473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717573344707489,
|
|
"step": 787,
|
|
"valid_targets_mean": 15721.7,
|
|
"valid_targets_min": 9700
|
|
},
|
|
{
|
|
"epoch": 3.3539445628997866,
|
|
"grad_norm": 0.2519251309901798,
|
|
"learning_rate": 1.1888805786889621e-05,
|
|
"loss": 1.0131577253341675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515684962272644,
|
|
"step": 788,
|
|
"valid_targets_mean": 15286.8,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 3.3582089552238807,
|
|
"grad_norm": 0.30645413660658805,
|
|
"learning_rate": 1.183450619567518e-05,
|
|
"loss": 0.9787472486495972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24756258726119995,
|
|
"step": 789,
|
|
"valid_targets_mean": 14251.6,
|
|
"valid_targets_min": 4490
|
|
},
|
|
{
|
|
"epoch": 3.3624733475479744,
|
|
"grad_norm": 0.26232305011663887,
|
|
"learning_rate": 1.1780278737113581e-05,
|
|
"loss": 1.0121639966964722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24441544711589813,
|
|
"step": 790,
|
|
"valid_targets_mean": 14975.3,
|
|
"valid_targets_min": 3680
|
|
},
|
|
{
|
|
"epoch": 3.366737739872068,
|
|
"grad_norm": 0.3030622365686099,
|
|
"learning_rate": 1.1726123890241439e-05,
|
|
"loss": 0.9977049231529236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25506260991096497,
|
|
"step": 791,
|
|
"valid_targets_mean": 14009.7,
|
|
"valid_targets_min": 3023
|
|
},
|
|
{
|
|
"epoch": 3.3710021321961623,
|
|
"grad_norm": 0.29140871981606176,
|
|
"learning_rate": 1.1672042133453925e-05,
|
|
"loss": 0.9836759567260742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25351113080978394,
|
|
"step": 792,
|
|
"valid_targets_mean": 14919.7,
|
|
"valid_targets_min": 6013
|
|
},
|
|
{
|
|
"epoch": 3.375266524520256,
|
|
"grad_norm": 0.26745952898965397,
|
|
"learning_rate": 1.1618033944500527e-05,
|
|
"loss": 0.9230258464813232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2220560908317566,
|
|
"step": 793,
|
|
"valid_targets_mean": 14836.0,
|
|
"valid_targets_min": 2406
|
|
},
|
|
{
|
|
"epoch": 3.3795309168443497,
|
|
"grad_norm": 0.3102081315403335,
|
|
"learning_rate": 1.1564099800480864e-05,
|
|
"loss": 0.9660372734069824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22207561135292053,
|
|
"step": 794,
|
|
"valid_targets_mean": 13774.2,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 3.3837953091684434,
|
|
"grad_norm": 0.2490913224598025,
|
|
"learning_rate": 1.151024017784045e-05,
|
|
"loss": 0.9783912897109985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2386191487312317,
|
|
"step": 795,
|
|
"valid_targets_mean": 14262.1,
|
|
"valid_targets_min": 4572
|
|
},
|
|
{
|
|
"epoch": 3.388059701492537,
|
|
"grad_norm": 0.29054281608727117,
|
|
"learning_rate": 1.1456455552366488e-05,
|
|
"loss": 0.9620133638381958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26244238018989563,
|
|
"step": 796,
|
|
"valid_targets_mean": 15688.0,
|
|
"valid_targets_min": 10994
|
|
},
|
|
{
|
|
"epoch": 3.3923240938166312,
|
|
"grad_norm": 0.29952604665307336,
|
|
"learning_rate": 1.1402746399183671e-05,
|
|
"loss": 0.9715530872344971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24581879377365112,
|
|
"step": 797,
|
|
"valid_targets_mean": 14943.1,
|
|
"valid_targets_min": 6999
|
|
},
|
|
{
|
|
"epoch": 3.396588486140725,
|
|
"grad_norm": 0.26441230485833,
|
|
"learning_rate": 1.1349113192749986e-05,
|
|
"loss": 0.9797306060791016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2557758092880249,
|
|
"step": 798,
|
|
"valid_targets_mean": 15075.8,
|
|
"valid_targets_min": 6989
|
|
},
|
|
{
|
|
"epoch": 3.4008528784648187,
|
|
"grad_norm": 0.2790219543501813,
|
|
"learning_rate": 1.1295556406852488e-05,
|
|
"loss": 0.9969871044158936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2347119003534317,
|
|
"step": 799,
|
|
"valid_targets_mean": 14168.6,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 3.405117270788913,
|
|
"grad_norm": 0.26201015718914195,
|
|
"learning_rate": 1.1242076514603201e-05,
|
|
"loss": 0.955880343914032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21588972210884094,
|
|
"step": 800,
|
|
"valid_targets_mean": 14430.6,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 3.4093816631130065,
|
|
"grad_norm": 0.24806995409068797,
|
|
"learning_rate": 1.1188673988434831e-05,
|
|
"loss": 0.9920408725738525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2410411536693573,
|
|
"step": 801,
|
|
"valid_targets_mean": 13998.1,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 3.4136460554371,
|
|
"grad_norm": 0.29848409228999456,
|
|
"learning_rate": 1.1135349300096667e-05,
|
|
"loss": 0.9661373496055603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24285857379436493,
|
|
"step": 802,
|
|
"valid_targets_mean": 15185.6,
|
|
"valid_targets_min": 3257
|
|
},
|
|
{
|
|
"epoch": 3.417910447761194,
|
|
"grad_norm": 0.25251476542737417,
|
|
"learning_rate": 1.1082102920650397e-05,
|
|
"loss": 0.9824184775352478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22753749787807465,
|
|
"step": 803,
|
|
"valid_targets_mean": 14153.7,
|
|
"valid_targets_min": 4364
|
|
},
|
|
{
|
|
"epoch": 3.4221748400852876,
|
|
"grad_norm": 0.2483682139540597,
|
|
"learning_rate": 1.102893532046593e-05,
|
|
"loss": 1.001957654953003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591865658760071,
|
|
"step": 804,
|
|
"valid_targets_mean": 14353.4,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 3.4264392324093818,
|
|
"grad_norm": 0.2699000642224219,
|
|
"learning_rate": 1.0975846969217258e-05,
|
|
"loss": 0.9685598611831665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596426010131836,
|
|
"step": 805,
|
|
"valid_targets_mean": 14756.9,
|
|
"valid_targets_min": 7238
|
|
},
|
|
{
|
|
"epoch": 3.4307036247334755,
|
|
"grad_norm": 0.24461156052414396,
|
|
"learning_rate": 1.092283833587829e-05,
|
|
"loss": 0.9834060668945312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554374933242798,
|
|
"step": 806,
|
|
"valid_targets_mean": 14806.7,
|
|
"valid_targets_min": 4322
|
|
},
|
|
{
|
|
"epoch": 3.434968017057569,
|
|
"grad_norm": 0.25721119261175807,
|
|
"learning_rate": 1.086990988871873e-05,
|
|
"loss": 0.9687243103981018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370043396949768,
|
|
"step": 807,
|
|
"valid_targets_mean": 14201.8,
|
|
"valid_targets_min": 2523
|
|
},
|
|
{
|
|
"epoch": 3.4392324093816633,
|
|
"grad_norm": 0.271565819340625,
|
|
"learning_rate": 1.0817062095299929e-05,
|
|
"loss": 0.9997534155845642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24003225564956665,
|
|
"step": 808,
|
|
"valid_targets_mean": 13671.8,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 3.443496801705757,
|
|
"grad_norm": 0.2476065411633593,
|
|
"learning_rate": 1.0764295422470755e-05,
|
|
"loss": 0.9745993614196777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23869183659553528,
|
|
"step": 809,
|
|
"valid_targets_mean": 14831.8,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 3.4477611940298507,
|
|
"grad_norm": 0.277127282792621,
|
|
"learning_rate": 1.0711610336363477e-05,
|
|
"loss": 0.9948844909667969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2519140839576721,
|
|
"step": 810,
|
|
"valid_targets_mean": 14924.7,
|
|
"valid_targets_min": 4915
|
|
},
|
|
{
|
|
"epoch": 3.4520255863539444,
|
|
"grad_norm": 0.23211987960653785,
|
|
"learning_rate": 1.065900730238961e-05,
|
|
"loss": 0.9197704792022705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23289944231510162,
|
|
"step": 811,
|
|
"valid_targets_mean": 15488.8,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 3.4562899786780386,
|
|
"grad_norm": 0.2387382306487428,
|
|
"learning_rate": 1.0606486785235879e-05,
|
|
"loss": 0.9850577116012573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24952645599842072,
|
|
"step": 812,
|
|
"valid_targets_mean": 14707.6,
|
|
"valid_targets_min": 7077
|
|
},
|
|
{
|
|
"epoch": 3.4605543710021323,
|
|
"grad_norm": 0.2616740717997132,
|
|
"learning_rate": 1.0554049248860045e-05,
|
|
"loss": 1.0188957452774048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24433737993240356,
|
|
"step": 813,
|
|
"valid_targets_mean": 13376.9,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 3.464818763326226,
|
|
"grad_norm": 0.23569887094323022,
|
|
"learning_rate": 1.0501695156486819e-05,
|
|
"loss": 0.9842403531074524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2371315211057663,
|
|
"step": 814,
|
|
"valid_targets_mean": 14910.6,
|
|
"valid_targets_min": 2733
|
|
},
|
|
{
|
|
"epoch": 3.4690831556503197,
|
|
"grad_norm": 0.23081715082505702,
|
|
"learning_rate": 1.0449424970603796e-05,
|
|
"loss": 0.9469051361083984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22757041454315186,
|
|
"step": 815,
|
|
"valid_targets_mean": 14495.3,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 3.473347547974414,
|
|
"grad_norm": 0.25613539045930966,
|
|
"learning_rate": 1.0397239152957356e-05,
|
|
"loss": 0.9637795686721802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.234003484249115,
|
|
"step": 816,
|
|
"valid_targets_mean": 14073.7,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 3.4776119402985075,
|
|
"grad_norm": 0.23226042618728032,
|
|
"learning_rate": 1.034513816454858e-05,
|
|
"loss": 1.032134771347046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253731906414032,
|
|
"step": 817,
|
|
"valid_targets_mean": 14066.1,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 3.481876332622601,
|
|
"grad_norm": 0.26342043609085664,
|
|
"learning_rate": 1.0293122465629186e-05,
|
|
"loss": 0.9562536478042603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24223795533180237,
|
|
"step": 818,
|
|
"valid_targets_mean": 14672.9,
|
|
"valid_targets_min": 3004
|
|
},
|
|
{
|
|
"epoch": 3.486140724946695,
|
|
"grad_norm": 0.26300142285285033,
|
|
"learning_rate": 1.0241192515697432e-05,
|
|
"loss": 0.9867624640464783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2341744303703308,
|
|
"step": 819,
|
|
"valid_targets_mean": 14388.9,
|
|
"valid_targets_min": 2995
|
|
},
|
|
{
|
|
"epoch": 3.490405117270789,
|
|
"grad_norm": 0.22577998264799773,
|
|
"learning_rate": 1.0189348773494135e-05,
|
|
"loss": 1.0202937126159668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818657457828522,
|
|
"step": 820,
|
|
"valid_targets_mean": 14920.1,
|
|
"valid_targets_min": 4167
|
|
},
|
|
{
|
|
"epoch": 3.4946695095948828,
|
|
"grad_norm": 0.25028221792085803,
|
|
"learning_rate": 1.0137591696998514e-05,
|
|
"loss": 0.9820396900177002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24462352693080902,
|
|
"step": 821,
|
|
"valid_targets_mean": 14204.5,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 3.4989339019189765,
|
|
"grad_norm": 0.2662388579462513,
|
|
"learning_rate": 1.0085921743424225e-05,
|
|
"loss": 0.964117169380188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26233386993408203,
|
|
"step": 822,
|
|
"valid_targets_mean": 14899.5,
|
|
"valid_targets_min": 4092
|
|
},
|
|
{
|
|
"epoch": 3.50319829424307,
|
|
"grad_norm": 0.24421515409606953,
|
|
"learning_rate": 1.0034339369215288e-05,
|
|
"loss": 1.014388918876648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26374751329421997,
|
|
"step": 823,
|
|
"valid_targets_mean": 15294.1,
|
|
"valid_targets_min": 9094
|
|
},
|
|
{
|
|
"epoch": 3.5074626865671643,
|
|
"grad_norm": 0.26870681149585096,
|
|
"learning_rate": 9.982845030042068e-06,
|
|
"loss": 0.9517983198165894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23002931475639343,
|
|
"step": 824,
|
|
"valid_targets_mean": 14787.1,
|
|
"valid_targets_min": 3972
|
|
},
|
|
{
|
|
"epoch": 3.511727078891258,
|
|
"grad_norm": 0.26281252606998257,
|
|
"learning_rate": 9.931439180797237e-06,
|
|
"loss": 1.000589370727539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24353653192520142,
|
|
"step": 825,
|
|
"valid_targets_mean": 15400.7,
|
|
"valid_targets_min": 8805
|
|
},
|
|
{
|
|
"epoch": 3.5159914712153517,
|
|
"grad_norm": 0.2801487598484197,
|
|
"learning_rate": 9.880122275591752e-06,
|
|
"loss": 0.9344038963317871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596667408943176,
|
|
"step": 826,
|
|
"valid_targets_mean": 14202.7,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 3.520255863539446,
|
|
"grad_norm": 0.2751368401541349,
|
|
"learning_rate": 9.828894767750865e-06,
|
|
"loss": 1.036057472229004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263114333152771,
|
|
"step": 827,
|
|
"valid_targets_mean": 15087.4,
|
|
"valid_targets_min": 2986
|
|
},
|
|
{
|
|
"epoch": 3.5245202558635396,
|
|
"grad_norm": 0.24910039526000877,
|
|
"learning_rate": 9.777757109810102e-06,
|
|
"loss": 0.968199610710144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24098575115203857,
|
|
"step": 828,
|
|
"valid_targets_mean": 14234.5,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 3.5287846481876333,
|
|
"grad_norm": 0.25249450033395743,
|
|
"learning_rate": 9.726709753511275e-06,
|
|
"loss": 0.9408011436462402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2418053150177002,
|
|
"step": 829,
|
|
"valid_targets_mean": 15226.1,
|
|
"valid_targets_min": 8810
|
|
},
|
|
{
|
|
"epoch": 3.533049040511727,
|
|
"grad_norm": 0.2517721863075118,
|
|
"learning_rate": 9.675753149798474e-06,
|
|
"loss": 0.9770376682281494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2498602569103241,
|
|
"step": 830,
|
|
"valid_targets_mean": 14781.8,
|
|
"valid_targets_min": 6994
|
|
},
|
|
{
|
|
"epoch": 3.5373134328358207,
|
|
"grad_norm": 0.25927125922197164,
|
|
"learning_rate": 9.624887748814118e-06,
|
|
"loss": 1.0216107368469238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23916086554527283,
|
|
"step": 831,
|
|
"valid_targets_mean": 15214.8,
|
|
"valid_targets_min": 10179
|
|
},
|
|
{
|
|
"epoch": 3.541577825159915,
|
|
"grad_norm": 0.26295001457537137,
|
|
"learning_rate": 9.574113999894909e-06,
|
|
"loss": 0.9992193579673767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22977036237716675,
|
|
"step": 832,
|
|
"valid_targets_mean": 13955.2,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 3.5458422174840085,
|
|
"grad_norm": 0.24095839155556267,
|
|
"learning_rate": 9.523432351567979e-06,
|
|
"loss": 0.9577963948249817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24666322767734528,
|
|
"step": 833,
|
|
"valid_targets_mean": 15254.7,
|
|
"valid_targets_min": 7740
|
|
},
|
|
{
|
|
"epoch": 3.550106609808102,
|
|
"grad_norm": 0.26188011738715167,
|
|
"learning_rate": 9.472843251546792e-06,
|
|
"loss": 1.0054728984832764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23398205637931824,
|
|
"step": 834,
|
|
"valid_targets_mean": 13342.5,
|
|
"valid_targets_min": 2585
|
|
},
|
|
{
|
|
"epoch": 3.5543710021321964,
|
|
"grad_norm": 0.2699766880378024,
|
|
"learning_rate": 9.422347146727294e-06,
|
|
"loss": 0.9918596744537354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24219661951065063,
|
|
"step": 835,
|
|
"valid_targets_mean": 14498.8,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 3.55863539445629,
|
|
"grad_norm": 0.26866569469142376,
|
|
"learning_rate": 9.371944483183912e-06,
|
|
"loss": 0.9615319967269897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22946661710739136,
|
|
"step": 836,
|
|
"valid_targets_mean": 14546.1,
|
|
"valid_targets_min": 5910
|
|
},
|
|
{
|
|
"epoch": 3.5628997867803838,
|
|
"grad_norm": 0.2605098677062309,
|
|
"learning_rate": 9.321635706165635e-06,
|
|
"loss": 0.9757985472679138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255840003490448,
|
|
"step": 837,
|
|
"valid_targets_mean": 15266.3,
|
|
"valid_targets_min": 8894
|
|
},
|
|
{
|
|
"epoch": 3.5671641791044775,
|
|
"grad_norm": 0.23775094240792444,
|
|
"learning_rate": 9.271421260092075e-06,
|
|
"loss": 0.989256739616394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24244949221611023,
|
|
"step": 838,
|
|
"valid_targets_mean": 14179.2,
|
|
"valid_targets_min": 7842
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.24007515683200306,
|
|
"learning_rate": 9.221301588549519e-06,
|
|
"loss": 0.9419832229614258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25342583656311035,
|
|
"step": 839,
|
|
"valid_targets_mean": 14947.5,
|
|
"valid_targets_min": 9230
|
|
},
|
|
{
|
|
"epoch": 3.5756929637526653,
|
|
"grad_norm": 0.2691792621285365,
|
|
"learning_rate": 9.171277134287057e-06,
|
|
"loss": 0.9450219869613647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22654005885124207,
|
|
"step": 840,
|
|
"valid_targets_mean": 14274.2,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 3.579957356076759,
|
|
"grad_norm": 0.237255278781228,
|
|
"learning_rate": 9.121348339212634e-06,
|
|
"loss": 1.0224823951721191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536211609840393,
|
|
"step": 841,
|
|
"valid_targets_mean": 14984.0,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 3.5842217484008527,
|
|
"grad_norm": 0.24953145517541198,
|
|
"learning_rate": 9.07151564438916e-06,
|
|
"loss": 1.0022039413452148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715470492839813,
|
|
"step": 842,
|
|
"valid_targets_mean": 14656.7,
|
|
"valid_targets_min": 6455
|
|
},
|
|
{
|
|
"epoch": 3.588486140724947,
|
|
"grad_norm": 0.23114629454351143,
|
|
"learning_rate": 9.021779490030611e-06,
|
|
"loss": 0.9453797340393066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22272245585918427,
|
|
"step": 843,
|
|
"valid_targets_mean": 14127.5,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.5927505330490406,
|
|
"grad_norm": 0.22891550463363444,
|
|
"learning_rate": 8.972140315498119e-06,
|
|
"loss": 0.9721546173095703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25284475088119507,
|
|
"step": 844,
|
|
"valid_targets_mean": 15379.6,
|
|
"valid_targets_min": 8062
|
|
},
|
|
{
|
|
"epoch": 3.5970149253731343,
|
|
"grad_norm": 0.265547783629435,
|
|
"learning_rate": 8.922598559296154e-06,
|
|
"loss": 0.986110508441925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24768789112567902,
|
|
"step": 845,
|
|
"valid_targets_mean": 15095.0,
|
|
"valid_targets_min": 9098
|
|
},
|
|
{
|
|
"epoch": 3.6012793176972284,
|
|
"grad_norm": 0.24559411887500796,
|
|
"learning_rate": 8.873154659068582e-06,
|
|
"loss": 0.9304745197296143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2088957130908966,
|
|
"step": 846,
|
|
"valid_targets_mean": 14593.3,
|
|
"valid_targets_min": 4305
|
|
},
|
|
{
|
|
"epoch": 3.605543710021322,
|
|
"grad_norm": 0.24378067479731605,
|
|
"learning_rate": 8.823809051594816e-06,
|
|
"loss": 0.9832024574279785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23046353459358215,
|
|
"step": 847,
|
|
"valid_targets_mean": 13953.9,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 3.609808102345416,
|
|
"grad_norm": 0.23851355907408645,
|
|
"learning_rate": 8.774562172785988e-06,
|
|
"loss": 0.964216411113739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2261546552181244,
|
|
"step": 848,
|
|
"valid_targets_mean": 14167.8,
|
|
"valid_targets_min": 2377
|
|
},
|
|
{
|
|
"epoch": 3.6140724946695095,
|
|
"grad_norm": 0.26180049005190187,
|
|
"learning_rate": 8.725414457681063e-06,
|
|
"loss": 1.0207599401474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765401005744934,
|
|
"step": 849,
|
|
"valid_targets_mean": 15571.1,
|
|
"valid_targets_min": 3899
|
|
},
|
|
{
|
|
"epoch": 3.6183368869936032,
|
|
"grad_norm": 0.2315627165495342,
|
|
"learning_rate": 8.676366340443017e-06,
|
|
"loss": 0.9406695365905762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22266033291816711,
|
|
"step": 850,
|
|
"valid_targets_mean": 14893.3,
|
|
"valid_targets_min": 6060
|
|
},
|
|
{
|
|
"epoch": 3.6226012793176974,
|
|
"grad_norm": 0.27262510321769146,
|
|
"learning_rate": 8.627418254355e-06,
|
|
"loss": 0.9919838905334473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515830397605896,
|
|
"step": 851,
|
|
"valid_targets_mean": 14232.6,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 3.626865671641791,
|
|
"grad_norm": 0.23374782787698362,
|
|
"learning_rate": 8.578570631816474e-06,
|
|
"loss": 0.9726259112358093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23803842067718506,
|
|
"step": 852,
|
|
"valid_targets_mean": 14178.5,
|
|
"valid_targets_min": 7984
|
|
},
|
|
{
|
|
"epoch": 3.631130063965885,
|
|
"grad_norm": 0.2424184350347015,
|
|
"learning_rate": 8.529823904339472e-06,
|
|
"loss": 0.9699015617370605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25163164734840393,
|
|
"step": 853,
|
|
"valid_targets_mean": 13938.4,
|
|
"valid_targets_min": 4176
|
|
},
|
|
{
|
|
"epoch": 3.635394456289979,
|
|
"grad_norm": 0.25635501513156594,
|
|
"learning_rate": 8.481178502544684e-06,
|
|
"loss": 0.9935340881347656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22088128328323364,
|
|
"step": 854,
|
|
"valid_targets_mean": 14311.4,
|
|
"valid_targets_min": 3387
|
|
},
|
|
{
|
|
"epoch": 3.6396588486140726,
|
|
"grad_norm": 0.26303322143128804,
|
|
"learning_rate": 8.43263485615774e-06,
|
|
"loss": 0.9702484011650085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25029247999191284,
|
|
"step": 855,
|
|
"valid_targets_mean": 15099.9,
|
|
"valid_targets_min": 4158
|
|
},
|
|
{
|
|
"epoch": 3.6439232409381663,
|
|
"grad_norm": 0.2677065947643943,
|
|
"learning_rate": 8.384193394005372e-06,
|
|
"loss": 0.9816667437553406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25914669036865234,
|
|
"step": 856,
|
|
"valid_targets_mean": 14864.0,
|
|
"valid_targets_min": 9550
|
|
},
|
|
{
|
|
"epoch": 3.64818763326226,
|
|
"grad_norm": 0.25216339509853064,
|
|
"learning_rate": 8.33585454401161e-06,
|
|
"loss": 0.9883568286895752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2371150106191635,
|
|
"step": 857,
|
|
"valid_targets_mean": 14149.2,
|
|
"valid_targets_min": 5806
|
|
},
|
|
{
|
|
"epoch": 3.6524520255863537,
|
|
"grad_norm": 0.24108654740369923,
|
|
"learning_rate": 8.287618733194073e-06,
|
|
"loss": 0.9662503600120544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23698334395885468,
|
|
"step": 858,
|
|
"valid_targets_mean": 14875.3,
|
|
"valid_targets_min": 4261
|
|
},
|
|
{
|
|
"epoch": 3.656716417910448,
|
|
"grad_norm": 0.24007182531454413,
|
|
"learning_rate": 8.239486387660096e-06,
|
|
"loss": 0.9632735252380371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24465878307819366,
|
|
"step": 859,
|
|
"valid_targets_mean": 14762.0,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 3.6609808102345416,
|
|
"grad_norm": 0.24851212890633603,
|
|
"learning_rate": 8.191457932603052e-06,
|
|
"loss": 0.9559136033058167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2483808994293213,
|
|
"step": 860,
|
|
"valid_targets_mean": 14555.2,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 3.6652452025586353,
|
|
"grad_norm": 0.25292304798578846,
|
|
"learning_rate": 8.143533792298545e-06,
|
|
"loss": 0.9959200620651245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23943336308002472,
|
|
"step": 861,
|
|
"valid_targets_mean": 13622.2,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 3.6695095948827294,
|
|
"grad_norm": 0.23439603192195196,
|
|
"learning_rate": 8.095714390100698e-06,
|
|
"loss": 0.9712970852851868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25741633772850037,
|
|
"step": 862,
|
|
"valid_targets_mean": 14786.1,
|
|
"valid_targets_min": 5323
|
|
},
|
|
{
|
|
"epoch": 3.673773987206823,
|
|
"grad_norm": 0.23042290975565483,
|
|
"learning_rate": 8.048000148438375e-06,
|
|
"loss": 0.9706142544746399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24785256385803223,
|
|
"step": 863,
|
|
"valid_targets_mean": 14706.8,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 3.678038379530917,
|
|
"grad_norm": 0.22583229144376504,
|
|
"learning_rate": 8.000391488811485e-06,
|
|
"loss": 0.9806746244430542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24755212664604187,
|
|
"step": 864,
|
|
"valid_targets_mean": 14964.3,
|
|
"valid_targets_min": 4977
|
|
},
|
|
{
|
|
"epoch": 3.6823027718550105,
|
|
"grad_norm": 0.24399195437254342,
|
|
"learning_rate": 7.952888831787215e-06,
|
|
"loss": 1.0361812114715576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.243907630443573,
|
|
"step": 865,
|
|
"valid_targets_mean": 14916.1,
|
|
"valid_targets_min": 5985
|
|
},
|
|
{
|
|
"epoch": 3.6865671641791042,
|
|
"grad_norm": 0.21939154756844886,
|
|
"learning_rate": 7.905492596996391e-06,
|
|
"loss": 0.9567290544509888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23049962520599365,
|
|
"step": 866,
|
|
"valid_targets_mean": 14725.6,
|
|
"valid_targets_min": 6183
|
|
},
|
|
{
|
|
"epoch": 3.6908315565031984,
|
|
"grad_norm": 0.23451287060659043,
|
|
"learning_rate": 7.858203203129668e-06,
|
|
"loss": 0.9520031809806824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24754250049591064,
|
|
"step": 867,
|
|
"valid_targets_mean": 15242.0,
|
|
"valid_targets_min": 7757
|
|
},
|
|
{
|
|
"epoch": 3.695095948827292,
|
|
"grad_norm": 0.23310614570680926,
|
|
"learning_rate": 7.811021067933919e-06,
|
|
"loss": 0.9988101720809937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24427476525306702,
|
|
"step": 868,
|
|
"valid_targets_mean": 14525.9,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 3.699360341151386,
|
|
"grad_norm": 0.23593631097515333,
|
|
"learning_rate": 7.763946608208504e-06,
|
|
"loss": 0.9649564623832703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22403889894485474,
|
|
"step": 869,
|
|
"valid_targets_mean": 14157.7,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 3.70362473347548,
|
|
"grad_norm": 0.22874045545790603,
|
|
"learning_rate": 7.716980239801588e-06,
|
|
"loss": 0.9769153594970703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22000473737716675,
|
|
"step": 870,
|
|
"valid_targets_mean": 13162.8,
|
|
"valid_targets_min": 2704
|
|
},
|
|
{
|
|
"epoch": 3.7078891257995736,
|
|
"grad_norm": 0.2697549519752153,
|
|
"learning_rate": 7.670122377606495e-06,
|
|
"loss": 1.0231093168258667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25248855352401733,
|
|
"step": 871,
|
|
"valid_targets_mean": 13542.7,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.7121535181236673,
|
|
"grad_norm": 0.23761816571156935,
|
|
"learning_rate": 7.623373435557988e-06,
|
|
"loss": 0.9462850093841553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23748812079429626,
|
|
"step": 872,
|
|
"valid_targets_mean": 15127.0,
|
|
"valid_targets_min": 6449
|
|
},
|
|
{
|
|
"epoch": 3.716417910447761,
|
|
"grad_norm": 0.2418690392104566,
|
|
"learning_rate": 7.5767338266286775e-06,
|
|
"loss": 0.9786210060119629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23155134916305542,
|
|
"step": 873,
|
|
"valid_targets_mean": 13910.7,
|
|
"valid_targets_min": 5037
|
|
},
|
|
{
|
|
"epoch": 3.7206823027718547,
|
|
"grad_norm": 0.23092835117175475,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 0.9910449981689453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21116535365581512,
|
|
"step": 874,
|
|
"valid_targets_mean": 13119.2,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 3.724946695095949,
|
|
"grad_norm": 0.26564379972125934,
|
|
"learning_rate": 7.483784255185249e-06,
|
|
"loss": 0.9989334344863892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2472216784954071,
|
|
"step": 875,
|
|
"valid_targets_mean": 14785.9,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 3.7292110874200426,
|
|
"grad_norm": 0.2994596068518666,
|
|
"learning_rate": 7.437475113772632e-06,
|
|
"loss": 1.0079087018966675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2512202560901642,
|
|
"step": 876,
|
|
"valid_targets_mean": 15057.7,
|
|
"valid_targets_min": 9723
|
|
},
|
|
{
|
|
"epoch": 3.7334754797441363,
|
|
"grad_norm": 0.23884776976138294,
|
|
"learning_rate": 7.391276947674932e-06,
|
|
"loss": 0.9666019082069397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2395220249891281,
|
|
"step": 877,
|
|
"valid_targets_mean": 14631.8,
|
|
"valid_targets_min": 6130
|
|
},
|
|
{
|
|
"epoch": 3.7377398720682304,
|
|
"grad_norm": 0.29736483120931556,
|
|
"learning_rate": 7.345190164999307e-06,
|
|
"loss": 0.9678869247436523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23056000471115112,
|
|
"step": 878,
|
|
"valid_targets_mean": 14925.0,
|
|
"valid_targets_min": 7657
|
|
},
|
|
{
|
|
"epoch": 3.742004264392324,
|
|
"grad_norm": 0.26240877603734347,
|
|
"learning_rate": 7.299215172868947e-06,
|
|
"loss": 0.9956834316253662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2454250007867813,
|
|
"step": 879,
|
|
"valid_targets_mean": 13903.8,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 3.746268656716418,
|
|
"grad_norm": 0.2099079929210055,
|
|
"learning_rate": 7.2533523774194865e-06,
|
|
"loss": 0.9482271671295166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26138341426849365,
|
|
"step": 880,
|
|
"valid_targets_mean": 15312.8,
|
|
"valid_targets_min": 11925
|
|
},
|
|
{
|
|
"epoch": 3.750533049040512,
|
|
"grad_norm": 0.2565302279485427,
|
|
"learning_rate": 7.2076021837954616e-06,
|
|
"loss": 0.9806998372077942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23872928321361542,
|
|
"step": 881,
|
|
"valid_targets_mean": 14891.8,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 3.7547974413646057,
|
|
"grad_norm": 0.2716008582626385,
|
|
"learning_rate": 7.161964996146689e-06,
|
|
"loss": 1.046633005142212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2463754117488861,
|
|
"step": 882,
|
|
"valid_targets_mean": 14668.8,
|
|
"valid_targets_min": 4126
|
|
},
|
|
{
|
|
"epoch": 3.7590618336886994,
|
|
"grad_norm": 0.21714431008718005,
|
|
"learning_rate": 7.116441217624708e-06,
|
|
"loss": 0.993405818939209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700158953666687,
|
|
"step": 883,
|
|
"valid_targets_mean": 15408.2,
|
|
"valid_targets_min": 9820
|
|
},
|
|
{
|
|
"epoch": 3.763326226012793,
|
|
"grad_norm": 0.2742491054624345,
|
|
"learning_rate": 7.071031250379228e-06,
|
|
"loss": 0.9970163106918335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2420862913131714,
|
|
"step": 884,
|
|
"valid_targets_mean": 14343.3,
|
|
"valid_targets_min": 4333
|
|
},
|
|
{
|
|
"epoch": 3.767590618336887,
|
|
"grad_norm": 0.26956059566803753,
|
|
"learning_rate": 7.0257354955545466e-06,
|
|
"loss": 1.00797700881958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2438875436782837,
|
|
"step": 885,
|
|
"valid_targets_mean": 15130.5,
|
|
"valid_targets_min": 7247
|
|
},
|
|
{
|
|
"epoch": 3.771855010660981,
|
|
"grad_norm": 0.23506360504442678,
|
|
"learning_rate": 6.980554353286066e-06,
|
|
"loss": 0.9907431602478027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25692564249038696,
|
|
"step": 886,
|
|
"valid_targets_mean": 14252.0,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 3.7761194029850746,
|
|
"grad_norm": 0.22944013639885835,
|
|
"learning_rate": 6.935488222696676e-06,
|
|
"loss": 0.9467117786407471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23344078660011292,
|
|
"step": 887,
|
|
"valid_targets_mean": 14437.1,
|
|
"valid_targets_min": 2454
|
|
},
|
|
{
|
|
"epoch": 3.7803837953091683,
|
|
"grad_norm": 0.23977199507428115,
|
|
"learning_rate": 6.890537501893302e-06,
|
|
"loss": 1.0113825798034668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26561421155929565,
|
|
"step": 888,
|
|
"valid_targets_mean": 15158.0,
|
|
"valid_targets_min": 5390
|
|
},
|
|
{
|
|
"epoch": 3.7846481876332625,
|
|
"grad_norm": 0.22437726694777016,
|
|
"learning_rate": 6.845702587963352e-06,
|
|
"loss": 0.9820016622543335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22855736315250397,
|
|
"step": 889,
|
|
"valid_targets_mean": 14585.2,
|
|
"valid_targets_min": 4294
|
|
},
|
|
{
|
|
"epoch": 3.788912579957356,
|
|
"grad_norm": 0.23638769504391416,
|
|
"learning_rate": 6.800983876971192e-06,
|
|
"loss": 1.0018742084503174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25380653142929077,
|
|
"step": 890,
|
|
"valid_targets_mean": 15159.2,
|
|
"valid_targets_min": 4537
|
|
},
|
|
{
|
|
"epoch": 3.79317697228145,
|
|
"grad_norm": 0.21421496775116577,
|
|
"learning_rate": 6.756381763954718e-06,
|
|
"loss": 0.9877946376800537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22715067863464355,
|
|
"step": 891,
|
|
"valid_targets_mean": 14488.2,
|
|
"valid_targets_min": 4167
|
|
},
|
|
{
|
|
"epoch": 3.7974413646055436,
|
|
"grad_norm": 0.22529755844434154,
|
|
"learning_rate": 6.7118966429217645e-06,
|
|
"loss": 0.9948891401290894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22523371875286102,
|
|
"step": 892,
|
|
"valid_targets_mean": 13439.8,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 3.8017057569296373,
|
|
"grad_norm": 0.2255652438085674,
|
|
"learning_rate": 6.667528906846714e-06,
|
|
"loss": 0.9880792498588562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25235530734062195,
|
|
"step": 893,
|
|
"valid_targets_mean": 15426.2,
|
|
"valid_targets_min": 7311
|
|
},
|
|
{
|
|
"epoch": 3.8059701492537314,
|
|
"grad_norm": 0.23220933732001683,
|
|
"learning_rate": 6.623278947666974e-06,
|
|
"loss": 1.0030194520950317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551913857460022,
|
|
"step": 894,
|
|
"valid_targets_mean": 14742.0,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 3.810234541577825,
|
|
"grad_norm": 0.2249962379860535,
|
|
"learning_rate": 6.579147156279538e-06,
|
|
"loss": 0.9969605207443237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24424317479133606,
|
|
"step": 895,
|
|
"valid_targets_mean": 14518.3,
|
|
"valid_targets_min": 5077
|
|
},
|
|
{
|
|
"epoch": 3.814498933901919,
|
|
"grad_norm": 0.23028288819336276,
|
|
"learning_rate": 6.535133922537513e-06,
|
|
"loss": 1.0048408508300781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534922957420349,
|
|
"step": 896,
|
|
"valid_targets_mean": 15031.2,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 3.818763326226013,
|
|
"grad_norm": 0.2307164765754668,
|
|
"learning_rate": 6.491239635246709e-06,
|
|
"loss": 0.9471386671066284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25362256169319153,
|
|
"step": 897,
|
|
"valid_targets_mean": 14355.9,
|
|
"valid_targets_min": 3785
|
|
},
|
|
{
|
|
"epoch": 3.8230277185501067,
|
|
"grad_norm": 0.21103255369431073,
|
|
"learning_rate": 6.447464682162143e-06,
|
|
"loss": 0.9706763029098511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511069178581238,
|
|
"step": 898,
|
|
"valid_targets_mean": 14254.9,
|
|
"valid_targets_min": 5724
|
|
},
|
|
{
|
|
"epoch": 3.8272921108742004,
|
|
"grad_norm": 0.2163351379756889,
|
|
"learning_rate": 6.403809449984704e-06,
|
|
"loss": 0.9517117738723755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2282985895872116,
|
|
"step": 899,
|
|
"valid_targets_mean": 13606.8,
|
|
"valid_targets_min": 2633
|
|
},
|
|
{
|
|
"epoch": 3.831556503198294,
|
|
"grad_norm": 0.2137081551898619,
|
|
"learning_rate": 6.3602743243576405e-06,
|
|
"loss": 1.0055999755859375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24647054076194763,
|
|
"step": 900,
|
|
"valid_targets_mean": 14836.6,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 3.835820895522388,
|
|
"grad_norm": 0.2512808813192437,
|
|
"learning_rate": 6.316859689863222e-06,
|
|
"loss": 0.9790571331977844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21408924460411072,
|
|
"step": 901,
|
|
"valid_targets_mean": 14308.3,
|
|
"valid_targets_min": 5342
|
|
},
|
|
{
|
|
"epoch": 3.840085287846482,
|
|
"grad_norm": 0.22610062280570734,
|
|
"learning_rate": 6.273565930019316e-06,
|
|
"loss": 0.98270183801651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22871488332748413,
|
|
"step": 902,
|
|
"valid_targets_mean": 13936.7,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 3.8443496801705757,
|
|
"grad_norm": 0.22023088868762836,
|
|
"learning_rate": 6.230393427276e-06,
|
|
"loss": 0.9563218355178833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23217496275901794,
|
|
"step": 903,
|
|
"valid_targets_mean": 15035.5,
|
|
"valid_targets_min": 6891
|
|
},
|
|
{
|
|
"epoch": 3.8486140724946694,
|
|
"grad_norm": 0.22785980232747963,
|
|
"learning_rate": 6.187342563012198e-06,
|
|
"loss": 0.964765191078186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2280435860157013,
|
|
"step": 904,
|
|
"valid_targets_mean": 15162.3,
|
|
"valid_targets_min": 7415
|
|
},
|
|
{
|
|
"epoch": 3.8528784648187635,
|
|
"grad_norm": 0.24256802903006872,
|
|
"learning_rate": 6.144413717532269e-06,
|
|
"loss": 0.9996974468231201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2449914515018463,
|
|
"step": 905,
|
|
"valid_targets_mean": 14434.5,
|
|
"valid_targets_min": 4254
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.22759208964313332,
|
|
"learning_rate": 6.1016072700627106e-06,
|
|
"loss": 1.0134855508804321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638273239135742,
|
|
"step": 906,
|
|
"valid_targets_mean": 15279.8,
|
|
"valid_targets_min": 9661
|
|
},
|
|
{
|
|
"epoch": 3.861407249466951,
|
|
"grad_norm": 0.2183329889031039,
|
|
"learning_rate": 6.058923598748756e-06,
|
|
"loss": 0.9877858757972717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250029057264328,
|
|
"step": 907,
|
|
"valid_targets_mean": 14704.1,
|
|
"valid_targets_min": 4873
|
|
},
|
|
{
|
|
"epoch": 3.8656716417910446,
|
|
"grad_norm": 0.22701246103491296,
|
|
"learning_rate": 6.016363080651066e-06,
|
|
"loss": 0.972379207611084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678333520889282,
|
|
"step": 908,
|
|
"valid_targets_mean": 15310.9,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 3.8699360341151388,
|
|
"grad_norm": 0.22362256724937227,
|
|
"learning_rate": 5.973926091742386e-06,
|
|
"loss": 0.9702513217926025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25443366169929504,
|
|
"step": 909,
|
|
"valid_targets_mean": 14615.7,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 3.8742004264392325,
|
|
"grad_norm": 0.24281992815576195,
|
|
"learning_rate": 5.931613006904196e-06,
|
|
"loss": 1.0411319732666016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588690221309662,
|
|
"step": 910,
|
|
"valid_targets_mean": 14657.2,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 3.878464818763326,
|
|
"grad_norm": 0.23284136369236155,
|
|
"learning_rate": 5.889424199923473e-06,
|
|
"loss": 1.0260488986968994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27578866481781006,
|
|
"step": 911,
|
|
"valid_targets_mean": 14530.1,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 3.88272921108742,
|
|
"grad_norm": 0.21952547069398864,
|
|
"learning_rate": 5.847360043489318e-06,
|
|
"loss": 0.9903779029846191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23721489310264587,
|
|
"step": 912,
|
|
"valid_targets_mean": 13643.0,
|
|
"valid_targets_min": 4384
|
|
},
|
|
{
|
|
"epoch": 3.886993603411514,
|
|
"grad_norm": 0.2406281118612385,
|
|
"learning_rate": 5.805420909189683e-06,
|
|
"loss": 0.9668896198272705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2383928894996643,
|
|
"step": 913,
|
|
"valid_targets_mean": 13716.4,
|
|
"valid_targets_min": 1999
|
|
},
|
|
{
|
|
"epoch": 3.8912579957356077,
|
|
"grad_norm": 0.2175408518516596,
|
|
"learning_rate": 5.7636071675081076e-06,
|
|
"loss": 0.9407345056533813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22822970151901245,
|
|
"step": 914,
|
|
"valid_targets_mean": 14781.8,
|
|
"valid_targets_min": 4557
|
|
},
|
|
{
|
|
"epoch": 3.8955223880597014,
|
|
"grad_norm": 0.20830705856218412,
|
|
"learning_rate": 5.721919187820431e-06,
|
|
"loss": 0.9660102725028992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23975585401058197,
|
|
"step": 915,
|
|
"valid_targets_mean": 14888.3,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 3.8997867803837956,
|
|
"grad_norm": 0.21632443889029693,
|
|
"learning_rate": 5.6803573383915265e-06,
|
|
"loss": 0.9738289713859558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23945684731006622,
|
|
"step": 916,
|
|
"valid_targets_mean": 14734.2,
|
|
"valid_targets_min": 6220
|
|
},
|
|
{
|
|
"epoch": 3.9040511727078893,
|
|
"grad_norm": 0.224221923427914,
|
|
"learning_rate": 5.638921986372064e-06,
|
|
"loss": 0.9694260954856873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25046929717063904,
|
|
"step": 917,
|
|
"valid_targets_mean": 14521.5,
|
|
"valid_targets_min": 4590
|
|
},
|
|
{
|
|
"epoch": 3.908315565031983,
|
|
"grad_norm": 0.2298240345648187,
|
|
"learning_rate": 5.5976134977952315e-06,
|
|
"loss": 0.9808531403541565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26723894476890564,
|
|
"step": 918,
|
|
"valid_targets_mean": 14542.9,
|
|
"valid_targets_min": 4690
|
|
},
|
|
{
|
|
"epoch": 3.9125799573560767,
|
|
"grad_norm": 0.2215604099403996,
|
|
"learning_rate": 5.556432237573564e-06,
|
|
"loss": 0.9920763969421387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24294498562812805,
|
|
"step": 919,
|
|
"valid_targets_mean": 14229.2,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 3.9168443496801704,
|
|
"grad_norm": 0.21540261933930946,
|
|
"learning_rate": 5.5153785694956416e-06,
|
|
"loss": 0.9566727876663208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24480316042900085,
|
|
"step": 920,
|
|
"valid_targets_mean": 14657.9,
|
|
"valid_targets_min": 5751
|
|
},
|
|
{
|
|
"epoch": 3.9211087420042645,
|
|
"grad_norm": 0.24597762423816472,
|
|
"learning_rate": 5.474452856222942e-06,
|
|
"loss": 1.0440037250518799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535979449748993,
|
|
"step": 921,
|
|
"valid_targets_mean": 14060.7,
|
|
"valid_targets_min": 2813
|
|
},
|
|
{
|
|
"epoch": 3.925373134328358,
|
|
"grad_norm": 0.2189522633151993,
|
|
"learning_rate": 5.433655459286611e-06,
|
|
"loss": 0.9538445472717285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23559778928756714,
|
|
"step": 922,
|
|
"valid_targets_mean": 14302.5,
|
|
"valid_targets_min": 4541
|
|
},
|
|
{
|
|
"epoch": 3.929637526652452,
|
|
"grad_norm": 0.20795760943492506,
|
|
"learning_rate": 5.392986739084238e-06,
|
|
"loss": 1.0004758834838867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24290123581886292,
|
|
"step": 923,
|
|
"valid_targets_mean": 14002.6,
|
|
"valid_targets_min": 3538
|
|
},
|
|
{
|
|
"epoch": 3.933901918976546,
|
|
"grad_norm": 0.2265030260604343,
|
|
"learning_rate": 5.352447054876755e-06,
|
|
"loss": 0.9766792058944702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285555899143219,
|
|
"step": 924,
|
|
"valid_targets_mean": 13950.4,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 3.9381663113006398,
|
|
"grad_norm": 0.24053658447388504,
|
|
"learning_rate": 5.31203676478516e-06,
|
|
"loss": 1.0060276985168457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255367249250412,
|
|
"step": 925,
|
|
"valid_targets_mean": 15236.5,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 3.9424307036247335,
|
|
"grad_norm": 0.21602416326107177,
|
|
"learning_rate": 5.271756225787434e-06,
|
|
"loss": 0.9795374870300293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554417848587036,
|
|
"step": 926,
|
|
"valid_targets_mean": 15215.2,
|
|
"valid_targets_min": 6385
|
|
},
|
|
{
|
|
"epoch": 3.946695095948827,
|
|
"grad_norm": 0.2534499728526741,
|
|
"learning_rate": 5.231605793715348e-06,
|
|
"loss": 0.9589778184890747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2268856167793274,
|
|
"step": 927,
|
|
"valid_targets_mean": 14792.0,
|
|
"valid_targets_min": 6755
|
|
},
|
|
{
|
|
"epoch": 3.950959488272921,
|
|
"grad_norm": 0.2383331984225829,
|
|
"learning_rate": 5.191585823251335e-06,
|
|
"loss": 0.9927548170089722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22233620285987854,
|
|
"step": 928,
|
|
"valid_targets_mean": 14103.3,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 3.955223880597015,
|
|
"grad_norm": 0.22499113915923333,
|
|
"learning_rate": 5.151696667925348e-06,
|
|
"loss": 0.986518383026123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2589643597602844,
|
|
"step": 929,
|
|
"valid_targets_mean": 14469.6,
|
|
"valid_targets_min": 2411
|
|
},
|
|
{
|
|
"epoch": 3.9594882729211087,
|
|
"grad_norm": 0.2045620584100863,
|
|
"learning_rate": 5.111938680111732e-06,
|
|
"loss": 0.967449963092804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582744359970093,
|
|
"step": 930,
|
|
"valid_targets_mean": 14435.3,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 3.9637526652452024,
|
|
"grad_norm": 0.2496367032227465,
|
|
"learning_rate": 5.072312211026125e-06,
|
|
"loss": 1.012898564338684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2825775444507599,
|
|
"step": 931,
|
|
"valid_targets_mean": 14951.0,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 3.9680170575692966,
|
|
"grad_norm": 0.21798779394403045,
|
|
"learning_rate": 5.032817610722369e-06,
|
|
"loss": 0.9658070802688599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23163622617721558,
|
|
"step": 932,
|
|
"valid_targets_mean": 14676.0,
|
|
"valid_targets_min": 5449
|
|
},
|
|
{
|
|
"epoch": 3.9722814498933903,
|
|
"grad_norm": 0.21530655517619576,
|
|
"learning_rate": 4.993455228089366e-06,
|
|
"loss": 1.0015664100646973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2477431297302246,
|
|
"step": 933,
|
|
"valid_targets_mean": 14040.2,
|
|
"valid_targets_min": 2181
|
|
},
|
|
{
|
|
"epoch": 3.976545842217484,
|
|
"grad_norm": 0.20479331465856604,
|
|
"learning_rate": 4.954225410848048e-06,
|
|
"loss": 1.0132780075073242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25588130950927734,
|
|
"step": 934,
|
|
"valid_targets_mean": 14669.7,
|
|
"valid_targets_min": 6432
|
|
},
|
|
{
|
|
"epoch": 3.9808102345415777,
|
|
"grad_norm": 0.22163145488550764,
|
|
"learning_rate": 4.915128505548284e-06,
|
|
"loss": 1.0022435188293457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24669809639453888,
|
|
"step": 935,
|
|
"valid_targets_mean": 14697.2,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 3.9850746268656714,
|
|
"grad_norm": 0.2202497704496609,
|
|
"learning_rate": 4.8761648575658145e-06,
|
|
"loss": 0.9922990202903748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2363756000995636,
|
|
"step": 936,
|
|
"valid_targets_mean": 14532.8,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 3.9893390191897655,
|
|
"grad_norm": 0.2230749511261461,
|
|
"learning_rate": 4.837334811099217e-06,
|
|
"loss": 0.9765486717224121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520136535167694,
|
|
"step": 937,
|
|
"valid_targets_mean": 14174.2,
|
|
"valid_targets_min": 3072
|
|
},
|
|
{
|
|
"epoch": 3.9936034115138592,
|
|
"grad_norm": 0.21084377809783839,
|
|
"learning_rate": 4.7986387091668365e-06,
|
|
"loss": 0.9671496152877808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24725645780563354,
|
|
"step": 938,
|
|
"valid_targets_mean": 14576.5,
|
|
"valid_targets_min": 6313
|
|
},
|
|
{
|
|
"epoch": 3.997867803837953,
|
|
"grad_norm": 0.2241068083091471,
|
|
"learning_rate": 4.760076893603791e-06,
|
|
"loss": 0.9731559753417969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.241220623254776,
|
|
"step": 939,
|
|
"valid_targets_mean": 14682.5,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.27150620390980645,
|
|
"learning_rate": 4.721649705058926e-06,
|
|
"loss": 0.9118474721908569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45057356357574463,
|
|
"step": 940,
|
|
"valid_targets_mean": 15165.0,
|
|
"valid_targets_min": 6436
|
|
},
|
|
{
|
|
"epoch": 4.004264392324094,
|
|
"grad_norm": 0.28256013345588044,
|
|
"learning_rate": 4.683357482991819e-06,
|
|
"loss": 1.0042099952697754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682952284812927,
|
|
"step": 941,
|
|
"valid_targets_mean": 14863.8,
|
|
"valid_targets_min": 2691
|
|
},
|
|
{
|
|
"epoch": 4.008528784648187,
|
|
"grad_norm": 0.2649269193338297,
|
|
"learning_rate": 4.645200565669776e-06,
|
|
"loss": 1.000551462173462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577502131462097,
|
|
"step": 942,
|
|
"valid_targets_mean": 15028.4,
|
|
"valid_targets_min": 3047
|
|
},
|
|
{
|
|
"epoch": 4.0127931769722816,
|
|
"grad_norm": 0.22530203293671064,
|
|
"learning_rate": 4.607179290164823e-06,
|
|
"loss": 1.0080432891845703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23703917860984802,
|
|
"step": 943,
|
|
"valid_targets_mean": 14554.3,
|
|
"valid_targets_min": 3704
|
|
},
|
|
{
|
|
"epoch": 4.017057569296376,
|
|
"grad_norm": 0.2559856589514295,
|
|
"learning_rate": 4.569293992350783e-06,
|
|
"loss": 0.9192423820495605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22103016078472137,
|
|
"step": 944,
|
|
"valid_targets_mean": 14898.5,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 4.021321961620469,
|
|
"grad_norm": 0.22821413419478453,
|
|
"learning_rate": 4.531545006900244e-06,
|
|
"loss": 0.956257164478302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22548536956310272,
|
|
"step": 945,
|
|
"valid_targets_mean": 14250.9,
|
|
"valid_targets_min": 2251
|
|
},
|
|
{
|
|
"epoch": 4.025586353944563,
|
|
"grad_norm": 0.23784623978147518,
|
|
"learning_rate": 4.493932667281646e-06,
|
|
"loss": 0.9235379099845886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2133762538433075,
|
|
"step": 946,
|
|
"valid_targets_mean": 15698.6,
|
|
"valid_targets_min": 11052
|
|
},
|
|
{
|
|
"epoch": 4.029850746268656,
|
|
"grad_norm": 0.22068950079285038,
|
|
"learning_rate": 4.456457305756321e-06,
|
|
"loss": 0.905700147151947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22572782635688782,
|
|
"step": 947,
|
|
"valid_targets_mean": 14185.8,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 4.0341151385927505,
|
|
"grad_norm": 0.25806705345581443,
|
|
"learning_rate": 4.419119253375557e-06,
|
|
"loss": 1.012787103652954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502809166908264,
|
|
"step": 948,
|
|
"valid_targets_mean": 14074.9,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 4.038379530916845,
|
|
"grad_norm": 0.2546859965050811,
|
|
"learning_rate": 4.381918839977675e-06,
|
|
"loss": 1.0134446620941162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24352416396141052,
|
|
"step": 949,
|
|
"valid_targets_mean": 14197.3,
|
|
"valid_targets_min": 4327
|
|
},
|
|
{
|
|
"epoch": 4.042643923240938,
|
|
"grad_norm": 0.24583840538751098,
|
|
"learning_rate": 4.344856394185122e-06,
|
|
"loss": 0.996058464050293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.234785258769989,
|
|
"step": 950,
|
|
"valid_targets_mean": 14938.5,
|
|
"valid_targets_min": 5877
|
|
},
|
|
{
|
|
"epoch": 4.046908315565032,
|
|
"grad_norm": 0.2296312180797508,
|
|
"learning_rate": 4.307932243401538e-06,
|
|
"loss": 0.9637879133224487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22970783710479736,
|
|
"step": 951,
|
|
"valid_targets_mean": 14600.7,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 4.051172707889126,
|
|
"grad_norm": 0.22481143584533303,
|
|
"learning_rate": 4.271146713808927e-06,
|
|
"loss": 0.9624608755111694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22701914608478546,
|
|
"step": 952,
|
|
"valid_targets_mean": 13891.4,
|
|
"valid_targets_min": 6092
|
|
},
|
|
{
|
|
"epoch": 4.0554371002132195,
|
|
"grad_norm": 0.24556871387064833,
|
|
"learning_rate": 4.234500130364698e-06,
|
|
"loss": 0.9672844409942627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22592121362686157,
|
|
"step": 953,
|
|
"valid_targets_mean": 13705.8,
|
|
"valid_targets_min": 3021
|
|
},
|
|
{
|
|
"epoch": 4.059701492537314,
|
|
"grad_norm": 0.23510154115328177,
|
|
"learning_rate": 4.197992816798851e-06,
|
|
"loss": 0.961010217666626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24817736446857452,
|
|
"step": 954,
|
|
"valid_targets_mean": 15000.9,
|
|
"valid_targets_min": 9098
|
|
},
|
|
{
|
|
"epoch": 4.063965884861407,
|
|
"grad_norm": 0.2179398594561336,
|
|
"learning_rate": 4.161625095611101e-06,
|
|
"loss": 0.9844958186149597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2261737883090973,
|
|
"step": 955,
|
|
"valid_targets_mean": 13627.3,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 4.068230277185501,
|
|
"grad_norm": 0.22759081155161134,
|
|
"learning_rate": 4.125397288068007e-06,
|
|
"loss": 0.9344425201416016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527036964893341,
|
|
"step": 956,
|
|
"valid_targets_mean": 15573.6,
|
|
"valid_targets_min": 8774
|
|
},
|
|
{
|
|
"epoch": 4.072494669509595,
|
|
"grad_norm": 0.2466018095173116,
|
|
"learning_rate": 4.089309714200187e-06,
|
|
"loss": 0.980632483959198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23343364894390106,
|
|
"step": 957,
|
|
"valid_targets_mean": 14154.8,
|
|
"valid_targets_min": 4403
|
|
},
|
|
{
|
|
"epoch": 4.076759061833688,
|
|
"grad_norm": 0.24218821254307063,
|
|
"learning_rate": 4.0533626927994185e-06,
|
|
"loss": 0.9529193639755249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22672417759895325,
|
|
"step": 958,
|
|
"valid_targets_mean": 14209.4,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 4.081023454157783,
|
|
"grad_norm": 0.20934879272888285,
|
|
"learning_rate": 4.017556541415888e-06,
|
|
"loss": 0.9678058624267578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24231302738189697,
|
|
"step": 959,
|
|
"valid_targets_mean": 14519.5,
|
|
"valid_targets_min": 4361
|
|
},
|
|
{
|
|
"epoch": 4.085287846481877,
|
|
"grad_norm": 0.20368002250099138,
|
|
"learning_rate": 3.981891576355352e-06,
|
|
"loss": 0.9462436437606812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565672695636749,
|
|
"step": 960,
|
|
"valid_targets_mean": 15968.7,
|
|
"valid_targets_min": 11170
|
|
},
|
|
{
|
|
"epoch": 4.08955223880597,
|
|
"grad_norm": 0.21733515143478332,
|
|
"learning_rate": 3.946368112676346e-06,
|
|
"loss": 0.932870626449585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2336617112159729,
|
|
"step": 961,
|
|
"valid_targets_mean": 14337.7,
|
|
"valid_targets_min": 5469
|
|
},
|
|
{
|
|
"epoch": 4.093816631130064,
|
|
"grad_norm": 0.2192099926666813,
|
|
"learning_rate": 3.9109864641874166e-06,
|
|
"loss": 0.9877800941467285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25421765446662903,
|
|
"step": 962,
|
|
"valid_targets_mean": 14935.7,
|
|
"valid_targets_min": 8954
|
|
},
|
|
{
|
|
"epoch": 4.098081023454157,
|
|
"grad_norm": 0.20406995782358714,
|
|
"learning_rate": 3.875746943444316e-06,
|
|
"loss": 0.9596678018569946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.212519571185112,
|
|
"step": 963,
|
|
"valid_targets_mean": 14245.0,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 4.1023454157782515,
|
|
"grad_norm": 0.2145177600562408,
|
|
"learning_rate": 3.840649861747278e-06,
|
|
"loss": 0.9821524620056152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564830183982849,
|
|
"step": 964,
|
|
"valid_targets_mean": 14939.0,
|
|
"valid_targets_min": 5564
|
|
},
|
|
{
|
|
"epoch": 4.106609808102346,
|
|
"grad_norm": 0.215337816359981,
|
|
"learning_rate": 3.8056955291382667e-06,
|
|
"loss": 0.9720021486282349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2259758859872818,
|
|
"step": 965,
|
|
"valid_targets_mean": 14278.5,
|
|
"valid_targets_min": 2855
|
|
},
|
|
{
|
|
"epoch": 4.110874200426439,
|
|
"grad_norm": 0.21583433451665665,
|
|
"learning_rate": 3.7708842543981928e-06,
|
|
"loss": 1.0156824588775635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.220872700214386,
|
|
"step": 966,
|
|
"valid_targets_mean": 13830.6,
|
|
"valid_targets_min": 3608
|
|
},
|
|
{
|
|
"epoch": 4.115138592750533,
|
|
"grad_norm": 0.22351654835384138,
|
|
"learning_rate": 3.736216345044237e-06,
|
|
"loss": 0.9629800319671631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24485626816749573,
|
|
"step": 967,
|
|
"valid_targets_mean": 15243.3,
|
|
"valid_targets_min": 9348
|
|
},
|
|
{
|
|
"epoch": 4.119402985074627,
|
|
"grad_norm": 0.22902557560047432,
|
|
"learning_rate": 3.7016921073271084e-06,
|
|
"loss": 0.9766925573348999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25122424960136414,
|
|
"step": 968,
|
|
"valid_targets_mean": 14588.3,
|
|
"valid_targets_min": 5330
|
|
},
|
|
{
|
|
"epoch": 4.1236673773987205,
|
|
"grad_norm": 0.2040094434563164,
|
|
"learning_rate": 3.6673118462283453e-06,
|
|
"loss": 0.9763757586479187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22237417101860046,
|
|
"step": 969,
|
|
"valid_targets_mean": 14617.9,
|
|
"valid_targets_min": 7151
|
|
},
|
|
{
|
|
"epoch": 4.127931769722815,
|
|
"grad_norm": 0.20835280187342026,
|
|
"learning_rate": 3.6330758654576227e-06,
|
|
"loss": 0.9870020151138306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530421316623688,
|
|
"step": 970,
|
|
"valid_targets_mean": 15085.6,
|
|
"valid_targets_min": 5562
|
|
},
|
|
{
|
|
"epoch": 4.132196162046908,
|
|
"grad_norm": 0.20889541806002418,
|
|
"learning_rate": 3.598984467450055e-06,
|
|
"loss": 0.9310990571975708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25087663531303406,
|
|
"step": 971,
|
|
"valid_targets_mean": 15062.1,
|
|
"valid_targets_min": 7551
|
|
},
|
|
{
|
|
"epoch": 4.136460554371002,
|
|
"grad_norm": 0.21143480891641425,
|
|
"learning_rate": 3.565037953363546e-06,
|
|
"loss": 0.9461225867271423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24634364247322083,
|
|
"step": 972,
|
|
"valid_targets_mean": 15131.4,
|
|
"valid_targets_min": 4479
|
|
},
|
|
{
|
|
"epoch": 4.140724946695096,
|
|
"grad_norm": 0.22043944971350854,
|
|
"learning_rate": 3.5312366230761154e-06,
|
|
"loss": 0.9399993419647217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2461288720369339,
|
|
"step": 973,
|
|
"valid_targets_mean": 14582.4,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 4.144989339019189,
|
|
"grad_norm": 0.20732636833791046,
|
|
"learning_rate": 3.497580775183258e-06,
|
|
"loss": 0.9510713815689087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21086552739143372,
|
|
"step": 974,
|
|
"valid_targets_mean": 13831.5,
|
|
"valid_targets_min": 3354
|
|
},
|
|
{
|
|
"epoch": 4.149253731343284,
|
|
"grad_norm": 0.21224130041621564,
|
|
"learning_rate": 3.464070706995295e-06,
|
|
"loss": 1.0145010948181152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24939191341400146,
|
|
"step": 975,
|
|
"valid_targets_mean": 15031.1,
|
|
"valid_targets_min": 2622
|
|
},
|
|
{
|
|
"epoch": 4.153518123667378,
|
|
"grad_norm": 0.22746712598850918,
|
|
"learning_rate": 3.4307067145347417e-06,
|
|
"loss": 0.9797348976135254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22808049619197845,
|
|
"step": 976,
|
|
"valid_targets_mean": 13718.6,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 4.157782515991471,
|
|
"grad_norm": 0.22450380102371767,
|
|
"learning_rate": 3.397489092533739e-06,
|
|
"loss": 0.9286082983016968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2307516485452652,
|
|
"step": 977,
|
|
"valid_targets_mean": 15139.5,
|
|
"valid_targets_min": 4248
|
|
},
|
|
{
|
|
"epoch": 4.162046908315565,
|
|
"grad_norm": 0.20143897482124337,
|
|
"learning_rate": 3.364418134431371e-06,
|
|
"loss": 0.9552431106567383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2490731030702591,
|
|
"step": 978,
|
|
"valid_targets_mean": 14692.8,
|
|
"valid_targets_min": 5739
|
|
},
|
|
{
|
|
"epoch": 4.166311300639659,
|
|
"grad_norm": 0.2061162824485008,
|
|
"learning_rate": 3.331494132371149e-06,
|
|
"loss": 1.010917067527771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25879770517349243,
|
|
"step": 979,
|
|
"valid_targets_mean": 14593.1,
|
|
"valid_targets_min": 6091
|
|
},
|
|
{
|
|
"epoch": 4.1705756929637525,
|
|
"grad_norm": 0.20318318761047863,
|
|
"learning_rate": 3.2987173771983816e-06,
|
|
"loss": 0.9764742851257324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2452343851327896,
|
|
"step": 980,
|
|
"valid_targets_mean": 13589.6,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 4.174840085287847,
|
|
"grad_norm": 0.2154933068221611,
|
|
"learning_rate": 3.266088158457634e-06,
|
|
"loss": 0.9941388368606567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23634712398052216,
|
|
"step": 981,
|
|
"valid_targets_mean": 15645.4,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 4.17910447761194,
|
|
"grad_norm": 0.2162986176814452,
|
|
"learning_rate": 3.233606764390147e-06,
|
|
"loss": 0.9838998913764954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615757882595062,
|
|
"step": 982,
|
|
"valid_targets_mean": 14633.2,
|
|
"valid_targets_min": 2488
|
|
},
|
|
{
|
|
"epoch": 4.183368869936034,
|
|
"grad_norm": 0.2038562625869719,
|
|
"learning_rate": 3.2012734819313127e-06,
|
|
"loss": 0.9590896368026733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2380497008562088,
|
|
"step": 983,
|
|
"valid_targets_mean": 14881.7,
|
|
"valid_targets_min": 2565
|
|
},
|
|
{
|
|
"epoch": 4.187633262260128,
|
|
"grad_norm": 0.21410133636000414,
|
|
"learning_rate": 3.1690885967081187e-06,
|
|
"loss": 0.9502666592597961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23590557277202606,
|
|
"step": 984,
|
|
"valid_targets_mean": 14258.9,
|
|
"valid_targets_min": 4410
|
|
},
|
|
{
|
|
"epoch": 4.1918976545842215,
|
|
"grad_norm": 0.2146590549244702,
|
|
"learning_rate": 3.1370523930366393e-06,
|
|
"loss": 1.0116958618164062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786130905151367,
|
|
"step": 985,
|
|
"valid_targets_mean": 15395.7,
|
|
"valid_targets_min": 7915
|
|
},
|
|
{
|
|
"epoch": 4.196162046908316,
|
|
"grad_norm": 0.20958353183185127,
|
|
"learning_rate": 3.105165153919525e-06,
|
|
"loss": 0.9581502676010132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24928778409957886,
|
|
"step": 986,
|
|
"valid_targets_mean": 14460.1,
|
|
"valid_targets_min": 7483
|
|
},
|
|
{
|
|
"epoch": 4.20042643923241,
|
|
"grad_norm": 0.21529262137934305,
|
|
"learning_rate": 3.073427161043492e-06,
|
|
"loss": 0.9945790767669678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24089477956295013,
|
|
"step": 987,
|
|
"valid_targets_mean": 14355.3,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 4.204690831556503,
|
|
"grad_norm": 0.21431820620633432,
|
|
"learning_rate": 3.0418386947768463e-06,
|
|
"loss": 0.9737157821655273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26264503598213196,
|
|
"step": 988,
|
|
"valid_targets_mean": 15095.2,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 4.208955223880597,
|
|
"grad_norm": 0.18791244760781117,
|
|
"learning_rate": 3.01040003416698e-06,
|
|
"loss": 0.9926658868789673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28287503123283386,
|
|
"step": 989,
|
|
"valid_targets_mean": 15240.2,
|
|
"valid_targets_min": 11135
|
|
},
|
|
{
|
|
"epoch": 4.21321961620469,
|
|
"grad_norm": 0.2593058081050793,
|
|
"learning_rate": 2.97911145693796e-06,
|
|
"loss": 0.9960601329803467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687077522277832,
|
|
"step": 990,
|
|
"valid_targets_mean": 14847.2,
|
|
"valid_targets_min": 2936
|
|
},
|
|
{
|
|
"epoch": 4.217484008528785,
|
|
"grad_norm": 0.20105488445034136,
|
|
"learning_rate": 2.947973239488009e-06,
|
|
"loss": 0.980415940284729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25325921177864075,
|
|
"step": 991,
|
|
"valid_targets_mean": 14551.3,
|
|
"valid_targets_min": 2467
|
|
},
|
|
{
|
|
"epoch": 4.221748400852879,
|
|
"grad_norm": 0.23225220458020215,
|
|
"learning_rate": 2.91698565688711e-06,
|
|
"loss": 0.9603126049041748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556496858596802,
|
|
"step": 992,
|
|
"valid_targets_mean": 14250.3,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 4.226012793176972,
|
|
"grad_norm": 0.20562525693058248,
|
|
"learning_rate": 2.886148982874566e-06,
|
|
"loss": 0.9346132874488831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2503368854522705,
|
|
"step": 993,
|
|
"valid_targets_mean": 14932.4,
|
|
"valid_targets_min": 5263
|
|
},
|
|
{
|
|
"epoch": 4.230277185501066,
|
|
"grad_norm": 0.1987821114316106,
|
|
"learning_rate": 2.8554634898565668e-06,
|
|
"loss": 0.9752383828163147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23577341437339783,
|
|
"step": 994,
|
|
"valid_targets_mean": 14343.8,
|
|
"valid_targets_min": 2517
|
|
},
|
|
{
|
|
"epoch": 4.23454157782516,
|
|
"grad_norm": 0.21466060222153166,
|
|
"learning_rate": 2.824929448903806e-06,
|
|
"loss": 0.9946862459182739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22818049788475037,
|
|
"step": 995,
|
|
"valid_targets_mean": 15112.9,
|
|
"valid_targets_min": 4219
|
|
},
|
|
{
|
|
"epoch": 4.2388059701492535,
|
|
"grad_norm": 0.2052986670074733,
|
|
"learning_rate": 2.794547129749059e-06,
|
|
"loss": 0.9419824481010437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23334798216819763,
|
|
"step": 996,
|
|
"valid_targets_mean": 15241.3,
|
|
"valid_targets_min": 4205
|
|
},
|
|
{
|
|
"epoch": 4.243070362473348,
|
|
"grad_norm": 0.210589774584146,
|
|
"learning_rate": 2.7643168007848255e-06,
|
|
"loss": 0.9856055378913879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23363542556762695,
|
|
"step": 997,
|
|
"valid_targets_mean": 13191.8,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 4.247334754797441,
|
|
"grad_norm": 0.20159845803103243,
|
|
"learning_rate": 2.734238729060956e-06,
|
|
"loss": 0.9388062357902527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23155111074447632,
|
|
"step": 998,
|
|
"valid_targets_mean": 14693.8,
|
|
"valid_targets_min": 6332
|
|
},
|
|
{
|
|
"epoch": 4.251599147121535,
|
|
"grad_norm": 0.19841540725401038,
|
|
"learning_rate": 2.7043131802822653e-06,
|
|
"loss": 0.9250397682189941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23295876383781433,
|
|
"step": 999,
|
|
"valid_targets_mean": 14107.6,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 4.255863539445629,
|
|
"grad_norm": 0.21472035814166593,
|
|
"learning_rate": 2.674540418806222e-06,
|
|
"loss": 0.9822738766670227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22888775169849396,
|
|
"step": 1000,
|
|
"valid_targets_mean": 13355.3,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 4.2601279317697225,
|
|
"grad_norm": 0.21293974933015425,
|
|
"learning_rate": 2.6449207076405857e-06,
|
|
"loss": 0.990465521812439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25961148738861084,
|
|
"step": 1001,
|
|
"valid_targets_mean": 13930.4,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 4.264392324093817,
|
|
"grad_norm": 0.19974668361238312,
|
|
"learning_rate": 2.6154543084411035e-06,
|
|
"loss": 0.9995056390762329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2363622784614563,
|
|
"step": 1002,
|
|
"valid_targets_mean": 13908.2,
|
|
"valid_targets_min": 3123
|
|
},
|
|
{
|
|
"epoch": 4.268656716417911,
|
|
"grad_norm": 0.1980194004346049,
|
|
"learning_rate": 2.5861414815091834e-06,
|
|
"loss": 0.9617325067520142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23854133486747742,
|
|
"step": 1003,
|
|
"valid_targets_mean": 14161.7,
|
|
"valid_targets_min": 3380
|
|
},
|
|
{
|
|
"epoch": 4.272921108742004,
|
|
"grad_norm": 0.2021275426898355,
|
|
"learning_rate": 2.5569824857895987e-06,
|
|
"loss": 0.9621407389640808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2301676869392395,
|
|
"step": 1004,
|
|
"valid_targets_mean": 15462.8,
|
|
"valid_targets_min": 8361
|
|
},
|
|
{
|
|
"epoch": 4.277185501066098,
|
|
"grad_norm": 0.20373434870626908,
|
|
"learning_rate": 2.5279775788682083e-06,
|
|
"loss": 0.970307469367981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22979024052619934,
|
|
"step": 1005,
|
|
"valid_targets_mean": 13786.1,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 4.281449893390192,
|
|
"grad_norm": 0.19044660559305157,
|
|
"learning_rate": 2.499127016969671e-06,
|
|
"loss": 0.997826099395752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27288347482681274,
|
|
"step": 1006,
|
|
"valid_targets_mean": 15124.5,
|
|
"valid_targets_min": 7044
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.19287068236436444,
|
|
"learning_rate": 2.4704310549551934e-06,
|
|
"loss": 0.9802889823913574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2448410540819168,
|
|
"step": 1007,
|
|
"valid_targets_mean": 14889.1,
|
|
"valid_targets_min": 2810
|
|
},
|
|
{
|
|
"epoch": 4.28997867803838,
|
|
"grad_norm": 0.18930024617420768,
|
|
"learning_rate": 2.441889946320266e-06,
|
|
"loss": 0.9607802629470825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22162513434886932,
|
|
"step": 1008,
|
|
"valid_targets_mean": 14288.4,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 4.294243070362473,
|
|
"grad_norm": 0.2018906410964189,
|
|
"learning_rate": 2.4135039431924233e-06,
|
|
"loss": 1.0122121572494507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23975487053394318,
|
|
"step": 1009,
|
|
"valid_targets_mean": 14440.8,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 4.298507462686567,
|
|
"grad_norm": 0.19722736024974016,
|
|
"learning_rate": 2.3852732963290426e-06,
|
|
"loss": 0.9613465070724487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24492129683494568,
|
|
"step": 1010,
|
|
"valid_targets_mean": 15772.8,
|
|
"valid_targets_min": 11202
|
|
},
|
|
{
|
|
"epoch": 4.302771855010661,
|
|
"grad_norm": 0.21498948755335925,
|
|
"learning_rate": 2.3571982551150853e-06,
|
|
"loss": 0.9710594415664673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269625961780548,
|
|
"step": 1011,
|
|
"valid_targets_mean": 14957.4,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 4.3070362473347545,
|
|
"grad_norm": 0.22412323186375557,
|
|
"learning_rate": 2.329279067560937e-06,
|
|
"loss": 0.9520131349563599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23197180032730103,
|
|
"step": 1012,
|
|
"valid_targets_mean": 15725.0,
|
|
"valid_targets_min": 10802
|
|
},
|
|
{
|
|
"epoch": 4.311300639658849,
|
|
"grad_norm": 0.22537801821682382,
|
|
"learning_rate": 2.301515980300182e-06,
|
|
"loss": 1.0108246803283691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24902258813381195,
|
|
"step": 1013,
|
|
"valid_targets_mean": 14878.5,
|
|
"valid_targets_min": 4121
|
|
},
|
|
{
|
|
"epoch": 4.315565031982943,
|
|
"grad_norm": 0.2001491403327739,
|
|
"learning_rate": 2.2739092385874527e-06,
|
|
"loss": 0.9675329923629761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24441777169704437,
|
|
"step": 1014,
|
|
"valid_targets_mean": 15091.1,
|
|
"valid_targets_min": 9708
|
|
},
|
|
{
|
|
"epoch": 4.319829424307036,
|
|
"grad_norm": 0.20908604613563797,
|
|
"learning_rate": 2.2464590862962443e-06,
|
|
"loss": 1.0072133541107178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25043320655822754,
|
|
"step": 1015,
|
|
"valid_targets_mean": 15506.5,
|
|
"valid_targets_min": 11439
|
|
},
|
|
{
|
|
"epoch": 4.32409381663113,
|
|
"grad_norm": 0.22253420025421772,
|
|
"learning_rate": 2.219165765916769e-06,
|
|
"loss": 0.9655839800834656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24847547709941864,
|
|
"step": 1016,
|
|
"valid_targets_mean": 13985.8,
|
|
"valid_targets_min": 5122
|
|
},
|
|
{
|
|
"epoch": 4.3283582089552235,
|
|
"grad_norm": 0.20529498680933142,
|
|
"learning_rate": 2.192029518553798e-06,
|
|
"loss": 0.9373665452003479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24634675681591034,
|
|
"step": 1017,
|
|
"valid_targets_mean": 14612.4,
|
|
"valid_targets_min": 2367
|
|
},
|
|
{
|
|
"epoch": 4.332622601279318,
|
|
"grad_norm": 0.19619931112758604,
|
|
"learning_rate": 2.165050583924566e-06,
|
|
"loss": 0.9549002647399902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24367493391036987,
|
|
"step": 1018,
|
|
"valid_targets_mean": 14470.8,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 4.336886993603412,
|
|
"grad_norm": 0.1994064580671731,
|
|
"learning_rate": 2.1382292003566163e-06,
|
|
"loss": 0.981368362903595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24733182787895203,
|
|
"step": 1019,
|
|
"valid_targets_mean": 14428.2,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 4.341151385927505,
|
|
"grad_norm": 0.21250530539061135,
|
|
"learning_rate": 2.1115656047857213e-06,
|
|
"loss": 0.9029428362846375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22050178050994873,
|
|
"step": 1020,
|
|
"valid_targets_mean": 14133.2,
|
|
"valid_targets_min": 6151
|
|
},
|
|
{
|
|
"epoch": 4.345415778251599,
|
|
"grad_norm": 0.20364733623700895,
|
|
"learning_rate": 2.0850600327537806e-06,
|
|
"loss": 0.9489177465438843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23130053281784058,
|
|
"step": 1021,
|
|
"valid_targets_mean": 13676.0,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 4.349680170575693,
|
|
"grad_norm": 0.20793489657627956,
|
|
"learning_rate": 2.058712718406719e-06,
|
|
"loss": 1.0012977123260498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24294458329677582,
|
|
"step": 1022,
|
|
"valid_targets_mean": 14311.4,
|
|
"valid_targets_min": 3247
|
|
},
|
|
{
|
|
"epoch": 4.353944562899787,
|
|
"grad_norm": 0.1996083012867493,
|
|
"learning_rate": 2.032523894492471e-06,
|
|
"loss": 1.002089262008667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545391321182251,
|
|
"step": 1023,
|
|
"valid_targets_mean": 15110.4,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 4.358208955223881,
|
|
"grad_norm": 0.20524861939094846,
|
|
"learning_rate": 2.0064937923588634e-06,
|
|
"loss": 1.0034849643707275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25179699063301086,
|
|
"step": 1024,
|
|
"valid_targets_mean": 14426.9,
|
|
"valid_targets_min": 5173
|
|
},
|
|
{
|
|
"epoch": 4.362473347547974,
|
|
"grad_norm": 0.20530960633526557,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 0.9880184531211853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.247049480676651,
|
|
"step": 1025,
|
|
"valid_targets_mean": 14459.1,
|
|
"valid_targets_min": 4839
|
|
},
|
|
{
|
|
"epoch": 4.366737739872068,
|
|
"grad_norm": 0.20369935863209962,
|
|
"learning_rate": 1.954910671812298e-06,
|
|
"loss": 0.9592328667640686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22283047437667847,
|
|
"step": 1026,
|
|
"valid_targets_mean": 14732.3,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 4.371002132196162,
|
|
"grad_norm": 0.203705084175426,
|
|
"learning_rate": 1.9293581090762894e-06,
|
|
"loss": 0.9722834229469299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688411474227905,
|
|
"step": 1027,
|
|
"valid_targets_mean": 14489.7,
|
|
"valid_targets_min": 6514
|
|
},
|
|
{
|
|
"epoch": 4.3752665245202556,
|
|
"grad_norm": 0.19553934077825036,
|
|
"learning_rate": 1.9039651794708058e-06,
|
|
"loss": 0.939979612827301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241957187652588,
|
|
"step": 1028,
|
|
"valid_targets_mean": 14407.0,
|
|
"valid_targets_min": 4268
|
|
},
|
|
{
|
|
"epoch": 4.37953091684435,
|
|
"grad_norm": 0.19895295420118192,
|
|
"learning_rate": 1.8787321073128817e-06,
|
|
"loss": 0.9941613674163818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707201838493347,
|
|
"step": 1029,
|
|
"valid_targets_mean": 15288.0,
|
|
"valid_targets_min": 11404
|
|
},
|
|
{
|
|
"epoch": 4.383795309168444,
|
|
"grad_norm": 0.20924745812634882,
|
|
"learning_rate": 1.8536591155073958e-06,
|
|
"loss": 1.0341296195983887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24675840139389038,
|
|
"step": 1030,
|
|
"valid_targets_mean": 14246.6,
|
|
"valid_targets_min": 2476
|
|
},
|
|
{
|
|
"epoch": 4.388059701492537,
|
|
"grad_norm": 0.19061264074416104,
|
|
"learning_rate": 1.8287464255451181e-06,
|
|
"loss": 0.9730916023254395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2314380705356598,
|
|
"step": 1031,
|
|
"valid_targets_mean": 14085.7,
|
|
"valid_targets_min": 4573
|
|
},
|
|
{
|
|
"epoch": 4.392324093816631,
|
|
"grad_norm": 0.20397601553099576,
|
|
"learning_rate": 1.803994257500714e-06,
|
|
"loss": 0.9657227396965027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24006201326847076,
|
|
"step": 1032,
|
|
"valid_targets_mean": 14661.8,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 4.396588486140725,
|
|
"grad_norm": 0.20865705411493327,
|
|
"learning_rate": 1.7794028300308474e-06,
|
|
"loss": 0.986137866973877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520632743835449,
|
|
"step": 1033,
|
|
"valid_targets_mean": 14641.4,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 4.400852878464819,
|
|
"grad_norm": 0.20284781020744608,
|
|
"learning_rate": 1.7549723603722003e-06,
|
|
"loss": 0.9705761671066284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24235820770263672,
|
|
"step": 1034,
|
|
"valid_targets_mean": 15059.8,
|
|
"valid_targets_min": 4959
|
|
},
|
|
{
|
|
"epoch": 4.405117270788913,
|
|
"grad_norm": 0.19892338160497552,
|
|
"learning_rate": 1.730703064339605e-06,
|
|
"loss": 0.9851474165916443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2524890601634979,
|
|
"step": 1035,
|
|
"valid_targets_mean": 15045.9,
|
|
"valid_targets_min": 6417
|
|
},
|
|
{
|
|
"epoch": 4.409381663113006,
|
|
"grad_norm": 0.20375038794812805,
|
|
"learning_rate": 1.7065951563241022e-06,
|
|
"loss": 0.9457334280014038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22716794908046722,
|
|
"step": 1036,
|
|
"valid_targets_mean": 14376.1,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 4.4136460554371,
|
|
"grad_norm": 0.20449893216561013,
|
|
"learning_rate": 1.682648849291051e-06,
|
|
"loss": 1.0145095586776733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23718661069869995,
|
|
"step": 1037,
|
|
"valid_targets_mean": 14745.7,
|
|
"valid_targets_min": 4989
|
|
},
|
|
{
|
|
"epoch": 4.417910447761194,
|
|
"grad_norm": 0.1996462200092275,
|
|
"learning_rate": 1.6588643547782579e-06,
|
|
"loss": 1.0064222812652588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24623239040374756,
|
|
"step": 1038,
|
|
"valid_targets_mean": 14692.8,
|
|
"valid_targets_min": 6686
|
|
},
|
|
{
|
|
"epoch": 4.422174840085288,
|
|
"grad_norm": 0.21389140632699094,
|
|
"learning_rate": 1.6352418828941052e-06,
|
|
"loss": 0.9757487773895264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23529288172721863,
|
|
"step": 1039,
|
|
"valid_targets_mean": 14086.5,
|
|
"valid_targets_min": 5272
|
|
},
|
|
{
|
|
"epoch": 4.426439232409382,
|
|
"grad_norm": 0.20425936795686,
|
|
"learning_rate": 1.6117816423156952e-06,
|
|
"loss": 0.9777926206588745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25531190633773804,
|
|
"step": 1040,
|
|
"valid_targets_mean": 14957.0,
|
|
"valid_targets_min": 8445
|
|
},
|
|
{
|
|
"epoch": 4.430703624733475,
|
|
"grad_norm": 0.20492237636884952,
|
|
"learning_rate": 1.5884838402870029e-06,
|
|
"loss": 0.9735987186431885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24197989702224731,
|
|
"step": 1041,
|
|
"valid_targets_mean": 14819.6,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 4.434968017057569,
|
|
"grad_norm": 0.19880291032050762,
|
|
"learning_rate": 1.5653486826170384e-06,
|
|
"loss": 0.9829146265983582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2480197250843048,
|
|
"step": 1042,
|
|
"valid_targets_mean": 14243.4,
|
|
"valid_targets_min": 2334
|
|
},
|
|
{
|
|
"epoch": 4.439232409381663,
|
|
"grad_norm": 0.19315708773563553,
|
|
"learning_rate": 1.5423763736780583e-06,
|
|
"loss": 0.9392817616462708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22090306878089905,
|
|
"step": 1043,
|
|
"valid_targets_mean": 13959.9,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 4.443496801705757,
|
|
"grad_norm": 0.19358944058085437,
|
|
"learning_rate": 1.5195671164037173e-06,
|
|
"loss": 0.9625712633132935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25485068559646606,
|
|
"step": 1044,
|
|
"valid_targets_mean": 14064.9,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 4.447761194029851,
|
|
"grad_norm": 0.19348030520223267,
|
|
"learning_rate": 1.496921112287315e-06,
|
|
"loss": 0.9723199605941772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2429625689983368,
|
|
"step": 1045,
|
|
"valid_targets_mean": 15046.3,
|
|
"valid_targets_min": 6694
|
|
},
|
|
{
|
|
"epoch": 4.452025586353945,
|
|
"grad_norm": 0.2117723483120604,
|
|
"learning_rate": 1.4744385613799894e-06,
|
|
"loss": 0.9834375381469727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23440350592136383,
|
|
"step": 1046,
|
|
"valid_targets_mean": 14326.0,
|
|
"valid_targets_min": 6455
|
|
},
|
|
{
|
|
"epoch": 4.456289978678038,
|
|
"grad_norm": 0.21773765328036296,
|
|
"learning_rate": 1.4521196622889644e-06,
|
|
"loss": 0.9381828308105469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22930076718330383,
|
|
"step": 1047,
|
|
"valid_targets_mean": 13610.3,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 4.460554371002132,
|
|
"grad_norm": 0.1991311576162202,
|
|
"learning_rate": 1.4299646121757892e-06,
|
|
"loss": 0.9944796562194824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2413451373577118,
|
|
"step": 1048,
|
|
"valid_targets_mean": 15320.3,
|
|
"valid_targets_min": 6751
|
|
},
|
|
{
|
|
"epoch": 4.464818763326226,
|
|
"grad_norm": 0.18407849986177524,
|
|
"learning_rate": 1.4079736067545912e-06,
|
|
"loss": 0.9564270973205566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24462206661701202,
|
|
"step": 1049,
|
|
"valid_targets_mean": 15228.7,
|
|
"valid_targets_min": 6663
|
|
},
|
|
{
|
|
"epoch": 4.46908315565032,
|
|
"grad_norm": 0.193159491048493,
|
|
"learning_rate": 1.3861468402903634e-06,
|
|
"loss": 0.9560383558273315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24201983213424683,
|
|
"step": 1050,
|
|
"valid_targets_mean": 14435.8,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 4.473347547974414,
|
|
"grad_norm": 0.19354462424796337,
|
|
"learning_rate": 1.3644845055972322e-06,
|
|
"loss": 0.9236656427383423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21291989088058472,
|
|
"step": 1051,
|
|
"valid_targets_mean": 14298.3,
|
|
"valid_targets_min": 5524
|
|
},
|
|
{
|
|
"epoch": 4.477611940298507,
|
|
"grad_norm": 0.19452739699997101,
|
|
"learning_rate": 1.3429867940367626e-06,
|
|
"loss": 0.9672813415527344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27295464277267456,
|
|
"step": 1052,
|
|
"valid_targets_mean": 15544.0,
|
|
"valid_targets_min": 8747
|
|
},
|
|
{
|
|
"epoch": 4.481876332622601,
|
|
"grad_norm": 0.18738754449816722,
|
|
"learning_rate": 1.321653895516264e-06,
|
|
"loss": 0.9609742760658264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.226200133562088,
|
|
"step": 1053,
|
|
"valid_targets_mean": 14384.1,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 4.486140724946695,
|
|
"grad_norm": 0.1887567323720837,
|
|
"learning_rate": 1.3004859984871199e-06,
|
|
"loss": 0.9949420094490051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23817016184329987,
|
|
"step": 1054,
|
|
"valid_targets_mean": 14810.5,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 4.490405117270789,
|
|
"grad_norm": 0.2070617872395055,
|
|
"learning_rate": 1.279483289943102e-06,
|
|
"loss": 0.9484139084815979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2263062745332718,
|
|
"step": 1055,
|
|
"valid_targets_mean": 14849.5,
|
|
"valid_targets_min": 5249
|
|
},
|
|
{
|
|
"epoch": 4.494669509594883,
|
|
"grad_norm": 0.19987485529085944,
|
|
"learning_rate": 1.2586459554187558e-06,
|
|
"loss": 0.9571871757507324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22258028388023376,
|
|
"step": 1056,
|
|
"valid_targets_mean": 13745.0,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 4.498933901918977,
|
|
"grad_norm": 0.19674734475989858,
|
|
"learning_rate": 1.2379741789877175e-06,
|
|
"loss": 0.9423996210098267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22760263085365295,
|
|
"step": 1057,
|
|
"valid_targets_mean": 13998.2,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 4.50319829424307,
|
|
"grad_norm": 0.1952951967366402,
|
|
"learning_rate": 1.2174681432611245e-06,
|
|
"loss": 0.9592997431755066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522493004798889,
|
|
"step": 1058,
|
|
"valid_targets_mean": 15055.7,
|
|
"valid_targets_min": 5225
|
|
},
|
|
{
|
|
"epoch": 4.507462686567164,
|
|
"grad_norm": 0.199452907778698,
|
|
"learning_rate": 1.1971280293859811e-06,
|
|
"loss": 0.9340470433235168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26179543137550354,
|
|
"step": 1059,
|
|
"valid_targets_mean": 15610.4,
|
|
"valid_targets_min": 8812
|
|
},
|
|
{
|
|
"epoch": 4.5117270788912585,
|
|
"grad_norm": 0.1986750426004486,
|
|
"learning_rate": 1.17695401704357e-06,
|
|
"loss": 0.9508144855499268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2387581467628479,
|
|
"step": 1060,
|
|
"valid_targets_mean": 14518.6,
|
|
"valid_targets_min": 4628
|
|
},
|
|
{
|
|
"epoch": 4.515991471215352,
|
|
"grad_norm": 0.1962840328492531,
|
|
"learning_rate": 1.1569462844478552e-06,
|
|
"loss": 0.9892048239707947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530350983142853,
|
|
"step": 1061,
|
|
"valid_targets_mean": 14293.0,
|
|
"valid_targets_min": 7136
|
|
},
|
|
{
|
|
"epoch": 4.520255863539446,
|
|
"grad_norm": 0.20247449993400812,
|
|
"learning_rate": 1.1371050083439107e-06,
|
|
"loss": 0.9638339281082153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25034797191619873,
|
|
"step": 1062,
|
|
"valid_targets_mean": 14417.9,
|
|
"valid_targets_min": 3406
|
|
},
|
|
{
|
|
"epoch": 4.524520255863539,
|
|
"grad_norm": 0.21083926014951604,
|
|
"learning_rate": 1.1174303640063622e-06,
|
|
"loss": 0.9721899628639221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25117212533950806,
|
|
"step": 1063,
|
|
"valid_targets_mean": 14916.6,
|
|
"valid_targets_min": 8177
|
|
},
|
|
{
|
|
"epoch": 4.528784648187633,
|
|
"grad_norm": 0.20357023967062668,
|
|
"learning_rate": 1.097922525237849e-06,
|
|
"loss": 0.9916815757751465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24109315872192383,
|
|
"step": 1064,
|
|
"valid_targets_mean": 15386.4,
|
|
"valid_targets_min": 6365
|
|
},
|
|
{
|
|
"epoch": 4.533049040511727,
|
|
"grad_norm": 0.1927643028792073,
|
|
"learning_rate": 1.078581664367455e-06,
|
|
"loss": 0.9920657873153687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630733549594879,
|
|
"step": 1065,
|
|
"valid_targets_mean": 14550.4,
|
|
"valid_targets_min": 4874
|
|
},
|
|
{
|
|
"epoch": 4.537313432835821,
|
|
"grad_norm": 0.18683045438312407,
|
|
"learning_rate": 1.0594079522492274e-06,
|
|
"loss": 0.9641156196594238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2433815896511078,
|
|
"step": 1066,
|
|
"valid_targets_mean": 14769.9,
|
|
"valid_targets_min": 7369
|
|
},
|
|
{
|
|
"epoch": 4.541577825159915,
|
|
"grad_norm": 0.1821657437058267,
|
|
"learning_rate": 1.040401558260633e-06,
|
|
"loss": 0.932820737361908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2297649085521698,
|
|
"step": 1067,
|
|
"valid_targets_mean": 15161.9,
|
|
"valid_targets_min": 4365
|
|
},
|
|
{
|
|
"epoch": 4.545842217484008,
|
|
"grad_norm": 0.1963807749093568,
|
|
"learning_rate": 1.0215626503010911e-06,
|
|
"loss": 0.9635487794876099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24949057400226593,
|
|
"step": 1068,
|
|
"valid_targets_mean": 15064.1,
|
|
"valid_targets_min": 5965
|
|
},
|
|
{
|
|
"epoch": 4.550106609808102,
|
|
"grad_norm": 0.1870221177247441,
|
|
"learning_rate": 1.002891394790475e-06,
|
|
"loss": 0.96346116065979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602008581161499,
|
|
"step": 1069,
|
|
"valid_targets_mean": 15075.1,
|
|
"valid_targets_min": 10306
|
|
},
|
|
{
|
|
"epoch": 4.554371002132196,
|
|
"grad_norm": 0.1822849417529856,
|
|
"learning_rate": 9.843879566676273e-07,
|
|
"loss": 0.9370256662368774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21624112129211426,
|
|
"step": 1070,
|
|
"valid_targets_mean": 14604.8,
|
|
"valid_targets_min": 7174
|
|
},
|
|
{
|
|
"epoch": 4.55863539445629,
|
|
"grad_norm": 0.20643035886676678,
|
|
"learning_rate": 9.660524993889386e-07,
|
|
"loss": 0.9779180884361267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24937522411346436,
|
|
"step": 1071,
|
|
"valid_targets_mean": 14791.7,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 4.562899786780384,
|
|
"grad_norm": 0.19109701365353887,
|
|
"learning_rate": 9.478851849268733e-07,
|
|
"loss": 1.0181219577789307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2429862916469574,
|
|
"step": 1072,
|
|
"valid_targets_mean": 15323.3,
|
|
"valid_targets_min": 4432
|
|
},
|
|
{
|
|
"epoch": 4.567164179104478,
|
|
"grad_norm": 0.18118081679449022,
|
|
"learning_rate": 9.298861737685527e-07,
|
|
"loss": 0.9861464500427246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25720667839050293,
|
|
"step": 1073,
|
|
"valid_targets_mean": 15189.2,
|
|
"valid_targets_min": 4444
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.18212939384405893,
|
|
"learning_rate": 9.120556249143341e-07,
|
|
"loss": 0.9695151448249817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25585541129112244,
|
|
"step": 1074,
|
|
"valid_targets_mean": 15192.9,
|
|
"valid_targets_min": 6463
|
|
},
|
|
{
|
|
"epoch": 4.575692963752665,
|
|
"grad_norm": 0.19423545840605633,
|
|
"learning_rate": 8.943936958763988e-07,
|
|
"loss": 0.9865801334381104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24307169020175934,
|
|
"step": 1075,
|
|
"valid_targets_mean": 14832.0,
|
|
"valid_targets_min": 6920
|
|
},
|
|
{
|
|
"epoch": 4.5799573560767595,
|
|
"grad_norm": 0.19147142446889295,
|
|
"learning_rate": 8.769005426773836e-07,
|
|
"loss": 0.963005781173706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25235921144485474,
|
|
"step": 1076,
|
|
"valid_targets_mean": 14330.1,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 4.584221748400853,
|
|
"grad_norm": 0.19455004514016275,
|
|
"learning_rate": 8.595763198489714e-07,
|
|
"loss": 0.9741256237030029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26922300457954407,
|
|
"step": 1077,
|
|
"valid_targets_mean": 15214.1,
|
|
"valid_targets_min": 2728
|
|
},
|
|
{
|
|
"epoch": 4.588486140724947,
|
|
"grad_norm": 0.19368908881746322,
|
|
"learning_rate": 8.42421180430546e-07,
|
|
"loss": 0.9532241821289062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24407343566417694,
|
|
"step": 1078,
|
|
"valid_targets_mean": 13688.9,
|
|
"valid_targets_min": 3217
|
|
},
|
|
{
|
|
"epoch": 4.59275053304904,
|
|
"grad_norm": 0.19496360297809873,
|
|
"learning_rate": 8.254352759678386e-07,
|
|
"loss": 0.9714552760124207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24510961771011353,
|
|
"step": 1079,
|
|
"valid_targets_mean": 14655.3,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.597014925373134,
|
|
"grad_norm": 0.1908774658271576,
|
|
"learning_rate": 8.086187565115877e-07,
|
|
"loss": 0.984738826751709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25681084394454956,
|
|
"step": 1080,
|
|
"valid_targets_mean": 14677.1,
|
|
"valid_targets_min": 6142
|
|
},
|
|
{
|
|
"epoch": 4.601279317697228,
|
|
"grad_norm": 0.1877237083834181,
|
|
"learning_rate": 7.919717706162067e-07,
|
|
"loss": 0.9372379779815674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2245333045721054,
|
|
"step": 1081,
|
|
"valid_targets_mean": 15138.4,
|
|
"valid_targets_min": 4384
|
|
},
|
|
{
|
|
"epoch": 4.605543710021322,
|
|
"grad_norm": 0.1831507254650395,
|
|
"learning_rate": 7.754944653384777e-07,
|
|
"loss": 0.9521864652633667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23007790744304657,
|
|
"step": 1082,
|
|
"valid_targets_mean": 14424.0,
|
|
"valid_targets_min": 3680
|
|
},
|
|
{
|
|
"epoch": 4.609808102345416,
|
|
"grad_norm": 0.191111772789293,
|
|
"learning_rate": 7.591869862362534e-07,
|
|
"loss": 0.9851489067077637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24849659204483032,
|
|
"step": 1083,
|
|
"valid_targets_mean": 13968.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 4.61407249466951,
|
|
"grad_norm": 0.1864331560292401,
|
|
"learning_rate": 7.430494773671682e-07,
|
|
"loss": 0.9887434840202332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22047407925128937,
|
|
"step": 1084,
|
|
"valid_targets_mean": 13599.2,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 4.618336886993603,
|
|
"grad_norm": 0.1817411655396072,
|
|
"learning_rate": 7.270820812873714e-07,
|
|
"loss": 0.9169268608093262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22810609638690948,
|
|
"step": 1085,
|
|
"valid_targets_mean": 14565.4,
|
|
"valid_targets_min": 3230
|
|
},
|
|
{
|
|
"epoch": 4.622601279317697,
|
|
"grad_norm": 0.1992395987360907,
|
|
"learning_rate": 7.112849390502563e-07,
|
|
"loss": 0.9655635356903076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23568344116210938,
|
|
"step": 1086,
|
|
"valid_targets_mean": 14391.3,
|
|
"valid_targets_min": 5144
|
|
},
|
|
{
|
|
"epoch": 4.6268656716417915,
|
|
"grad_norm": 0.18944238972818978,
|
|
"learning_rate": 6.956581902052306e-07,
|
|
"loss": 0.9082788228988647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502131760120392,
|
|
"step": 1087,
|
|
"valid_targets_mean": 14731.2,
|
|
"valid_targets_min": 4783
|
|
},
|
|
{
|
|
"epoch": 4.631130063965885,
|
|
"grad_norm": 0.19055715439203152,
|
|
"learning_rate": 6.802019727964593e-07,
|
|
"loss": 0.9884518384933472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24835459887981415,
|
|
"step": 1088,
|
|
"valid_targets_mean": 15128.8,
|
|
"valid_targets_min": 7616
|
|
},
|
|
{
|
|
"epoch": 4.635394456289979,
|
|
"grad_norm": 0.1823937865521721,
|
|
"learning_rate": 6.64916423361679e-07,
|
|
"loss": 0.9693202376365662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26051390171051025,
|
|
"step": 1089,
|
|
"valid_targets_mean": 15145.5,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 4.639658848614072,
|
|
"grad_norm": 0.19675032678958396,
|
|
"learning_rate": 6.498016769309567e-07,
|
|
"loss": 0.9987223148345947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25262898206710815,
|
|
"step": 1090,
|
|
"valid_targets_mean": 14835.9,
|
|
"valid_targets_min": 6481
|
|
},
|
|
{
|
|
"epoch": 4.643923240938166,
|
|
"grad_norm": 0.18832584384253528,
|
|
"learning_rate": 6.348578670255224e-07,
|
|
"loss": 0.9596326351165771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21527081727981567,
|
|
"step": 1091,
|
|
"valid_targets_mean": 14126.3,
|
|
"valid_targets_min": 2377
|
|
},
|
|
{
|
|
"epoch": 4.6481876332622605,
|
|
"grad_norm": 0.18664634386085083,
|
|
"learning_rate": 6.200851256565799e-07,
|
|
"loss": 0.9876736402511597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23937787115573883,
|
|
"step": 1092,
|
|
"valid_targets_mean": 15427.7,
|
|
"valid_targets_min": 5751
|
|
},
|
|
{
|
|
"epoch": 4.652452025586354,
|
|
"grad_norm": 0.19428265256980123,
|
|
"learning_rate": 6.054835833241357e-07,
|
|
"loss": 1.0274147987365723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26426398754119873,
|
|
"step": 1093,
|
|
"valid_targets_mean": 15013.9,
|
|
"valid_targets_min": 4037
|
|
},
|
|
{
|
|
"epoch": 4.656716417910448,
|
|
"grad_norm": 0.20788066764770038,
|
|
"learning_rate": 5.910533690158593e-07,
|
|
"loss": 0.9401581287384033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23334196209907532,
|
|
"step": 1094,
|
|
"valid_targets_mean": 14892.3,
|
|
"valid_targets_min": 3271
|
|
},
|
|
{
|
|
"epoch": 4.660980810234541,
|
|
"grad_norm": 0.19186387056237522,
|
|
"learning_rate": 5.767946102059307e-07,
|
|
"loss": 0.9360172748565674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23446571826934814,
|
|
"step": 1095,
|
|
"valid_targets_mean": 14785.7,
|
|
"valid_targets_min": 8805
|
|
},
|
|
{
|
|
"epoch": 4.665245202558635,
|
|
"grad_norm": 0.1893500666241742,
|
|
"learning_rate": 5.627074328539173e-07,
|
|
"loss": 0.9583896398544312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23185381293296814,
|
|
"step": 1096,
|
|
"valid_targets_mean": 14196.5,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 4.669509594882729,
|
|
"grad_norm": 0.18542846343612387,
|
|
"learning_rate": 5.487919614036741e-07,
|
|
"loss": 0.9889509081840515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24657568335533142,
|
|
"step": 1097,
|
|
"valid_targets_mean": 14630.5,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 4.673773987206823,
|
|
"grad_norm": 0.18057976698980382,
|
|
"learning_rate": 5.350483187822231e-07,
|
|
"loss": 0.9848129749298096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22202110290527344,
|
|
"step": 1098,
|
|
"valid_targets_mean": 14386.3,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 4.678038379530917,
|
|
"grad_norm": 0.17782009267891613,
|
|
"learning_rate": 5.214766263986848e-07,
|
|
"loss": 0.9672038555145264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23220334947109222,
|
|
"step": 1099,
|
|
"valid_targets_mean": 14210.4,
|
|
"valid_targets_min": 4534
|
|
},
|
|
{
|
|
"epoch": 4.682302771855011,
|
|
"grad_norm": 0.1926245261721798,
|
|
"learning_rate": 5.080770041431926e-07,
|
|
"loss": 0.9739687442779541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25985684990882874,
|
|
"step": 1100,
|
|
"valid_targets_mean": 14957.4,
|
|
"valid_targets_min": 7315
|
|
},
|
|
{
|
|
"epoch": 4.686567164179104,
|
|
"grad_norm": 0.18359492740224656,
|
|
"learning_rate": 4.948495703858492e-07,
|
|
"loss": 1.0037221908569336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2474515289068222,
|
|
"step": 1101,
|
|
"valid_targets_mean": 15043.2,
|
|
"valid_targets_min": 7809
|
|
},
|
|
{
|
|
"epoch": 4.690831556503198,
|
|
"grad_norm": 0.1883747032991499,
|
|
"learning_rate": 4.81794441975667e-07,
|
|
"loss": 0.9757822751998901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2467784881591797,
|
|
"step": 1102,
|
|
"valid_targets_mean": 14021.8,
|
|
"valid_targets_min": 4164
|
|
},
|
|
{
|
|
"epoch": 4.6950959488272925,
|
|
"grad_norm": 0.19867445113700383,
|
|
"learning_rate": 4.689117342395388e-07,
|
|
"loss": 0.9495012164115906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2412489503622055,
|
|
"step": 1103,
|
|
"valid_targets_mean": 14995.4,
|
|
"valid_targets_min": 7534
|
|
},
|
|
{
|
|
"epoch": 4.699360341151386,
|
|
"grad_norm": 0.20599125719542694,
|
|
"learning_rate": 4.5620156098122204e-07,
|
|
"loss": 0.958198606967926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24601440131664276,
|
|
"step": 1104,
|
|
"valid_targets_mean": 15277.9,
|
|
"valid_targets_min": 5342
|
|
},
|
|
{
|
|
"epoch": 4.70362473347548,
|
|
"grad_norm": 0.2072001696287035,
|
|
"learning_rate": 4.4366403448033334e-07,
|
|
"loss": 0.9746035933494568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24131663143634796,
|
|
"step": 1105,
|
|
"valid_targets_mean": 14005.7,
|
|
"valid_targets_min": 3799
|
|
},
|
|
{
|
|
"epoch": 4.707889125799573,
|
|
"grad_norm": 0.18646393735473216,
|
|
"learning_rate": 4.3129926549136057e-07,
|
|
"loss": 0.9648074507713318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21114256978034973,
|
|
"step": 1106,
|
|
"valid_targets_mean": 13501.6,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 4.712153518123667,
|
|
"grad_norm": 0.1801809206992736,
|
|
"learning_rate": 4.191073632426701e-07,
|
|
"loss": 0.9577111601829529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24206210672855377,
|
|
"step": 1107,
|
|
"valid_targets_mean": 14789.1,
|
|
"valid_targets_min": 2956
|
|
},
|
|
{
|
|
"epoch": 4.7164179104477615,
|
|
"grad_norm": 0.20244718123289038,
|
|
"learning_rate": 4.0708843543555643e-07,
|
|
"loss": 0.9597060680389404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592220604419708,
|
|
"step": 1108,
|
|
"valid_targets_mean": 14971.0,
|
|
"valid_targets_min": 7082
|
|
},
|
|
{
|
|
"epoch": 4.720682302771855,
|
|
"grad_norm": 0.19004476967103429,
|
|
"learning_rate": 3.95242588243292e-07,
|
|
"loss": 0.9578391909599304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24784809350967407,
|
|
"step": 1109,
|
|
"valid_targets_mean": 14803.8,
|
|
"valid_targets_min": 5534
|
|
},
|
|
{
|
|
"epoch": 4.724946695095949,
|
|
"grad_norm": 0.1787557222587649,
|
|
"learning_rate": 3.8356992631017e-07,
|
|
"loss": 0.9749581813812256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568989396095276,
|
|
"step": 1110,
|
|
"valid_targets_mean": 14341.0,
|
|
"valid_targets_min": 4140
|
|
},
|
|
{
|
|
"epoch": 4.729211087420042,
|
|
"grad_norm": 0.183170124003171,
|
|
"learning_rate": 3.720705527506008e-07,
|
|
"loss": 0.9631220698356628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22998714447021484,
|
|
"step": 1111,
|
|
"valid_targets_mean": 14496.9,
|
|
"valid_targets_min": 2722
|
|
},
|
|
{
|
|
"epoch": 4.733475479744136,
|
|
"grad_norm": 0.1913108582335516,
|
|
"learning_rate": 3.60744569148197e-07,
|
|
"loss": 0.9523494243621826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521563172340393,
|
|
"step": 1112,
|
|
"valid_targets_mean": 15027.6,
|
|
"valid_targets_min": 8215
|
|
},
|
|
{
|
|
"epoch": 4.73773987206823,
|
|
"grad_norm": 0.1871176698881298,
|
|
"learning_rate": 3.4959207555485873e-07,
|
|
"loss": 0.981401264667511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24374215304851532,
|
|
"step": 1113,
|
|
"valid_targets_mean": 14313.0,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 4.742004264392325,
|
|
"grad_norm": 0.1943575296716895,
|
|
"learning_rate": 3.3861317048992317e-07,
|
|
"loss": 0.9813427925109863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24430063366889954,
|
|
"step": 1114,
|
|
"valid_targets_mean": 14462.2,
|
|
"valid_targets_min": 4500
|
|
},
|
|
{
|
|
"epoch": 4.746268656716418,
|
|
"grad_norm": 0.18626119810585876,
|
|
"learning_rate": 3.278079509392562e-07,
|
|
"loss": 0.9732893705368042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25778117775917053,
|
|
"step": 1115,
|
|
"valid_targets_mean": 15003.0,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 4.750533049040512,
|
|
"grad_norm": 0.1811114411390865,
|
|
"learning_rate": 3.171765123544224e-07,
|
|
"loss": 0.9803482294082642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26195472478866577,
|
|
"step": 1116,
|
|
"valid_targets_mean": 14364.8,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 4.754797441364605,
|
|
"grad_norm": 0.1923340252471767,
|
|
"learning_rate": 3.06718948651834e-07,
|
|
"loss": 0.9418221116065979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24983885884284973,
|
|
"step": 1117,
|
|
"valid_targets_mean": 15741.3,
|
|
"valid_targets_min": 9257
|
|
},
|
|
{
|
|
"epoch": 4.759061833688699,
|
|
"grad_norm": 0.18813994100062065,
|
|
"learning_rate": 2.964353522119168e-07,
|
|
"loss": 0.9535703659057617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23694348335266113,
|
|
"step": 1118,
|
|
"valid_targets_mean": 14265.1,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 4.7633262260127935,
|
|
"grad_norm": 0.19063162736733666,
|
|
"learning_rate": 2.863258138783032e-07,
|
|
"loss": 0.9458210468292236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22786492109298706,
|
|
"step": 1119,
|
|
"valid_targets_mean": 15495.7,
|
|
"valid_targets_min": 7691
|
|
},
|
|
{
|
|
"epoch": 4.767590618336887,
|
|
"grad_norm": 0.20158024903992858,
|
|
"learning_rate": 2.7639042295702245e-07,
|
|
"loss": 0.9903920888900757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551453709602356,
|
|
"step": 1120,
|
|
"valid_targets_mean": 14624.2,
|
|
"valid_targets_min": 2602
|
|
},
|
|
{
|
|
"epoch": 4.771855010660981,
|
|
"grad_norm": 0.1791922004784323,
|
|
"learning_rate": 2.666292672157056e-07,
|
|
"loss": 1.004233956336975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25212574005126953,
|
|
"step": 1121,
|
|
"valid_targets_mean": 15172.8,
|
|
"valid_targets_min": 4005
|
|
},
|
|
{
|
|
"epoch": 4.776119402985074,
|
|
"grad_norm": 0.19609224652597176,
|
|
"learning_rate": 2.570424328828325e-07,
|
|
"loss": 1.0066776275634766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26131948828697205,
|
|
"step": 1122,
|
|
"valid_targets_mean": 14962.2,
|
|
"valid_targets_min": 6855
|
|
},
|
|
{
|
|
"epoch": 4.780383795309168,
|
|
"grad_norm": 0.182369505616676,
|
|
"learning_rate": 2.4763000464694377e-07,
|
|
"loss": 0.9485456347465515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24176406860351562,
|
|
"step": 1123,
|
|
"valid_targets_mean": 15655.1,
|
|
"valid_targets_min": 7801
|
|
},
|
|
{
|
|
"epoch": 4.7846481876332625,
|
|
"grad_norm": 0.18174153145703756,
|
|
"learning_rate": 2.383920656559102e-07,
|
|
"loss": 0.9461928606033325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22751101851463318,
|
|
"step": 1124,
|
|
"valid_targets_mean": 14220.1,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 4.788912579957356,
|
|
"grad_norm": 0.17912251421687572,
|
|
"learning_rate": 2.2932869751619568e-07,
|
|
"loss": 0.9231736660003662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2434135526418686,
|
|
"step": 1125,
|
|
"valid_targets_mean": 14937.3,
|
|
"valid_targets_min": 4412
|
|
},
|
|
{
|
|
"epoch": 4.79317697228145,
|
|
"grad_norm": 0.17873064702229766,
|
|
"learning_rate": 2.2043998029212643e-07,
|
|
"loss": 0.9572075605392456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24633853137493134,
|
|
"step": 1126,
|
|
"valid_targets_mean": 14208.7,
|
|
"valid_targets_min": 3253
|
|
},
|
|
{
|
|
"epoch": 4.797441364605544,
|
|
"grad_norm": 0.19325578017308032,
|
|
"learning_rate": 2.1172599250519398e-07,
|
|
"loss": 0.9735586643218994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285764515399933,
|
|
"step": 1127,
|
|
"valid_targets_mean": 14014.3,
|
|
"valid_targets_min": 4944
|
|
},
|
|
{
|
|
"epoch": 4.801705756929637,
|
|
"grad_norm": 0.18854210514438088,
|
|
"learning_rate": 2.0318681113336013e-07,
|
|
"loss": 1.003481149673462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884986996650696,
|
|
"step": 1128,
|
|
"valid_targets_mean": 15012.4,
|
|
"valid_targets_min": 7985
|
|
},
|
|
{
|
|
"epoch": 4.8059701492537314,
|
|
"grad_norm": 0.18222857144245486,
|
|
"learning_rate": 1.9482251161037302e-07,
|
|
"loss": 0.9663025736808777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22748203575611115,
|
|
"step": 1129,
|
|
"valid_targets_mean": 15023.7,
|
|
"valid_targets_min": 4122
|
|
},
|
|
{
|
|
"epoch": 4.810234541577826,
|
|
"grad_norm": 0.17974618806226456,
|
|
"learning_rate": 1.866331678251032e-07,
|
|
"loss": 0.9956282377243042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21607357263565063,
|
|
"step": 1130,
|
|
"valid_targets_mean": 13762.8,
|
|
"valid_targets_min": 2047
|
|
},
|
|
{
|
|
"epoch": 4.814498933901919,
|
|
"grad_norm": 0.1758600637063627,
|
|
"learning_rate": 1.7861885212088869e-07,
|
|
"loss": 0.9475678205490112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24526748061180115,
|
|
"step": 1131,
|
|
"valid_targets_mean": 15130.3,
|
|
"valid_targets_min": 7622
|
|
},
|
|
{
|
|
"epoch": 4.818763326226013,
|
|
"grad_norm": 0.18150019802034476,
|
|
"learning_rate": 1.7077963529490204e-07,
|
|
"loss": 0.9758622050285339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25129640102386475,
|
|
"step": 1132,
|
|
"valid_targets_mean": 14312.8,
|
|
"valid_targets_min": 5570
|
|
},
|
|
{
|
|
"epoch": 4.823027718550106,
|
|
"grad_norm": 0.18595590687608943,
|
|
"learning_rate": 1.6311558659751535e-07,
|
|
"loss": 0.9581972360610962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22696828842163086,
|
|
"step": 1133,
|
|
"valid_targets_mean": 14382.9,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 4.8272921108742,
|
|
"grad_norm": 0.18946504147825557,
|
|
"learning_rate": 1.5562677373169855e-07,
|
|
"loss": 0.9886982440948486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27297037839889526,
|
|
"step": 1134,
|
|
"valid_targets_mean": 14403.2,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 4.8315565031982945,
|
|
"grad_norm": 0.1988029565921578,
|
|
"learning_rate": 1.483132628524131e-07,
|
|
"loss": 1.0126937627792358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24579432606697083,
|
|
"step": 1135,
|
|
"valid_targets_mean": 13454.9,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 4.835820895522388,
|
|
"grad_norm": 0.18331947908146323,
|
|
"learning_rate": 1.4117511856603262e-07,
|
|
"loss": 0.9641690254211426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2405238151550293,
|
|
"step": 1136,
|
|
"valid_targets_mean": 14659.2,
|
|
"valid_targets_min": 7141
|
|
},
|
|
{
|
|
"epoch": 4.840085287846482,
|
|
"grad_norm": 0.1858642422987959,
|
|
"learning_rate": 1.342124039297721e-07,
|
|
"loss": 0.9916709661483765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584751546382904,
|
|
"step": 1137,
|
|
"valid_targets_mean": 14581.5,
|
|
"valid_targets_min": 3649
|
|
},
|
|
{
|
|
"epoch": 4.844349680170575,
|
|
"grad_norm": 0.18455684729446292,
|
|
"learning_rate": 1.2742518045112396e-07,
|
|
"loss": 0.9750644564628601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25415563583374023,
|
|
"step": 1138,
|
|
"valid_targets_mean": 14580.6,
|
|
"valid_targets_min": 4957
|
|
},
|
|
{
|
|
"epoch": 4.848614072494669,
|
|
"grad_norm": 0.18221139202265096,
|
|
"learning_rate": 1.2081350808732518e-07,
|
|
"loss": 0.9195218086242676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.252757728099823,
|
|
"step": 1139,
|
|
"valid_targets_mean": 14550.2,
|
|
"valid_targets_min": 5470
|
|
},
|
|
{
|
|
"epoch": 4.8528784648187635,
|
|
"grad_norm": 0.19375662722988862,
|
|
"learning_rate": 1.143774452448243e-07,
|
|
"loss": 1.068016529083252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26408565044403076,
|
|
"step": 1140,
|
|
"valid_targets_mean": 14606.9,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.17589785731943583,
|
|
"learning_rate": 1.0811704877875528e-07,
|
|
"loss": 0.9681274890899658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22096103429794312,
|
|
"step": 1141,
|
|
"valid_targets_mean": 14835.0,
|
|
"valid_targets_min": 2783
|
|
},
|
|
{
|
|
"epoch": 4.861407249466951,
|
|
"grad_norm": 0.18169988636955903,
|
|
"learning_rate": 1.0203237399245336e-07,
|
|
"loss": 0.9908976554870605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23787662386894226,
|
|
"step": 1142,
|
|
"valid_targets_mean": 14631.3,
|
|
"valid_targets_min": 3338
|
|
},
|
|
{
|
|
"epoch": 4.865671641791045,
|
|
"grad_norm": 0.1875296887268478,
|
|
"learning_rate": 9.612347463694882e-08,
|
|
"loss": 0.9561929702758789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24236030876636505,
|
|
"step": 1143,
|
|
"valid_targets_mean": 15098.5,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 4.869936034115138,
|
|
"grad_norm": 0.18636510555027214,
|
|
"learning_rate": 9.039040291050738e-08,
|
|
"loss": 0.9645097851753235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532026171684265,
|
|
"step": 1144,
|
|
"valid_targets_mean": 15200.3,
|
|
"valid_targets_min": 3911
|
|
},
|
|
{
|
|
"epoch": 4.8742004264392325,
|
|
"grad_norm": 0.194394975693115,
|
|
"learning_rate": 8.483320945815499e-08,
|
|
"loss": 0.9629539251327515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2467735856771469,
|
|
"step": 1145,
|
|
"valid_targets_mean": 15471.6,
|
|
"valid_targets_min": 2373
|
|
},
|
|
{
|
|
"epoch": 4.878464818763327,
|
|
"grad_norm": 0.18567680121853064,
|
|
"learning_rate": 7.945194337124262e-08,
|
|
"loss": 0.9523435831069946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24072405695915222,
|
|
"step": 1146,
|
|
"valid_targets_mean": 15206.4,
|
|
"valid_targets_min": 4402
|
|
},
|
|
{
|
|
"epoch": 4.88272921108742,
|
|
"grad_norm": 0.19955722684157318,
|
|
"learning_rate": 7.424665218700444e-08,
|
|
"loss": 1.0447909832000732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784472405910492,
|
|
"step": 1147,
|
|
"valid_targets_mean": 15334.1,
|
|
"valid_targets_min": 9093
|
|
},
|
|
{
|
|
"epoch": 4.886993603411514,
|
|
"grad_norm": 0.1828713222757318,
|
|
"learning_rate": 6.921738188814254e-08,
|
|
"loss": 0.9593808650970459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2340017855167389,
|
|
"step": 1148,
|
|
"valid_targets_mean": 14965.5,
|
|
"valid_targets_min": 7527
|
|
},
|
|
{
|
|
"epoch": 4.891257995735607,
|
|
"grad_norm": 0.1928454297781806,
|
|
"learning_rate": 6.436417690241614e-08,
|
|
"loss": 0.9661131501197815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21552234888076782,
|
|
"step": 1149,
|
|
"valid_targets_mean": 13662.2,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 4.895522388059701,
|
|
"grad_norm": 0.1924270678283599,
|
|
"learning_rate": 5.968708010225532e-08,
|
|
"loss": 0.9736749529838562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682981491088867,
|
|
"step": 1150,
|
|
"valid_targets_mean": 15144.8,
|
|
"valid_targets_min": 9892
|
|
},
|
|
{
|
|
"epoch": 4.899786780383796,
|
|
"grad_norm": 0.1869384094621727,
|
|
"learning_rate": 5.518613280437901e-08,
|
|
"loss": 0.9437440037727356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22726953029632568,
|
|
"step": 1151,
|
|
"valid_targets_mean": 14869.4,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 4.904051172707889,
|
|
"grad_norm": 0.1875151927759769,
|
|
"learning_rate": 5.0861374769426433e-08,
|
|
"loss": 0.963232159614563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23168283700942993,
|
|
"step": 1152,
|
|
"valid_targets_mean": 13918.9,
|
|
"valid_targets_min": 4828
|
|
},
|
|
{
|
|
"epoch": 4.908315565031983,
|
|
"grad_norm": 0.18738265485642772,
|
|
"learning_rate": 4.671284420161071e-08,
|
|
"loss": 0.9642470479011536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2354758083820343,
|
|
"step": 1153,
|
|
"valid_targets_mean": 14218.9,
|
|
"valid_targets_min": 4878
|
|
},
|
|
{
|
|
"epoch": 4.912579957356077,
|
|
"grad_norm": 0.179096448776487,
|
|
"learning_rate": 4.274057774838136e-08,
|
|
"loss": 0.9545761346817017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311761975288391,
|
|
"step": 1154,
|
|
"valid_targets_mean": 14188.5,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 4.91684434968017,
|
|
"grad_norm": 0.18370436259452722,
|
|
"learning_rate": 3.894461050010012e-08,
|
|
"loss": 0.9211704134941101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1992514282464981,
|
|
"step": 1155,
|
|
"valid_targets_mean": 13616.8,
|
|
"valid_targets_min": 1976
|
|
},
|
|
{
|
|
"epoch": 4.9211087420042645,
|
|
"grad_norm": 0.19697341784194985,
|
|
"learning_rate": 3.5324975989725615e-08,
|
|
"loss": 0.9662893414497375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23267701268196106,
|
|
"step": 1156,
|
|
"valid_targets_mean": 14298.6,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 4.925373134328359,
|
|
"grad_norm": 0.18164859280451848,
|
|
"learning_rate": 3.188170619252473e-08,
|
|
"loss": 0.9604413509368896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2460888922214508,
|
|
"step": 1157,
|
|
"valid_targets_mean": 15502.1,
|
|
"valid_targets_min": 10452
|
|
},
|
|
{
|
|
"epoch": 4.929637526652452,
|
|
"grad_norm": 0.18442182041156924,
|
|
"learning_rate": 2.8614831525786147e-08,
|
|
"loss": 0.9696638584136963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23942352831363678,
|
|
"step": 1158,
|
|
"valid_targets_mean": 15015.1,
|
|
"valid_targets_min": 8581
|
|
},
|
|
{
|
|
"epoch": 4.933901918976546,
|
|
"grad_norm": 0.20434953182319676,
|
|
"learning_rate": 2.552438084855613e-08,
|
|
"loss": 0.9707285165786743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23770083487033844,
|
|
"step": 1159,
|
|
"valid_targets_mean": 14752.8,
|
|
"valid_targets_min": 3297
|
|
},
|
|
{
|
|
"epoch": 4.938166311300639,
|
|
"grad_norm": 0.18846406005781263,
|
|
"learning_rate": 2.2610381461372068e-08,
|
|
"loss": 1.0132946968078613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27154213190078735,
|
|
"step": 1160,
|
|
"valid_targets_mean": 15038.8,
|
|
"valid_targets_min": 7635
|
|
},
|
|
{
|
|
"epoch": 4.9424307036247335,
|
|
"grad_norm": 0.1816173347597828,
|
|
"learning_rate": 1.987285910603598e-08,
|
|
"loss": 0.9828102588653564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2403227686882019,
|
|
"step": 1161,
|
|
"valid_targets_mean": 14799.3,
|
|
"valid_targets_min": 3960
|
|
},
|
|
{
|
|
"epoch": 4.946695095948828,
|
|
"grad_norm": 0.19718993160217102,
|
|
"learning_rate": 1.7311837965379164e-08,
|
|
"loss": 0.9818974137306213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25159767270088196,
|
|
"step": 1162,
|
|
"valid_targets_mean": 15367.5,
|
|
"valid_targets_min": 7569
|
|
},
|
|
{
|
|
"epoch": 4.950959488272921,
|
|
"grad_norm": 0.184685719967902,
|
|
"learning_rate": 1.4927340663046798e-08,
|
|
"loss": 0.9775370955467224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25196710228919983,
|
|
"step": 1163,
|
|
"valid_targets_mean": 14676.9,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 4.955223880597015,
|
|
"grad_norm": 0.1888304549830865,
|
|
"learning_rate": 1.2719388263300325e-08,
|
|
"loss": 0.9846158027648926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23142623901367188,
|
|
"step": 1164,
|
|
"valid_targets_mean": 13634.5,
|
|
"valid_targets_min": 2356
|
|
},
|
|
{
|
|
"epoch": 4.959488272921108,
|
|
"grad_norm": 0.18541490359266527,
|
|
"learning_rate": 1.0688000270839827e-08,
|
|
"loss": 1.003100872039795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24990428984165192,
|
|
"step": 1165,
|
|
"valid_targets_mean": 14019.7,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 4.963752665245202,
|
|
"grad_norm": 0.18361387965535805,
|
|
"learning_rate": 8.833194630615271e-09,
|
|
"loss": 0.9978989362716675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2524747848510742,
|
|
"step": 1166,
|
|
"valid_targets_mean": 14548.3,
|
|
"valid_targets_min": 7809
|
|
},
|
|
{
|
|
"epoch": 4.968017057569297,
|
|
"grad_norm": 0.19174431795363184,
|
|
"learning_rate": 7.154987727682194e-09,
|
|
"loss": 0.9677139520645142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24135658144950867,
|
|
"step": 1167,
|
|
"valid_targets_mean": 14866.4,
|
|
"valid_targets_min": 4105
|
|
},
|
|
{
|
|
"epoch": 4.97228144989339,
|
|
"grad_norm": 0.18257838735005472,
|
|
"learning_rate": 5.6533943870462625e-09,
|
|
"loss": 0.9879318475723267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24627341330051422,
|
|
"step": 1168,
|
|
"valid_targets_mean": 14967.6,
|
|
"valid_targets_min": 3003
|
|
},
|
|
{
|
|
"epoch": 4.976545842217484,
|
|
"grad_norm": 0.19170415109429254,
|
|
"learning_rate": 4.328427873541152e-09,
|
|
"loss": 1.0250473022460938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258064866065979,
|
|
"step": 1169,
|
|
"valid_targets_mean": 15066.1,
|
|
"valid_targets_min": 8759
|
|
},
|
|
{
|
|
"epoch": 4.980810234541578,
|
|
"grad_norm": 0.1837889528395531,
|
|
"learning_rate": 3.1800998917086432e-09,
|
|
"loss": 0.973028838634491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24662089347839355,
|
|
"step": 1170,
|
|
"valid_targets_mean": 15083.5,
|
|
"valid_targets_min": 5835
|
|
},
|
|
{
|
|
"epoch": 4.985074626865671,
|
|
"grad_norm": 0.17878044891064748,
|
|
"learning_rate": 2.2084205856920393e-09,
|
|
"loss": 0.9649015665054321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2296697348356247,
|
|
"step": 1171,
|
|
"valid_targets_mean": 14655.2,
|
|
"valid_targets_min": 3045
|
|
},
|
|
{
|
|
"epoch": 4.9893390191897655,
|
|
"grad_norm": 0.19227545584561032,
|
|
"learning_rate": 1.4133985391473482e-09,
|
|
"loss": 1.0228734016418457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613134980201721,
|
|
"step": 1172,
|
|
"valid_targets_mean": 14752.4,
|
|
"valid_targets_min": 3992
|
|
},
|
|
{
|
|
"epoch": 4.99360341151386,
|
|
"grad_norm": 0.18235692036318835,
|
|
"learning_rate": 7.950407751722288e-10,
|
|
"loss": 0.9098037481307983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22016096115112305,
|
|
"step": 1173,
|
|
"valid_targets_mean": 14614.0,
|
|
"valid_targets_min": 4385
|
|
},
|
|
{
|
|
"epoch": 4.997867803837953,
|
|
"grad_norm": 0.18124280454259,
|
|
"learning_rate": 3.5335275624159835e-10,
|
|
"loss": 0.9602288007736206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2466222047805786,
|
|
"step": 1174,
|
|
"valid_targets_mean": 15355.7,
|
|
"valid_targets_min": 8230
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.2569032695441435,
|
|
"learning_rate": 8.833838415212014e-11,
|
|
"loss": 1.0513310432434082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4691266119480133,
|
|
"step": 1175,
|
|
"valid_targets_mean": 13499.5,
|
|
"valid_targets_min": 3345
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4691266119480133,
|
|
"step": 1175,
|
|
"total_flos": 1658037051588608.0,
|
|
"train_loss": 1.0214665260213487,
|
|
"train_runtime": 6089.571,
|
|
"train_samples_per_second": 24.632,
|
|
"train_steps_per_second": 0.193,
|
|
"valid_targets_mean": 13499.5,
|
|
"valid_targets_min": 3345
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1175,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1658037051588608.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|