8804 lines
244 KiB
JSON
8804 lines
244 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3983,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008787346221441126,
|
|
"grad_norm": 38.6978838722848,
|
|
"learning_rate": 4.010025062656642e-07,
|
|
"loss": 0.9753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9984568357467651,
|
|
"step": 5,
|
|
"valid_targets_mean": 8204.4,
|
|
"valid_targets_min": 7101
|
|
},
|
|
{
|
|
"epoch": 0.01757469244288225,
|
|
"grad_norm": 36.44900496665165,
|
|
"learning_rate": 9.022556390977444e-07,
|
|
"loss": 1.0116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9997004270553589,
|
|
"step": 10,
|
|
"valid_targets_mean": 7846.7,
|
|
"valid_targets_min": 7038
|
|
},
|
|
{
|
|
"epoch": 0.026362038664323375,
|
|
"grad_norm": 30.270769651579307,
|
|
"learning_rate": 1.4035087719298246e-06,
|
|
"loss": 0.9528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8993930816650391,
|
|
"step": 15,
|
|
"valid_targets_mean": 7765.9,
|
|
"valid_targets_min": 7114
|
|
},
|
|
{
|
|
"epoch": 0.0351493848857645,
|
|
"grad_norm": 21.151573665966424,
|
|
"learning_rate": 1.904761904761905e-06,
|
|
"loss": 0.8447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.826770007610321,
|
|
"step": 20,
|
|
"valid_targets_mean": 7497.1,
|
|
"valid_targets_min": 7056
|
|
},
|
|
{
|
|
"epoch": 0.043936731107205626,
|
|
"grad_norm": 8.682299524123176,
|
|
"learning_rate": 2.406015037593985e-06,
|
|
"loss": 0.7579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.740318775177002,
|
|
"step": 25,
|
|
"valid_targets_mean": 7646.0,
|
|
"valid_targets_min": 6783
|
|
},
|
|
{
|
|
"epoch": 0.05272407732864675,
|
|
"grad_norm": 3.8730139452847805,
|
|
"learning_rate": 2.9072681704260652e-06,
|
|
"loss": 0.679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6605329513549805,
|
|
"step": 30,
|
|
"valid_targets_mean": 7726.4,
|
|
"valid_targets_min": 7030
|
|
},
|
|
{
|
|
"epoch": 0.061511423550087874,
|
|
"grad_norm": 2.072050986380433,
|
|
"learning_rate": 3.4085213032581455e-06,
|
|
"loss": 0.6448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6354802250862122,
|
|
"step": 35,
|
|
"valid_targets_mean": 8878.6,
|
|
"valid_targets_min": 6835
|
|
},
|
|
{
|
|
"epoch": 0.070298769771529,
|
|
"grad_norm": 1.6266156298187409,
|
|
"learning_rate": 3.909774436090225e-06,
|
|
"loss": 0.6236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6287806034088135,
|
|
"step": 40,
|
|
"valid_targets_mean": 8068.2,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 0.07908611599297012,
|
|
"grad_norm": 1.378698835683514,
|
|
"learning_rate": 4.411027568922306e-06,
|
|
"loss": 0.5979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6028242707252502,
|
|
"step": 45,
|
|
"valid_targets_mean": 7623.4,
|
|
"valid_targets_min": 6618
|
|
},
|
|
{
|
|
"epoch": 0.08787346221441125,
|
|
"grad_norm": 1.0411202965264905,
|
|
"learning_rate": 4.912280701754386e-06,
|
|
"loss": 0.6119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6232005953788757,
|
|
"step": 50,
|
|
"valid_targets_mean": 7622.8,
|
|
"valid_targets_min": 6016
|
|
},
|
|
{
|
|
"epoch": 0.09666080843585237,
|
|
"grad_norm": 0.7770352727164161,
|
|
"learning_rate": 5.413533834586467e-06,
|
|
"loss": 0.5674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5420302152633667,
|
|
"step": 55,
|
|
"valid_targets_mean": 8124.6,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.1054481546572935,
|
|
"grad_norm": 0.796039133753143,
|
|
"learning_rate": 5.9147869674185465e-06,
|
|
"loss": 0.5599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5746988654136658,
|
|
"step": 60,
|
|
"valid_targets_mean": 7137.2,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 0.11423550087873462,
|
|
"grad_norm": 0.6336106625079481,
|
|
"learning_rate": 6.416040100250627e-06,
|
|
"loss": 0.5348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5362576246261597,
|
|
"step": 65,
|
|
"valid_targets_mean": 7793.7,
|
|
"valid_targets_min": 6922
|
|
},
|
|
{
|
|
"epoch": 0.12302284710017575,
|
|
"grad_norm": 0.5700155831436438,
|
|
"learning_rate": 6.917293233082707e-06,
|
|
"loss": 0.5188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.50221848487854,
|
|
"step": 70,
|
|
"valid_targets_mean": 7509.6,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 0.13181019332161686,
|
|
"grad_norm": 0.5355565407471911,
|
|
"learning_rate": 7.418546365914787e-06,
|
|
"loss": 0.5024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5001769661903381,
|
|
"step": 75,
|
|
"valid_targets_mean": 7870.4,
|
|
"valid_targets_min": 7366
|
|
},
|
|
{
|
|
"epoch": 0.140597539543058,
|
|
"grad_norm": 0.49202281440443324,
|
|
"learning_rate": 7.919799498746868e-06,
|
|
"loss": 0.4863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.466791570186615,
|
|
"step": 80,
|
|
"valid_targets_mean": 7792.9,
|
|
"valid_targets_min": 6296
|
|
},
|
|
{
|
|
"epoch": 0.14938488576449913,
|
|
"grad_norm": 0.4667523533023862,
|
|
"learning_rate": 8.421052631578948e-06,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4645511507987976,
|
|
"step": 85,
|
|
"valid_targets_mean": 7683.8,
|
|
"valid_targets_min": 6859
|
|
},
|
|
{
|
|
"epoch": 0.15817223198594024,
|
|
"grad_norm": 0.46381986185960605,
|
|
"learning_rate": 8.922305764411027e-06,
|
|
"loss": 0.4505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44488275051116943,
|
|
"step": 90,
|
|
"valid_targets_mean": 7648.6,
|
|
"valid_targets_min": 6536
|
|
},
|
|
{
|
|
"epoch": 0.16695957820738136,
|
|
"grad_norm": 0.49268227301820733,
|
|
"learning_rate": 9.423558897243108e-06,
|
|
"loss": 0.4527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4484066069126129,
|
|
"step": 95,
|
|
"valid_targets_mean": 7653.2,
|
|
"valid_targets_min": 6951
|
|
},
|
|
{
|
|
"epoch": 0.1757469244288225,
|
|
"grad_norm": 0.4684597306217091,
|
|
"learning_rate": 9.924812030075189e-06,
|
|
"loss": 0.4377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43168944120407104,
|
|
"step": 100,
|
|
"valid_targets_mean": 7581.9,
|
|
"valid_targets_min": 6596
|
|
},
|
|
{
|
|
"epoch": 0.18453427065026362,
|
|
"grad_norm": 0.4533014610851105,
|
|
"learning_rate": 1.0426065162907268e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.411173015832901,
|
|
"step": 105,
|
|
"valid_targets_mean": 7877.8,
|
|
"valid_targets_min": 7346
|
|
},
|
|
{
|
|
"epoch": 0.19332161687170474,
|
|
"grad_norm": 0.420632290889455,
|
|
"learning_rate": 1.0927318295739348e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40126168727874756,
|
|
"step": 110,
|
|
"valid_targets_mean": 8554.2,
|
|
"valid_targets_min": 7236
|
|
},
|
|
{
|
|
"epoch": 0.20210896309314588,
|
|
"grad_norm": 0.4742732806072542,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4155147075653076,
|
|
"step": 115,
|
|
"valid_targets_mean": 8634.9,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 0.210896309314587,
|
|
"grad_norm": 0.4594208243066726,
|
|
"learning_rate": 1.192982456140351e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3859213888645172,
|
|
"step": 120,
|
|
"valid_targets_mean": 7575.8,
|
|
"valid_targets_min": 6708
|
|
},
|
|
{
|
|
"epoch": 0.21968365553602812,
|
|
"grad_norm": 0.4997540440424557,
|
|
"learning_rate": 1.2431077694235589e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3957754969596863,
|
|
"step": 125,
|
|
"valid_targets_mean": 7793.8,
|
|
"valid_targets_min": 7241
|
|
},
|
|
{
|
|
"epoch": 0.22847100175746923,
|
|
"grad_norm": 0.428361000886045,
|
|
"learning_rate": 1.293233082706767e-05,
|
|
"loss": 0.4002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3891918659210205,
|
|
"step": 130,
|
|
"valid_targets_mean": 8369.6,
|
|
"valid_targets_min": 7025
|
|
},
|
|
{
|
|
"epoch": 0.23725834797891038,
|
|
"grad_norm": 0.4743185424801239,
|
|
"learning_rate": 1.343358395989975e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3979344964027405,
|
|
"step": 135,
|
|
"valid_targets_mean": 7767.6,
|
|
"valid_targets_min": 6628
|
|
},
|
|
{
|
|
"epoch": 0.2460456942003515,
|
|
"grad_norm": 0.4841423226769736,
|
|
"learning_rate": 1.3934837092731829e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3948283791542053,
|
|
"step": 140,
|
|
"valid_targets_mean": 7875.4,
|
|
"valid_targets_min": 7146
|
|
},
|
|
{
|
|
"epoch": 0.2548330404217926,
|
|
"grad_norm": 0.588951244848714,
|
|
"learning_rate": 1.443609022556391e-05,
|
|
"loss": 0.3951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4132270812988281,
|
|
"step": 145,
|
|
"valid_targets_mean": 8222.6,
|
|
"valid_targets_min": 6658
|
|
},
|
|
{
|
|
"epoch": 0.26362038664323373,
|
|
"grad_norm": 0.5495889925349385,
|
|
"learning_rate": 1.493734335839599e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38743358850479126,
|
|
"step": 150,
|
|
"valid_targets_mean": 7656.6,
|
|
"valid_targets_min": 6637
|
|
},
|
|
{
|
|
"epoch": 0.27240773286467485,
|
|
"grad_norm": 0.5958013982628857,
|
|
"learning_rate": 1.543859649122807e-05,
|
|
"loss": 0.3796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3920867443084717,
|
|
"step": 155,
|
|
"valid_targets_mean": 7690.2,
|
|
"valid_targets_min": 6713
|
|
},
|
|
{
|
|
"epoch": 0.281195079086116,
|
|
"grad_norm": 0.46010368035874943,
|
|
"learning_rate": 1.593984962406015e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36779534816741943,
|
|
"step": 160,
|
|
"valid_targets_mean": 7557.8,
|
|
"valid_targets_min": 1962
|
|
},
|
|
{
|
|
"epoch": 0.28998242530755713,
|
|
"grad_norm": 0.5929789279662462,
|
|
"learning_rate": 1.6441102756892233e-05,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3819672167301178,
|
|
"step": 165,
|
|
"valid_targets_mean": 7351.0,
|
|
"valid_targets_min": 4393
|
|
},
|
|
{
|
|
"epoch": 0.29876977152899825,
|
|
"grad_norm": 0.5524672538423943,
|
|
"learning_rate": 1.694235588972431e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36684030294418335,
|
|
"step": 170,
|
|
"valid_targets_mean": 7323.3,
|
|
"valid_targets_min": 4028
|
|
},
|
|
{
|
|
"epoch": 0.30755711775043937,
|
|
"grad_norm": 0.6041910949646787,
|
|
"learning_rate": 1.744360902255639e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3752996623516083,
|
|
"step": 175,
|
|
"valid_targets_mean": 7826.6,
|
|
"valid_targets_min": 6948
|
|
},
|
|
{
|
|
"epoch": 0.3163444639718805,
|
|
"grad_norm": 0.5273237629264332,
|
|
"learning_rate": 1.7944862155388473e-05,
|
|
"loss": 0.3734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3750697374343872,
|
|
"step": 180,
|
|
"valid_targets_mean": 8417.7,
|
|
"valid_targets_min": 6580
|
|
},
|
|
{
|
|
"epoch": 0.3251318101933216,
|
|
"grad_norm": 0.5634785021242477,
|
|
"learning_rate": 1.8446115288220552e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3679591715335846,
|
|
"step": 185,
|
|
"valid_targets_mean": 8481.8,
|
|
"valid_targets_min": 7148
|
|
},
|
|
{
|
|
"epoch": 0.3339191564147627,
|
|
"grad_norm": 0.6490715556412919,
|
|
"learning_rate": 1.894736842105263e-05,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35834622383117676,
|
|
"step": 190,
|
|
"valid_targets_mean": 7840.2,
|
|
"valid_targets_min": 7374
|
|
},
|
|
{
|
|
"epoch": 0.3427065026362039,
|
|
"grad_norm": 0.7040665040619704,
|
|
"learning_rate": 1.9448621553884713e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.360437273979187,
|
|
"step": 195,
|
|
"valid_targets_mean": 7532.4,
|
|
"valid_targets_min": 6567
|
|
},
|
|
{
|
|
"epoch": 0.351493848857645,
|
|
"grad_norm": 0.6661072177146762,
|
|
"learning_rate": 1.9949874686716792e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3582548499107361,
|
|
"step": 200,
|
|
"valid_targets_mean": 7941.7,
|
|
"valid_targets_min": 4770
|
|
},
|
|
{
|
|
"epoch": 0.3602811950790861,
|
|
"grad_norm": 0.6446147681036916,
|
|
"learning_rate": 2.045112781954887e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3482518792152405,
|
|
"step": 205,
|
|
"valid_targets_mean": 7836.3,
|
|
"valid_targets_min": 6808
|
|
},
|
|
{
|
|
"epoch": 0.36906854130052724,
|
|
"grad_norm": 0.7425622932228986,
|
|
"learning_rate": 2.0952380952380954e-05,
|
|
"loss": 0.3641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3838837742805481,
|
|
"step": 210,
|
|
"valid_targets_mean": 7816.9,
|
|
"valid_targets_min": 7214
|
|
},
|
|
{
|
|
"epoch": 0.37785588752196836,
|
|
"grad_norm": 0.5581982531493921,
|
|
"learning_rate": 2.1453634085213033e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36858636140823364,
|
|
"step": 215,
|
|
"valid_targets_mean": 7943.6,
|
|
"valid_targets_min": 7011
|
|
},
|
|
{
|
|
"epoch": 0.3866432337434095,
|
|
"grad_norm": 0.5795300028635507,
|
|
"learning_rate": 2.1954887218045115e-05,
|
|
"loss": 0.3665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36795204877853394,
|
|
"step": 220,
|
|
"valid_targets_mean": 7693.4,
|
|
"valid_targets_min": 6714
|
|
},
|
|
{
|
|
"epoch": 0.3954305799648506,
|
|
"grad_norm": 0.5961129101229606,
|
|
"learning_rate": 2.2456140350877194e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35297703742980957,
|
|
"step": 225,
|
|
"valid_targets_mean": 7667.0,
|
|
"valid_targets_min": 7113
|
|
},
|
|
{
|
|
"epoch": 0.40421792618629176,
|
|
"grad_norm": 0.5346483708070339,
|
|
"learning_rate": 2.2957393483709277e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.355159193277359,
|
|
"step": 230,
|
|
"valid_targets_mean": 7672.8,
|
|
"valid_targets_min": 6972
|
|
},
|
|
{
|
|
"epoch": 0.4130052724077329,
|
|
"grad_norm": 0.6007854655638409,
|
|
"learning_rate": 2.3458646616541356e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34793052077293396,
|
|
"step": 235,
|
|
"valid_targets_mean": 7685.1,
|
|
"valid_targets_min": 6490
|
|
},
|
|
{
|
|
"epoch": 0.421792618629174,
|
|
"grad_norm": 0.6225739772119563,
|
|
"learning_rate": 2.3959899749373438e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3514181971549988,
|
|
"step": 240,
|
|
"valid_targets_mean": 7597.9,
|
|
"valid_targets_min": 6470
|
|
},
|
|
{
|
|
"epoch": 0.4305799648506151,
|
|
"grad_norm": 0.5289284486385892,
|
|
"learning_rate": 2.4461152882205514e-05,
|
|
"loss": 0.3536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3455802798271179,
|
|
"step": 245,
|
|
"valid_targets_mean": 7803.9,
|
|
"valid_targets_min": 7042
|
|
},
|
|
{
|
|
"epoch": 0.43936731107205623,
|
|
"grad_norm": 0.6527177005189011,
|
|
"learning_rate": 2.4962406015037596e-05,
|
|
"loss": 0.3401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3467724323272705,
|
|
"step": 250,
|
|
"valid_targets_mean": 7513.4,
|
|
"valid_targets_min": 6591
|
|
},
|
|
{
|
|
"epoch": 0.44815465729349735,
|
|
"grad_norm": 0.592816842394283,
|
|
"learning_rate": 2.5463659147869675e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3463764190673828,
|
|
"step": 255,
|
|
"valid_targets_mean": 7793.3,
|
|
"valid_targets_min": 6874
|
|
},
|
|
{
|
|
"epoch": 0.45694200351493847,
|
|
"grad_norm": 0.5890938532081315,
|
|
"learning_rate": 2.5964912280701757e-05,
|
|
"loss": 0.3586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36159226298332214,
|
|
"step": 260,
|
|
"valid_targets_mean": 7705.4,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 0.46572934973637964,
|
|
"grad_norm": 0.5980314248262064,
|
|
"learning_rate": 2.6466165413533836e-05,
|
|
"loss": 0.3551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35110318660736084,
|
|
"step": 265,
|
|
"valid_targets_mean": 8175.6,
|
|
"valid_targets_min": 7201
|
|
},
|
|
{
|
|
"epoch": 0.47451669595782076,
|
|
"grad_norm": 0.49692421838506523,
|
|
"learning_rate": 2.696741854636592e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3421872854232788,
|
|
"step": 270,
|
|
"valid_targets_mean": 8735.7,
|
|
"valid_targets_min": 7134
|
|
},
|
|
{
|
|
"epoch": 0.4833040421792619,
|
|
"grad_norm": 0.7089117092095248,
|
|
"learning_rate": 2.7468671679197994e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3664856553077698,
|
|
"step": 275,
|
|
"valid_targets_mean": 7563.2,
|
|
"valid_targets_min": 6711
|
|
},
|
|
{
|
|
"epoch": 0.492091388400703,
|
|
"grad_norm": 0.6619046729096473,
|
|
"learning_rate": 2.7969924812030077e-05,
|
|
"loss": 0.3564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38198232650756836,
|
|
"step": 280,
|
|
"valid_targets_mean": 8438.1,
|
|
"valid_targets_min": 6326
|
|
},
|
|
{
|
|
"epoch": 0.5008787346221442,
|
|
"grad_norm": 0.8062671883298813,
|
|
"learning_rate": 2.8471177944862156e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3398236632347107,
|
|
"step": 285,
|
|
"valid_targets_mean": 7585.6,
|
|
"valid_targets_min": 6803
|
|
},
|
|
{
|
|
"epoch": 0.5096660808435852,
|
|
"grad_norm": 0.5995976148755775,
|
|
"learning_rate": 2.8972431077694238e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35415637493133545,
|
|
"step": 290,
|
|
"valid_targets_mean": 9409.5,
|
|
"valid_targets_min": 7583
|
|
},
|
|
{
|
|
"epoch": 0.5184534270650264,
|
|
"grad_norm": 0.49270556955808065,
|
|
"learning_rate": 2.9473684210526317e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.332955002784729,
|
|
"step": 295,
|
|
"valid_targets_mean": 8608.6,
|
|
"valid_targets_min": 6638
|
|
},
|
|
{
|
|
"epoch": 0.5272407732864675,
|
|
"grad_norm": 0.6063298210454802,
|
|
"learning_rate": 2.99749373433584e-05,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35231274366378784,
|
|
"step": 300,
|
|
"valid_targets_mean": 7578.2,
|
|
"valid_targets_min": 6504
|
|
},
|
|
{
|
|
"epoch": 0.5360281195079086,
|
|
"grad_norm": 0.6270112520829497,
|
|
"learning_rate": 3.047619047619048e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3481753170490265,
|
|
"step": 305,
|
|
"valid_targets_mean": 7567.4,
|
|
"valid_targets_min": 6744
|
|
},
|
|
{
|
|
"epoch": 0.5448154657293497,
|
|
"grad_norm": 0.5807218083467116,
|
|
"learning_rate": 3.097744360902256e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3547857403755188,
|
|
"step": 310,
|
|
"valid_targets_mean": 7679.1,
|
|
"valid_targets_min": 6530
|
|
},
|
|
{
|
|
"epoch": 0.5536028119507909,
|
|
"grad_norm": 0.5797707420078969,
|
|
"learning_rate": 3.147869674185464e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3483213782310486,
|
|
"step": 315,
|
|
"valid_targets_mean": 7858.9,
|
|
"valid_targets_min": 7052
|
|
},
|
|
{
|
|
"epoch": 0.562390158172232,
|
|
"grad_norm": 0.5659965873691432,
|
|
"learning_rate": 3.197994987468672e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32356488704681396,
|
|
"step": 320,
|
|
"valid_targets_mean": 8695.8,
|
|
"valid_targets_min": 7163
|
|
},
|
|
{
|
|
"epoch": 0.5711775043936731,
|
|
"grad_norm": 0.549458311776311,
|
|
"learning_rate": 3.24812030075188e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3535029888153076,
|
|
"step": 325,
|
|
"valid_targets_mean": 7525.9,
|
|
"valid_targets_min": 6717
|
|
},
|
|
{
|
|
"epoch": 0.5799648506151143,
|
|
"grad_norm": 0.6488259689086234,
|
|
"learning_rate": 3.298245614035088e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33021873235702515,
|
|
"step": 330,
|
|
"valid_targets_mean": 8136.8,
|
|
"valid_targets_min": 7100
|
|
},
|
|
{
|
|
"epoch": 0.5887521968365553,
|
|
"grad_norm": 0.6190982787223087,
|
|
"learning_rate": 3.3483709273182956e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3612127900123596,
|
|
"step": 335,
|
|
"valid_targets_mean": 7711.2,
|
|
"valid_targets_min": 6996
|
|
},
|
|
{
|
|
"epoch": 0.5975395430579965,
|
|
"grad_norm": 0.7134611976932143,
|
|
"learning_rate": 3.398496240601504e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32457542419433594,
|
|
"step": 340,
|
|
"valid_targets_mean": 7711.4,
|
|
"valid_targets_min": 6467
|
|
},
|
|
{
|
|
"epoch": 0.6063268892794376,
|
|
"grad_norm": 0.4825474653552003,
|
|
"learning_rate": 3.448621553884712e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33146989345550537,
|
|
"step": 345,
|
|
"valid_targets_mean": 8664.6,
|
|
"valid_targets_min": 6905
|
|
},
|
|
{
|
|
"epoch": 0.6151142355008787,
|
|
"grad_norm": 0.6066546859254534,
|
|
"learning_rate": 3.49874686716792e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3363463878631592,
|
|
"step": 350,
|
|
"valid_targets_mean": 7706.2,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 0.6239015817223199,
|
|
"grad_norm": 0.5358291220456245,
|
|
"learning_rate": 3.548872180451128e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35363635420799255,
|
|
"step": 355,
|
|
"valid_targets_mean": 8824.1,
|
|
"valid_targets_min": 6443
|
|
},
|
|
{
|
|
"epoch": 0.632688927943761,
|
|
"grad_norm": 0.5400754436186094,
|
|
"learning_rate": 3.5989974937343364e-05,
|
|
"loss": 0.3357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33272236585617065,
|
|
"step": 360,
|
|
"valid_targets_mean": 7586.1,
|
|
"valid_targets_min": 6781
|
|
},
|
|
{
|
|
"epoch": 0.6414762741652021,
|
|
"grad_norm": 0.5505762025043917,
|
|
"learning_rate": 3.649122807017544e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.337479829788208,
|
|
"step": 365,
|
|
"valid_targets_mean": 7690.2,
|
|
"valid_targets_min": 7086
|
|
},
|
|
{
|
|
"epoch": 0.6502636203866432,
|
|
"grad_norm": 0.5973982759050109,
|
|
"learning_rate": 3.699248120300752e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35638201236724854,
|
|
"step": 370,
|
|
"valid_targets_mean": 7434.1,
|
|
"valid_targets_min": 6424
|
|
},
|
|
{
|
|
"epoch": 0.6590509666080844,
|
|
"grad_norm": 0.5693931599659594,
|
|
"learning_rate": 3.74937343358396e-05,
|
|
"loss": 0.3424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.338626891374588,
|
|
"step": 375,
|
|
"valid_targets_mean": 7283.1,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 0.6678383128295254,
|
|
"grad_norm": 0.5971595429698976,
|
|
"learning_rate": 3.799498746867168e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3575047552585602,
|
|
"step": 380,
|
|
"valid_targets_mean": 7449.1,
|
|
"valid_targets_min": 6542
|
|
},
|
|
{
|
|
"epoch": 0.6766256590509666,
|
|
"grad_norm": 0.5578406633304958,
|
|
"learning_rate": 3.849624060150376e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32988211512565613,
|
|
"step": 385,
|
|
"valid_targets_mean": 7770.6,
|
|
"valid_targets_min": 6719
|
|
},
|
|
{
|
|
"epoch": 0.6854130052724078,
|
|
"grad_norm": 0.5668191415205301,
|
|
"learning_rate": 3.8997493734335845e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3269059956073761,
|
|
"step": 390,
|
|
"valid_targets_mean": 7783.9,
|
|
"valid_targets_min": 6911
|
|
},
|
|
{
|
|
"epoch": 0.6942003514938488,
|
|
"grad_norm": 0.5498326107964799,
|
|
"learning_rate": 3.949874686716792e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32613101601600647,
|
|
"step": 395,
|
|
"valid_targets_mean": 7709.3,
|
|
"valid_targets_min": 6875
|
|
},
|
|
{
|
|
"epoch": 0.70298769771529,
|
|
"grad_norm": 0.5545998996247578,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.335163950920105,
|
|
"step": 400,
|
|
"valid_targets_mean": 7833.9,
|
|
"valid_targets_min": 7032
|
|
},
|
|
{
|
|
"epoch": 0.7117750439367311,
|
|
"grad_norm": 0.5467208617580587,
|
|
"learning_rate": 3.999980791075177e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3213801980018616,
|
|
"step": 405,
|
|
"valid_targets_mean": 7722.5,
|
|
"valid_targets_min": 7033
|
|
},
|
|
{
|
|
"epoch": 0.7205623901581723,
|
|
"grad_norm": 0.5385613574993566,
|
|
"learning_rate": 3.999923164669689e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3487439751625061,
|
|
"step": 410,
|
|
"valid_targets_mean": 7201.6,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.7293497363796133,
|
|
"grad_norm": 0.5542392343867473,
|
|
"learning_rate": 3.9998271218904776e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3364104628562927,
|
|
"step": 415,
|
|
"valid_targets_mean": 7159.6,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 0.7381370826010545,
|
|
"grad_norm": 0.5169891050813411,
|
|
"learning_rate": 3.999692664582422e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32832974195480347,
|
|
"step": 420,
|
|
"valid_targets_mean": 7732.6,
|
|
"valid_targets_min": 7105
|
|
},
|
|
{
|
|
"epoch": 0.7469244288224957,
|
|
"grad_norm": 0.5031414610488824,
|
|
"learning_rate": 3.999519795328302e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33884960412979126,
|
|
"step": 425,
|
|
"valid_targets_mean": 9959.8,
|
|
"valid_targets_min": 7246
|
|
},
|
|
{
|
|
"epoch": 0.7557117750439367,
|
|
"grad_norm": 0.5026124113090491,
|
|
"learning_rate": 3.9993085174487494e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32484903931617737,
|
|
"step": 430,
|
|
"valid_targets_mean": 8865.3,
|
|
"valid_targets_min": 6744
|
|
},
|
|
{
|
|
"epoch": 0.7644991212653779,
|
|
"grad_norm": 0.4817890109970856,
|
|
"learning_rate": 3.999058835002187e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3292863368988037,
|
|
"step": 435,
|
|
"valid_targets_mean": 7735.2,
|
|
"valid_targets_min": 7043
|
|
},
|
|
{
|
|
"epoch": 0.773286467486819,
|
|
"grad_norm": 0.4936188500345479,
|
|
"learning_rate": 3.998770752784745e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3398468494415283,
|
|
"step": 440,
|
|
"valid_targets_mean": 8204.1,
|
|
"valid_targets_min": 5811
|
|
},
|
|
{
|
|
"epoch": 0.7820738137082601,
|
|
"grad_norm": 0.6464708805528303,
|
|
"learning_rate": 3.998444276330172e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325589656829834,
|
|
"step": 445,
|
|
"valid_targets_mean": 7733.5,
|
|
"valid_targets_min": 6404
|
|
},
|
|
{
|
|
"epoch": 0.7908611599297012,
|
|
"grad_norm": 0.5662160775112339,
|
|
"learning_rate": 3.998079411909731e-05,
|
|
"loss": 0.3309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3355258107185364,
|
|
"step": 450,
|
|
"valid_targets_mean": 7588.7,
|
|
"valid_targets_min": 6381
|
|
},
|
|
{
|
|
"epoch": 0.7996485061511424,
|
|
"grad_norm": 0.5147294588993504,
|
|
"learning_rate": 3.997676166532076e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.331130713224411,
|
|
"step": 455,
|
|
"valid_targets_mean": 7780.9,
|
|
"valid_targets_min": 6994
|
|
},
|
|
{
|
|
"epoch": 0.8084358523725835,
|
|
"grad_norm": 0.48151422115917686,
|
|
"learning_rate": 3.997234547943116e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3537714183330536,
|
|
"step": 460,
|
|
"valid_targets_mean": 7668.2,
|
|
"valid_targets_min": 6956
|
|
},
|
|
{
|
|
"epoch": 0.8172231985940246,
|
|
"grad_norm": 0.5152148119091646,
|
|
"learning_rate": 3.9967545646258684e-05,
|
|
"loss": 0.3301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34110844135284424,
|
|
"step": 465,
|
|
"valid_targets_mean": 7722.5,
|
|
"valid_targets_min": 7138
|
|
},
|
|
{
|
|
"epoch": 0.8260105448154658,
|
|
"grad_norm": 0.5327181478773625,
|
|
"learning_rate": 3.996236225800298e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305248022079468,
|
|
"step": 470,
|
|
"valid_targets_mean": 7872.7,
|
|
"valid_targets_min": 7232
|
|
},
|
|
{
|
|
"epoch": 0.8347978910369068,
|
|
"grad_norm": 0.4832077772333212,
|
|
"learning_rate": 3.995679541423137e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3223893642425537,
|
|
"step": 475,
|
|
"valid_targets_mean": 8212.9,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 0.843585237258348,
|
|
"grad_norm": 0.5352684147985118,
|
|
"learning_rate": 3.9950845221876926e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3198257386684418,
|
|
"step": 480,
|
|
"valid_targets_mean": 7823.2,
|
|
"valid_targets_min": 6958
|
|
},
|
|
{
|
|
"epoch": 0.8523725834797891,
|
|
"grad_norm": 0.5561370729488068,
|
|
"learning_rate": 3.994451179523644e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3468901515007019,
|
|
"step": 485,
|
|
"valid_targets_mean": 7766.8,
|
|
"valid_targets_min": 7054
|
|
},
|
|
{
|
|
"epoch": 0.8611599297012302,
|
|
"grad_norm": 0.49490216854159513,
|
|
"learning_rate": 3.993779525596824e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325663298368454,
|
|
"step": 490,
|
|
"valid_targets_mean": 7161.1,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.8699472759226714,
|
|
"grad_norm": 0.5556201651984429,
|
|
"learning_rate": 3.993069573308982e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3325248956680298,
|
|
"step": 495,
|
|
"valid_targets_mean": 7954.0,
|
|
"valid_targets_min": 6859
|
|
},
|
|
{
|
|
"epoch": 0.8787346221441125,
|
|
"grad_norm": 0.5767089291539206,
|
|
"learning_rate": 3.992321336297537e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32498735189437866,
|
|
"step": 500,
|
|
"valid_targets_mean": 7508.8,
|
|
"valid_targets_min": 6708
|
|
},
|
|
{
|
|
"epoch": 0.8875219683655536,
|
|
"grad_norm": 0.592345847494032,
|
|
"learning_rate": 3.99153482893532e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3253369629383087,
|
|
"step": 505,
|
|
"valid_targets_mean": 7774.0,
|
|
"valid_targets_min": 6744
|
|
},
|
|
{
|
|
"epoch": 0.8963093145869947,
|
|
"grad_norm": 0.5437305711608164,
|
|
"learning_rate": 3.99071006633029e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3150107264518738,
|
|
"step": 510,
|
|
"valid_targets_mean": 7838.1,
|
|
"valid_targets_min": 7349
|
|
},
|
|
{
|
|
"epoch": 0.9050966608084359,
|
|
"grad_norm": 0.6455180582172454,
|
|
"learning_rate": 3.9898470643252496e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31834477186203003,
|
|
"step": 515,
|
|
"valid_targets_mean": 7792.3,
|
|
"valid_targets_min": 7277
|
|
},
|
|
{
|
|
"epoch": 0.9138840070298769,
|
|
"grad_norm": 0.5860488049976041,
|
|
"learning_rate": 3.9889458394975404e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3431106209754944,
|
|
"step": 520,
|
|
"valid_targets_mean": 7672.0,
|
|
"valid_targets_min": 6788
|
|
},
|
|
{
|
|
"epoch": 0.9226713532513181,
|
|
"grad_norm": 0.49869424349859637,
|
|
"learning_rate": 3.9880064091587226e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3385096788406372,
|
|
"step": 525,
|
|
"valid_targets_mean": 8874.5,
|
|
"valid_targets_min": 7049
|
|
},
|
|
{
|
|
"epoch": 0.9314586994727593,
|
|
"grad_norm": 0.4791798901114336,
|
|
"learning_rate": 3.9870287913542416e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3190639317035675,
|
|
"step": 530,
|
|
"valid_targets_mean": 7695.1,
|
|
"valid_targets_min": 7294
|
|
},
|
|
{
|
|
"epoch": 0.9402460456942003,
|
|
"grad_norm": 0.5268875896832551,
|
|
"learning_rate": 3.9860130048630855e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3270207345485687,
|
|
"step": 535,
|
|
"valid_targets_mean": 8432.9,
|
|
"valid_targets_min": 6766
|
|
},
|
|
{
|
|
"epoch": 0.9490333919156415,
|
|
"grad_norm": 0.5109366805942274,
|
|
"learning_rate": 3.9849590691974206e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33174222707748413,
|
|
"step": 540,
|
|
"valid_targets_mean": 7830.2,
|
|
"valid_targets_min": 7068
|
|
},
|
|
{
|
|
"epoch": 0.9578207381370826,
|
|
"grad_norm": 0.5965366370776712,
|
|
"learning_rate": 3.9838670046022166e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3299826979637146,
|
|
"step": 545,
|
|
"valid_targets_mean": 7773.7,
|
|
"valid_targets_min": 7215
|
|
},
|
|
{
|
|
"epoch": 0.9666080843585237,
|
|
"grad_norm": 0.4847692573044287,
|
|
"learning_rate": 3.982736832054862e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3373975157737732,
|
|
"step": 550,
|
|
"valid_targets_mean": 7721.5,
|
|
"valid_targets_min": 6768
|
|
},
|
|
{
|
|
"epoch": 0.9753954305799648,
|
|
"grad_norm": 0.5236300498438478,
|
|
"learning_rate": 3.9815685732647556e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31755590438842773,
|
|
"step": 555,
|
|
"valid_targets_mean": 7896.3,
|
|
"valid_targets_min": 7133
|
|
},
|
|
{
|
|
"epoch": 0.984182776801406,
|
|
"grad_norm": 0.5487102288494853,
|
|
"learning_rate": 3.9803622506728916e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31796884536743164,
|
|
"step": 560,
|
|
"valid_targets_mean": 8345.5,
|
|
"valid_targets_min": 6772
|
|
},
|
|
{
|
|
"epoch": 0.9929701230228472,
|
|
"grad_norm": 0.48622322101501725,
|
|
"learning_rate": 3.9791178874514314e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005170524120331,
|
|
"step": 565,
|
|
"valid_targets_mean": 7888.6,
|
|
"valid_targets_min": 7338
|
|
},
|
|
{
|
|
"epoch": 1.0017574692442883,
|
|
"grad_norm": 0.5209897970025318,
|
|
"learning_rate": 3.977835507503254e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32238900661468506,
|
|
"step": 570,
|
|
"valid_targets_mean": 9126.5,
|
|
"valid_targets_min": 7123
|
|
},
|
|
{
|
|
"epoch": 1.0105448154657293,
|
|
"grad_norm": 0.5277875114886403,
|
|
"learning_rate": 3.9765151354614995e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3237850069999695,
|
|
"step": 575,
|
|
"valid_targets_mean": 7542.9,
|
|
"valid_targets_min": 6828
|
|
},
|
|
{
|
|
"epoch": 1.0193321616871704,
|
|
"grad_norm": 0.5597616301590367,
|
|
"learning_rate": 3.9751567966890946e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32237479090690613,
|
|
"step": 580,
|
|
"valid_targets_mean": 7596.3,
|
|
"valid_targets_min": 6688
|
|
},
|
|
{
|
|
"epoch": 1.0281195079086116,
|
|
"grad_norm": 0.5285554956078993,
|
|
"learning_rate": 3.973760517278268e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3246050775051117,
|
|
"step": 585,
|
|
"valid_targets_mean": 7218.9,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 1.0369068541300528,
|
|
"grad_norm": 0.5016573118247876,
|
|
"learning_rate": 3.972326324050045e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3259458541870117,
|
|
"step": 590,
|
|
"valid_targets_mean": 7718.3,
|
|
"valid_targets_min": 6451
|
|
},
|
|
{
|
|
"epoch": 1.0456942003514937,
|
|
"grad_norm": 0.47518350285088584,
|
|
"learning_rate": 3.970854244553736e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33282628655433655,
|
|
"step": 595,
|
|
"valid_targets_mean": 7674.4,
|
|
"valid_targets_min": 7189
|
|
},
|
|
{
|
|
"epoch": 1.054481546572935,
|
|
"grad_norm": 0.5009335948586953,
|
|
"learning_rate": 3.969344307066404e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3331737816333771,
|
|
"step": 600,
|
|
"valid_targets_mean": 7871.4,
|
|
"valid_targets_min": 7054
|
|
},
|
|
{
|
|
"epoch": 1.063268892794376,
|
|
"grad_norm": 0.5651686519634432,
|
|
"learning_rate": 3.967796540592327e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3132637143135071,
|
|
"step": 605,
|
|
"valid_targets_mean": 7547.6,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 1.0720562390158173,
|
|
"grad_norm": 0.42436518651387334,
|
|
"learning_rate": 3.966210974862433e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30082017183303833,
|
|
"step": 610,
|
|
"valid_targets_mean": 7801.0,
|
|
"valid_targets_min": 6996
|
|
},
|
|
{
|
|
"epoch": 1.0808435852372584,
|
|
"grad_norm": 0.543738759441098,
|
|
"learning_rate": 3.964587640333736e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31432604789733887,
|
|
"step": 615,
|
|
"valid_targets_mean": 7765.2,
|
|
"valid_targets_min": 6892
|
|
},
|
|
{
|
|
"epoch": 1.0896309314586994,
|
|
"grad_norm": 0.5181416198044555,
|
|
"learning_rate": 3.9629265681887456e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32652005553245544,
|
|
"step": 620,
|
|
"valid_targets_mean": 7536.4,
|
|
"valid_targets_min": 5784
|
|
},
|
|
{
|
|
"epoch": 1.0984182776801406,
|
|
"grad_norm": 0.5140276582122142,
|
|
"learning_rate": 3.961227790334872e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31211698055267334,
|
|
"step": 625,
|
|
"valid_targets_mean": 7735.3,
|
|
"valid_targets_min": 7001
|
|
},
|
|
{
|
|
"epoch": 1.1072056239015817,
|
|
"grad_norm": 0.5282119837909162,
|
|
"learning_rate": 3.959491339403814e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33794674277305603,
|
|
"step": 630,
|
|
"valid_targets_mean": 7889.0,
|
|
"valid_targets_min": 6956
|
|
},
|
|
{
|
|
"epoch": 1.115992970123023,
|
|
"grad_norm": 0.47248502297977735,
|
|
"learning_rate": 3.957717248750923e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3132312595844269,
|
|
"step": 635,
|
|
"valid_targets_mean": 9497.9,
|
|
"valid_targets_min": 7402
|
|
},
|
|
{
|
|
"epoch": 1.124780316344464,
|
|
"grad_norm": 0.6004536131705314,
|
|
"learning_rate": 3.9559055524545755e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32647284865379333,
|
|
"step": 640,
|
|
"valid_targets_mean": 7229.4,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 1.133567662565905,
|
|
"grad_norm": 0.6010689936463117,
|
|
"learning_rate": 3.9540562853155086e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3334662914276123,
|
|
"step": 645,
|
|
"valid_targets_mean": 7780.3,
|
|
"valid_targets_min": 6782
|
|
},
|
|
{
|
|
"epoch": 1.1423550087873462,
|
|
"grad_norm": 0.4885332419281782,
|
|
"learning_rate": 3.9521694828561566e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3191852867603302,
|
|
"step": 650,
|
|
"valid_targets_mean": 7669.9,
|
|
"valid_targets_min": 7189
|
|
},
|
|
{
|
|
"epoch": 1.1511423550087874,
|
|
"grad_norm": 0.5719088465025136,
|
|
"learning_rate": 3.950245181319965e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3156038522720337,
|
|
"step": 655,
|
|
"valid_targets_mean": 7323.6,
|
|
"valid_targets_min": 4561
|
|
},
|
|
{
|
|
"epoch": 1.1599297012302285,
|
|
"grad_norm": 0.48450178709647856,
|
|
"learning_rate": 3.948283417670698e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31226205825805664,
|
|
"step": 660,
|
|
"valid_targets_mean": 7599.6,
|
|
"valid_targets_min": 6959
|
|
},
|
|
{
|
|
"epoch": 1.1687170474516697,
|
|
"grad_norm": 0.5336121025006089,
|
|
"learning_rate": 3.9462842295917254e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.320054292678833,
|
|
"step": 665,
|
|
"valid_targets_mean": 7689.0,
|
|
"valid_targets_min": 6442
|
|
},
|
|
{
|
|
"epoch": 1.1775043936731107,
|
|
"grad_norm": 0.5168172619234773,
|
|
"learning_rate": 3.944247655485302e-05,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3125112056732178,
|
|
"step": 670,
|
|
"valid_targets_mean": 7615.4,
|
|
"valid_targets_min": 7042
|
|
},
|
|
{
|
|
"epoch": 1.1862917398945518,
|
|
"grad_norm": 0.5306935722278405,
|
|
"learning_rate": 3.942173734471825e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3204430341720581,
|
|
"step": 675,
|
|
"valid_targets_mean": 7598.4,
|
|
"valid_targets_min": 6504
|
|
},
|
|
{
|
|
"epoch": 1.195079086115993,
|
|
"grad_norm": 0.49391490325677384,
|
|
"learning_rate": 3.940062506389089e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32571330666542053,
|
|
"step": 680,
|
|
"valid_targets_mean": 7688.0,
|
|
"valid_targets_min": 6977
|
|
},
|
|
{
|
|
"epoch": 1.2038664323374342,
|
|
"grad_norm": 0.4931314528962855,
|
|
"learning_rate": 3.937914011791513e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33908209204673767,
|
|
"step": 685,
|
|
"valid_targets_mean": 8584.7,
|
|
"valid_targets_min": 7423
|
|
},
|
|
{
|
|
"epoch": 1.2126537785588751,
|
|
"grad_norm": 0.48655008582263876,
|
|
"learning_rate": 3.9357282919493717e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32384777069091797,
|
|
"step": 690,
|
|
"valid_targets_mean": 7632.4,
|
|
"valid_targets_min": 5869
|
|
},
|
|
{
|
|
"epoch": 1.2214411247803163,
|
|
"grad_norm": 0.500738194693758,
|
|
"learning_rate": 3.93350538884799e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33496755361557007,
|
|
"step": 695,
|
|
"valid_targets_mean": 7711.9,
|
|
"valid_targets_min": 6950
|
|
},
|
|
{
|
|
"epoch": 1.2302284710017575,
|
|
"grad_norm": 0.5352717845302437,
|
|
"learning_rate": 3.931245345186949e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30350565910339355,
|
|
"step": 700,
|
|
"valid_targets_mean": 8455.4,
|
|
"valid_targets_min": 6773
|
|
},
|
|
{
|
|
"epoch": 1.2390158172231986,
|
|
"grad_norm": 0.4775215505295606,
|
|
"learning_rate": 3.928948204379256e-05,
|
|
"loss": 0.3206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3287331461906433,
|
|
"step": 705,
|
|
"valid_targets_mean": 7821.4,
|
|
"valid_targets_min": 7257
|
|
},
|
|
{
|
|
"epoch": 1.2478031634446398,
|
|
"grad_norm": 0.4829129399145732,
|
|
"learning_rate": 3.926614010550516e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3171924352645874,
|
|
"step": 710,
|
|
"valid_targets_mean": 7675.7,
|
|
"valid_targets_min": 6679
|
|
},
|
|
{
|
|
"epoch": 1.2565905096660808,
|
|
"grad_norm": 0.4483991183139604,
|
|
"learning_rate": 3.924242808538083e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31392353773117065,
|
|
"step": 715,
|
|
"valid_targets_mean": 7726.5,
|
|
"valid_targets_min": 6153
|
|
},
|
|
{
|
|
"epoch": 1.265377855887522,
|
|
"grad_norm": 0.5014168550769796,
|
|
"learning_rate": 3.9218346438901996e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.317163348197937,
|
|
"step": 720,
|
|
"valid_targets_mean": 7591.8,
|
|
"valid_targets_min": 6396
|
|
},
|
|
{
|
|
"epoch": 1.2741652021089631,
|
|
"grad_norm": 0.5122275835888518,
|
|
"learning_rate": 3.9193895628651174e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096388876438141,
|
|
"step": 725,
|
|
"valid_targets_mean": 7610.3,
|
|
"valid_targets_min": 6982
|
|
},
|
|
{
|
|
"epoch": 1.2829525483304043,
|
|
"grad_norm": 0.48739308701526257,
|
|
"learning_rate": 3.916907612430215e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3071346879005432,
|
|
"step": 730,
|
|
"valid_targets_mean": 8241.4,
|
|
"valid_targets_min": 7084
|
|
},
|
|
{
|
|
"epoch": 1.2917398945518452,
|
|
"grad_norm": 0.5284650604703849,
|
|
"learning_rate": 3.914388840261092e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3126200735569,
|
|
"step": 735,
|
|
"valid_targets_mean": 7693.7,
|
|
"valid_targets_min": 6791
|
|
},
|
|
{
|
|
"epoch": 1.3005272407732864,
|
|
"grad_norm": 0.4857415987362,
|
|
"learning_rate": 3.911833294740653e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328471302986145,
|
|
"step": 740,
|
|
"valid_targets_mean": 7574.6,
|
|
"valid_targets_min": 5191
|
|
},
|
|
{
|
|
"epoch": 1.3093145869947276,
|
|
"grad_norm": 0.41085987983419675,
|
|
"learning_rate": 3.9092410249581794e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30733928084373474,
|
|
"step": 745,
|
|
"valid_targets_mean": 7675.6,
|
|
"valid_targets_min": 6926
|
|
},
|
|
{
|
|
"epoch": 1.3181019332161688,
|
|
"grad_norm": 0.45850657958304913,
|
|
"learning_rate": 3.9066120807083875e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096606433391571,
|
|
"step": 750,
|
|
"valid_targets_mean": 7721.5,
|
|
"valid_targets_min": 7048
|
|
},
|
|
{
|
|
"epoch": 1.32688927943761,
|
|
"grad_norm": 0.5218607509428387,
|
|
"learning_rate": 3.90394651249047e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3240182399749756,
|
|
"step": 755,
|
|
"valid_targets_mean": 7440.1,
|
|
"valid_targets_min": 6248
|
|
},
|
|
{
|
|
"epoch": 1.335676625659051,
|
|
"grad_norm": 0.5226476860028977,
|
|
"learning_rate": 3.901244371507126e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3309449553489685,
|
|
"step": 760,
|
|
"valid_targets_mean": 7491.1,
|
|
"valid_targets_min": 6608
|
|
},
|
|
{
|
|
"epoch": 1.344463971880492,
|
|
"grad_norm": 0.48601085019051943,
|
|
"learning_rate": 3.898505709663578e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3042697310447693,
|
|
"step": 765,
|
|
"valid_targets_mean": 7941.6,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.3532513181019332,
|
|
"grad_norm": 0.5192638452483873,
|
|
"learning_rate": 3.895730579566576e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.335843950510025,
|
|
"step": 770,
|
|
"valid_targets_mean": 8004.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.3620386643233744,
|
|
"grad_norm": 0.49021475730604136,
|
|
"learning_rate": 3.892919034523385e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31338605284690857,
|
|
"step": 775,
|
|
"valid_targets_mean": 7635.1,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 1.3708260105448153,
|
|
"grad_norm": 0.4732842882172017,
|
|
"learning_rate": 3.8900711285407625e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34282398223876953,
|
|
"step": 780,
|
|
"valid_targets_mean": 7690.4,
|
|
"valid_targets_min": 6795
|
|
},
|
|
{
|
|
"epoch": 1.3796133567662565,
|
|
"grad_norm": 0.43326778052686593,
|
|
"learning_rate": 3.887186916323921e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3298339247703552,
|
|
"step": 785,
|
|
"valid_targets_mean": 9189.6,
|
|
"valid_targets_min": 7063
|
|
},
|
|
{
|
|
"epoch": 1.3884007029876977,
|
|
"grad_norm": 0.4569354721607761,
|
|
"learning_rate": 3.884266453275476e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31393471360206604,
|
|
"step": 790,
|
|
"valid_targets_mean": 7767.4,
|
|
"valid_targets_min": 6796
|
|
},
|
|
{
|
|
"epoch": 1.3971880492091389,
|
|
"grad_norm": 0.41395577038033143,
|
|
"learning_rate": 3.8813097954943814e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2988349199295044,
|
|
"step": 795,
|
|
"valid_targets_mean": 7797.6,
|
|
"valid_targets_min": 6788
|
|
},
|
|
{
|
|
"epoch": 1.40597539543058,
|
|
"grad_norm": 0.44276378335869104,
|
|
"learning_rate": 3.878316999774856e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3206707239151001,
|
|
"step": 800,
|
|
"valid_targets_mean": 8609.4,
|
|
"valid_targets_min": 6835
|
|
},
|
|
{
|
|
"epoch": 1.4147627416520212,
|
|
"grad_norm": 0.4785714205137601,
|
|
"learning_rate": 3.8752881236052864e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3262031674385071,
|
|
"step": 805,
|
|
"valid_targets_mean": 7643.6,
|
|
"valid_targets_min": 7144
|
|
},
|
|
{
|
|
"epoch": 1.4235500878734622,
|
|
"grad_norm": 0.4654192860411442,
|
|
"learning_rate": 3.872223225167129e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31912875175476074,
|
|
"step": 810,
|
|
"valid_targets_mean": 8542.4,
|
|
"valid_targets_min": 6823
|
|
},
|
|
{
|
|
"epoch": 1.4323374340949033,
|
|
"grad_norm": 0.4566427062230126,
|
|
"learning_rate": 3.8691223633337854e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31717827916145325,
|
|
"step": 815,
|
|
"valid_targets_mean": 8748.4,
|
|
"valid_targets_min": 7035
|
|
},
|
|
{
|
|
"epoch": 1.4411247803163445,
|
|
"grad_norm": 0.5065259706681324,
|
|
"learning_rate": 3.865985597669478e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32165688276290894,
|
|
"step": 820,
|
|
"valid_targets_mean": 8167.2,
|
|
"valid_targets_min": 6935
|
|
},
|
|
{
|
|
"epoch": 1.4499121265377855,
|
|
"grad_norm": 0.5235301993054804,
|
|
"learning_rate": 3.8628129884281034e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32480478286743164,
|
|
"step": 825,
|
|
"valid_targets_mean": 7586.2,
|
|
"valid_targets_min": 6637
|
|
},
|
|
{
|
|
"epoch": 1.4586994727592266,
|
|
"grad_norm": 0.45764897709254765,
|
|
"learning_rate": 3.859604596552074e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31234073638916016,
|
|
"step": 830,
|
|
"valid_targets_mean": 8294.1,
|
|
"valid_targets_min": 4643
|
|
},
|
|
{
|
|
"epoch": 1.4674868189806678,
|
|
"grad_norm": 0.4819891269065734,
|
|
"learning_rate": 3.8563604836711474e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3018292188644409,
|
|
"step": 835,
|
|
"valid_targets_mean": 7926.1,
|
|
"valid_targets_min": 7208
|
|
},
|
|
{
|
|
"epoch": 1.476274165202109,
|
|
"grad_norm": 0.4548945218238035,
|
|
"learning_rate": 3.8530807121012446e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30471688508987427,
|
|
"step": 840,
|
|
"valid_targets_mean": 7810.9,
|
|
"valid_targets_min": 7383
|
|
},
|
|
{
|
|
"epoch": 1.4850615114235501,
|
|
"grad_norm": 0.5069039765101178,
|
|
"learning_rate": 3.849765344843251e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32281768321990967,
|
|
"step": 845,
|
|
"valid_targets_mean": 7744.0,
|
|
"valid_targets_min": 6792
|
|
},
|
|
{
|
|
"epoch": 1.4938488576449913,
|
|
"grad_norm": 0.4353328346005066,
|
|
"learning_rate": 3.8464144455818065e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31245318055152893,
|
|
"step": 850,
|
|
"valid_targets_mean": 8749.6,
|
|
"valid_targets_min": 6404
|
|
},
|
|
{
|
|
"epoch": 1.5026362038664325,
|
|
"grad_norm": 0.5154934910092297,
|
|
"learning_rate": 3.8430280786840844e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092312216758728,
|
|
"step": 855,
|
|
"valid_targets_mean": 7474.7,
|
|
"valid_targets_min": 6443
|
|
},
|
|
{
|
|
"epoch": 1.5114235500878734,
|
|
"grad_norm": 0.4717707322137572,
|
|
"learning_rate": 3.83960630919855e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33252573013305664,
|
|
"step": 860,
|
|
"valid_targets_mean": 7726.3,
|
|
"valid_targets_min": 6303
|
|
},
|
|
{
|
|
"epoch": 1.5202108963093146,
|
|
"grad_norm": 0.4285070301543767,
|
|
"learning_rate": 3.836149202853718e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3279268443584442,
|
|
"step": 865,
|
|
"valid_targets_mean": 7570.2,
|
|
"valid_targets_min": 6998
|
|
},
|
|
{
|
|
"epoch": 1.5289982425307556,
|
|
"grad_norm": 0.5108276009439777,
|
|
"learning_rate": 3.8326568260568835e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33991026878356934,
|
|
"step": 870,
|
|
"valid_targets_mean": 7096.4,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 1.5377855887521967,
|
|
"grad_norm": 0.5141325486455165,
|
|
"learning_rate": 3.82912924589285e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30574706196784973,
|
|
"step": 875,
|
|
"valid_targets_mean": 7673.6,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 1.546572934973638,
|
|
"grad_norm": 0.47625262382472705,
|
|
"learning_rate": 3.825566530122638e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31097936630249023,
|
|
"step": 880,
|
|
"valid_targets_mean": 7730.6,
|
|
"valid_targets_min": 6901
|
|
},
|
|
{
|
|
"epoch": 1.555360281195079,
|
|
"grad_norm": 0.5075543716763415,
|
|
"learning_rate": 3.8219687471821896e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3260986804962158,
|
|
"step": 885,
|
|
"valid_targets_mean": 7240.1,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 1.5641476274165202,
|
|
"grad_norm": 0.4684436719897207,
|
|
"learning_rate": 3.818335966181045e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3189481496810913,
|
|
"step": 890,
|
|
"valid_targets_mean": 7754.8,
|
|
"valid_targets_min": 6983
|
|
},
|
|
{
|
|
"epoch": 1.5729349736379614,
|
|
"grad_norm": 0.4656673097955027,
|
|
"learning_rate": 3.814668256901022e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31112608313560486,
|
|
"step": 895,
|
|
"valid_targets_mean": 7850.2,
|
|
"valid_targets_min": 6723
|
|
},
|
|
{
|
|
"epoch": 1.5817223198594026,
|
|
"grad_norm": 0.4384250396130361,
|
|
"learning_rate": 3.8109656897948726e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31039461493492126,
|
|
"step": 900,
|
|
"valid_targets_mean": 7324.2,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 1.5905096660808435,
|
|
"grad_norm": 0.44267159794679833,
|
|
"learning_rate": 3.80722833598493e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3088935613632202,
|
|
"step": 905,
|
|
"valid_targets_mean": 7892.1,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 1.5992970123022847,
|
|
"grad_norm": 0.47691558808071355,
|
|
"learning_rate": 3.803456267261742e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3185274600982666,
|
|
"step": 910,
|
|
"valid_targets_mean": 7762.7,
|
|
"valid_targets_min": 7394
|
|
},
|
|
{
|
|
"epoch": 1.6080843585237259,
|
|
"grad_norm": 0.4618900297805469,
|
|
"learning_rate": 3.799649556082694e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30390602350234985,
|
|
"step": 915,
|
|
"valid_targets_mean": 7575.7,
|
|
"valid_targets_min": 6779
|
|
},
|
|
{
|
|
"epoch": 1.6168717047451668,
|
|
"grad_norm": 0.48363111062562336,
|
|
"learning_rate": 3.7958082755706135e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3203890323638916,
|
|
"step": 920,
|
|
"valid_targets_mean": 7784.1,
|
|
"valid_targets_min": 6591
|
|
},
|
|
{
|
|
"epoch": 1.625659050966608,
|
|
"grad_norm": 0.4401665133027058,
|
|
"learning_rate": 3.79193249951237e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.326861172914505,
|
|
"step": 925,
|
|
"valid_targets_mean": 7839.1,
|
|
"valid_targets_min": 7149
|
|
},
|
|
{
|
|
"epoch": 1.6344463971880492,
|
|
"grad_norm": 0.4244406950731562,
|
|
"learning_rate": 3.788022302357455e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063619136810303,
|
|
"step": 930,
|
|
"valid_targets_mean": 8331.9,
|
|
"valid_targets_min": 6867
|
|
},
|
|
{
|
|
"epoch": 1.6432337434094904,
|
|
"grad_norm": 0.5065832005235931,
|
|
"learning_rate": 3.784077759216551e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33527040481567383,
|
|
"step": 935,
|
|
"valid_targets_mean": 8369.5,
|
|
"valid_targets_min": 6595
|
|
},
|
|
{
|
|
"epoch": 1.6520210896309315,
|
|
"grad_norm": 0.38976211524282217,
|
|
"learning_rate": 3.7800989458600906e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886279821395874,
|
|
"step": 940,
|
|
"valid_targets_mean": 7513.3,
|
|
"valid_targets_min": 2858
|
|
},
|
|
{
|
|
"epoch": 1.6608084358523727,
|
|
"grad_norm": 0.4624244101065446,
|
|
"learning_rate": 3.776085938716801e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003271222114563,
|
|
"step": 945,
|
|
"valid_targets_mean": 7507.2,
|
|
"valid_targets_min": 6571
|
|
},
|
|
{
|
|
"epoch": 1.6695957820738139,
|
|
"grad_norm": 0.49790210807369195,
|
|
"learning_rate": 3.7720388148722336e-05,
|
|
"loss": 0.3157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3175293803215027,
|
|
"step": 950,
|
|
"valid_targets_mean": 8316.7,
|
|
"valid_targets_min": 6720
|
|
},
|
|
{
|
|
"epoch": 1.6783831282952548,
|
|
"grad_norm": 0.440482172085147,
|
|
"learning_rate": 3.7679576520672866e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30894142389297485,
|
|
"step": 955,
|
|
"valid_targets_mean": 7769.9,
|
|
"valid_targets_min": 6443
|
|
},
|
|
{
|
|
"epoch": 1.687170474516696,
|
|
"grad_norm": 0.4375270693805628,
|
|
"learning_rate": 3.76384252869671e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30674490332603455,
|
|
"step": 960,
|
|
"valid_targets_mean": 7664.5,
|
|
"valid_targets_min": 6381
|
|
},
|
|
{
|
|
"epoch": 1.695957820738137,
|
|
"grad_norm": 0.4268514111495579,
|
|
"learning_rate": 3.7596935238076e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31081438064575195,
|
|
"step": 965,
|
|
"valid_targets_mean": 7781.6,
|
|
"valid_targets_min": 7169
|
|
},
|
|
{
|
|
"epoch": 1.7047451669595781,
|
|
"grad_norm": 0.41848391215544556,
|
|
"learning_rate": 3.755510717097877e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3070789575576782,
|
|
"step": 970,
|
|
"valid_targets_mean": 7771.7,
|
|
"valid_targets_min": 6903
|
|
},
|
|
{
|
|
"epoch": 1.7135325131810193,
|
|
"grad_norm": 0.38149791836038105,
|
|
"learning_rate": 3.751294188914763e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30293750762939453,
|
|
"step": 975,
|
|
"valid_targets_mean": 8644.6,
|
|
"valid_targets_min": 6659
|
|
},
|
|
{
|
|
"epoch": 1.7223198594024605,
|
|
"grad_norm": 0.4107500940993902,
|
|
"learning_rate": 3.74704402025323e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.320509672164917,
|
|
"step": 980,
|
|
"valid_targets_mean": 7373.2,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.7311072056239016,
|
|
"grad_norm": 0.47456518227912814,
|
|
"learning_rate": 3.742760292754448e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3110986649990082,
|
|
"step": 985,
|
|
"valid_targets_mean": 7610.0,
|
|
"valid_targets_min": 6580
|
|
},
|
|
{
|
|
"epoch": 1.7398945518453428,
|
|
"grad_norm": 0.4804039533298582,
|
|
"learning_rate": 3.738443088704217e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3051522970199585,
|
|
"step": 990,
|
|
"valid_targets_mean": 7695.4,
|
|
"valid_targets_min": 6921
|
|
},
|
|
{
|
|
"epoch": 1.748681898066784,
|
|
"grad_norm": 0.413409711024688,
|
|
"learning_rate": 3.7340924910313856e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31139689683914185,
|
|
"step": 995,
|
|
"valid_targets_mean": 8237.9,
|
|
"valid_targets_min": 5829
|
|
},
|
|
{
|
|
"epoch": 1.757469244288225,
|
|
"grad_norm": 0.48349747289345335,
|
|
"learning_rate": 3.7297085833062567e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30862659215927124,
|
|
"step": 1000,
|
|
"valid_targets_mean": 7671.2,
|
|
"valid_targets_min": 6818
|
|
},
|
|
{
|
|
"epoch": 1.766256590509666,
|
|
"grad_norm": 0.4556312359385941,
|
|
"learning_rate": 3.725291449738984e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119673728942871,
|
|
"step": 1005,
|
|
"valid_targets_mean": 7820.5,
|
|
"valid_targets_min": 6973
|
|
},
|
|
{
|
|
"epoch": 1.775043936731107,
|
|
"grad_norm": 0.4732905673565096,
|
|
"learning_rate": 3.720841175177954e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128761649131775,
|
|
"step": 1010,
|
|
"valid_targets_mean": 7487.6,
|
|
"valid_targets_min": 6949
|
|
},
|
|
{
|
|
"epoch": 1.7838312829525482,
|
|
"grad_norm": 0.4360920490955908,
|
|
"learning_rate": 3.716357845108157e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312790185213089,
|
|
"step": 1015,
|
|
"valid_targets_mean": 7345.1,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 1.7926186291739894,
|
|
"grad_norm": 0.3929791909563157,
|
|
"learning_rate": 3.711841545649543e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3148069977760315,
|
|
"step": 1020,
|
|
"valid_targets_mean": 8261.2,
|
|
"valid_targets_min": 7082
|
|
},
|
|
{
|
|
"epoch": 1.8014059753954306,
|
|
"grad_norm": 0.48748675844958406,
|
|
"learning_rate": 3.707292363555368e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32383400201797485,
|
|
"step": 1025,
|
|
"valid_targets_mean": 7643.5,
|
|
"valid_targets_min": 6629
|
|
},
|
|
{
|
|
"epoch": 1.8101933216168717,
|
|
"grad_norm": 0.41484975733955637,
|
|
"learning_rate": 3.702710386210531e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31957483291625977,
|
|
"step": 1030,
|
|
"valid_targets_mean": 7626.9,
|
|
"valid_targets_min": 6961
|
|
},
|
|
{
|
|
"epoch": 1.818980667838313,
|
|
"grad_norm": 0.4464507451663518,
|
|
"learning_rate": 3.698095701629888e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.326063334941864,
|
|
"step": 1035,
|
|
"valid_targets_mean": 8113.2,
|
|
"valid_targets_min": 7451
|
|
},
|
|
{
|
|
"epoch": 1.827768014059754,
|
|
"grad_norm": 0.45364640899190156,
|
|
"learning_rate": 3.6934483984565684e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3152083158493042,
|
|
"step": 1040,
|
|
"valid_targets_mean": 8248.0,
|
|
"valid_targets_min": 6075
|
|
},
|
|
{
|
|
"epoch": 1.836555360281195,
|
|
"grad_norm": 0.42820910363555187,
|
|
"learning_rate": 3.688768565960271e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32458916306495667,
|
|
"step": 1045,
|
|
"valid_targets_mean": 8503.2,
|
|
"valid_targets_min": 7300
|
|
},
|
|
{
|
|
"epoch": 1.8453427065026362,
|
|
"grad_norm": 0.43586147124853236,
|
|
"learning_rate": 3.6840562940355456e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.304810106754303,
|
|
"step": 1050,
|
|
"valid_targets_mean": 7810.0,
|
|
"valid_targets_min": 6889
|
|
},
|
|
{
|
|
"epoch": 1.8541300527240774,
|
|
"grad_norm": 0.428268483581941,
|
|
"learning_rate": 3.679311673200069e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3349687457084656,
|
|
"step": 1055,
|
|
"valid_targets_mean": 8437.2,
|
|
"valid_targets_min": 6462
|
|
},
|
|
{
|
|
"epoch": 1.8629173989455183,
|
|
"grad_norm": 0.456980102686238,
|
|
"learning_rate": 3.674534794592907e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3136439919471741,
|
|
"step": 1060,
|
|
"valid_targets_mean": 7714.3,
|
|
"valid_targets_min": 7094
|
|
},
|
|
{
|
|
"epoch": 1.8717047451669595,
|
|
"grad_norm": 0.38144903531815577,
|
|
"learning_rate": 3.66972574997276e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29975107312202454,
|
|
"step": 1065,
|
|
"valid_targets_mean": 9710.9,
|
|
"valid_targets_min": 7367
|
|
},
|
|
{
|
|
"epoch": 1.8804920913884007,
|
|
"grad_norm": 0.43469010555959436,
|
|
"learning_rate": 3.664884631716207e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3089476227760315,
|
|
"step": 1070,
|
|
"valid_targets_mean": 8081.8,
|
|
"valid_targets_min": 6677
|
|
},
|
|
{
|
|
"epoch": 1.8892794376098418,
|
|
"grad_norm": 0.4334009291737787,
|
|
"learning_rate": 3.660011532815923e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3097480237483978,
|
|
"step": 1075,
|
|
"valid_targets_mean": 8850.0,
|
|
"valid_targets_min": 7388
|
|
},
|
|
{
|
|
"epoch": 1.898066783831283,
|
|
"grad_norm": 0.44431763802201296,
|
|
"learning_rate": 3.655106546878898e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3016049265861511,
|
|
"step": 1080,
|
|
"valid_targets_mean": 7862.3,
|
|
"valid_targets_min": 7087
|
|
},
|
|
{
|
|
"epoch": 1.9068541300527242,
|
|
"grad_norm": 0.41948895342338693,
|
|
"learning_rate": 3.6501697681246395e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30723023414611816,
|
|
"step": 1085,
|
|
"valid_targets_mean": 7580.1,
|
|
"valid_targets_min": 6580
|
|
},
|
|
{
|
|
"epoch": 1.9156414762741654,
|
|
"grad_norm": 0.4010740667676957,
|
|
"learning_rate": 3.645201291383359e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.311009019613266,
|
|
"step": 1090,
|
|
"valid_targets_mean": 7486.8,
|
|
"valid_targets_min": 5588
|
|
},
|
|
{
|
|
"epoch": 1.9244288224956063,
|
|
"grad_norm": 0.45122299388247816,
|
|
"learning_rate": 3.640201212094153e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3017200529575348,
|
|
"step": 1095,
|
|
"valid_targets_mean": 7640.5,
|
|
"valid_targets_min": 6763
|
|
},
|
|
{
|
|
"epoch": 1.9332161687170475,
|
|
"grad_norm": 0.48626565470320493,
|
|
"learning_rate": 3.635169626303168e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3185763955116272,
|
|
"step": 1100,
|
|
"valid_targets_mean": 7626.3,
|
|
"valid_targets_min": 6673
|
|
},
|
|
{
|
|
"epoch": 1.9420035149384884,
|
|
"grad_norm": 0.4048826995530806,
|
|
"learning_rate": 3.630106630661757e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30462396144866943,
|
|
"step": 1105,
|
|
"valid_targets_mean": 8426.6,
|
|
"valid_targets_min": 6835
|
|
},
|
|
{
|
|
"epoch": 1.9507908611599296,
|
|
"grad_norm": 0.416307359936687,
|
|
"learning_rate": 3.625012322424623e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30712780356407166,
|
|
"step": 1110,
|
|
"valid_targets_mean": 8117.8,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 1.9595782073813708,
|
|
"grad_norm": 0.3993336489052123,
|
|
"learning_rate": 3.619886799447951e-05,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055635690689087,
|
|
"step": 1115,
|
|
"valid_targets_mean": 8382.2,
|
|
"valid_targets_min": 6900
|
|
},
|
|
{
|
|
"epoch": 1.968365553602812,
|
|
"grad_norm": 0.42615246784494065,
|
|
"learning_rate": 3.6147301601875254e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055585026741028,
|
|
"step": 1120,
|
|
"valid_targets_mean": 8223.8,
|
|
"valid_targets_min": 6858
|
|
},
|
|
{
|
|
"epoch": 1.9771528998242531,
|
|
"grad_norm": 0.4490236584014622,
|
|
"learning_rate": 3.609542503696842e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31063467264175415,
|
|
"step": 1125,
|
|
"valid_targets_mean": 8464.8,
|
|
"valid_targets_min": 5375
|
|
},
|
|
{
|
|
"epoch": 1.9859402460456943,
|
|
"grad_norm": 0.42772252919816145,
|
|
"learning_rate": 3.6043239296252044e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3156036138534546,
|
|
"step": 1130,
|
|
"valid_targets_mean": 7510.9,
|
|
"valid_targets_min": 6567
|
|
},
|
|
{
|
|
"epoch": 1.9947275922671355,
|
|
"grad_norm": 0.39384984278326785,
|
|
"learning_rate": 3.59907453821581e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085365295410156,
|
|
"step": 1135,
|
|
"valid_targets_mean": 7640.1,
|
|
"valid_targets_min": 6847
|
|
},
|
|
{
|
|
"epoch": 2.0035149384885766,
|
|
"grad_norm": 0.44225905296388024,
|
|
"learning_rate": 3.593794430303824e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31107211112976074,
|
|
"step": 1140,
|
|
"valid_targets_mean": 7554.0,
|
|
"valid_targets_min": 5191
|
|
},
|
|
{
|
|
"epoch": 2.0123022847100174,
|
|
"grad_norm": 0.4575942842423528,
|
|
"learning_rate": 3.588483707314442e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3130021095275879,
|
|
"step": 1145,
|
|
"valid_targets_mean": 8116.2,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 2.0210896309314585,
|
|
"grad_norm": 0.5030867471723578,
|
|
"learning_rate": 3.583142471260943e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3067258596420288,
|
|
"step": 1150,
|
|
"valid_targets_mean": 7758.1,
|
|
"valid_targets_min": 6711
|
|
},
|
|
{
|
|
"epoch": 2.0298769771528997,
|
|
"grad_norm": 0.39211236477001193,
|
|
"learning_rate": 3.577770824742728e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978590726852417,
|
|
"step": 1155,
|
|
"valid_targets_mean": 7327.9,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 2.038664323374341,
|
|
"grad_norm": 0.45256141444384423,
|
|
"learning_rate": 3.5723688709433514e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29615986347198486,
|
|
"step": 1160,
|
|
"valid_targets_mean": 7482.5,
|
|
"valid_targets_min": 5768
|
|
},
|
|
{
|
|
"epoch": 2.047451669595782,
|
|
"grad_norm": 0.46676125736458673,
|
|
"learning_rate": 3.566936713628538e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3241825997829437,
|
|
"step": 1165,
|
|
"valid_targets_mean": 9016.9,
|
|
"valid_targets_min": 6517
|
|
},
|
|
{
|
|
"epoch": 2.0562390158172232,
|
|
"grad_norm": 0.4187877737697206,
|
|
"learning_rate": 3.561474457144189e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3071352243423462,
|
|
"step": 1170,
|
|
"valid_targets_mean": 7480.2,
|
|
"valid_targets_min": 4237
|
|
},
|
|
{
|
|
"epoch": 2.0650263620386644,
|
|
"grad_norm": 0.3518349882625782,
|
|
"learning_rate": 3.55598220641438e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3100239038467407,
|
|
"step": 1175,
|
|
"valid_targets_mean": 8664.9,
|
|
"valid_targets_min": 6995
|
|
},
|
|
{
|
|
"epoch": 2.0738137082601056,
|
|
"grad_norm": 0.36943905423071605,
|
|
"learning_rate": 3.550460066939339e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29586344957351685,
|
|
"step": 1180,
|
|
"valid_targets_mean": 7798.0,
|
|
"valid_targets_min": 6979
|
|
},
|
|
{
|
|
"epoch": 2.0826010544815468,
|
|
"grad_norm": 0.42787395567576747,
|
|
"learning_rate": 3.544908144793431e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3031949996948242,
|
|
"step": 1185,
|
|
"valid_targets_mean": 7733.5,
|
|
"valid_targets_min": 6670
|
|
},
|
|
{
|
|
"epoch": 2.0913884007029875,
|
|
"grad_norm": 0.43552604464567035,
|
|
"learning_rate": 3.539326546623111e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30863067507743835,
|
|
"step": 1190,
|
|
"valid_targets_mean": 7751.4,
|
|
"valid_targets_min": 6757
|
|
},
|
|
{
|
|
"epoch": 2.1001757469244287,
|
|
"grad_norm": 0.4294105847217134,
|
|
"learning_rate": 3.533715379644877e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29311901330947876,
|
|
"step": 1195,
|
|
"valid_targets_mean": 7684.4,
|
|
"valid_targets_min": 7058
|
|
},
|
|
{
|
|
"epoch": 2.10896309314587,
|
|
"grad_norm": 0.4207247429632229,
|
|
"learning_rate": 3.528074751643215e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2985104024410248,
|
|
"step": 1200,
|
|
"valid_targets_mean": 7601.2,
|
|
"valid_targets_min": 6562
|
|
},
|
|
{
|
|
"epoch": 2.117750439367311,
|
|
"grad_norm": 0.3732874962554062,
|
|
"learning_rate": 3.5224047709685236e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063611388206482,
|
|
"step": 1205,
|
|
"valid_targets_mean": 7761.4,
|
|
"valid_targets_min": 7049
|
|
},
|
|
{
|
|
"epoch": 2.126537785588752,
|
|
"grad_norm": 0.448230951970861,
|
|
"learning_rate": 3.516705546535036e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147931694984436,
|
|
"step": 1210,
|
|
"valid_targets_mean": 7646.8,
|
|
"valid_targets_min": 6300
|
|
},
|
|
{
|
|
"epoch": 2.1353251318101933,
|
|
"grad_norm": 0.4834980094825441,
|
|
"learning_rate": 3.510977187818725e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965982258319855,
|
|
"step": 1215,
|
|
"valid_targets_mean": 7572.7,
|
|
"valid_targets_min": 6467
|
|
},
|
|
{
|
|
"epoch": 2.1441124780316345,
|
|
"grad_norm": 0.3844913906138571,
|
|
"learning_rate": 3.5052198048552034e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3124872148036957,
|
|
"step": 1220,
|
|
"valid_targets_mean": 8456.2,
|
|
"valid_targets_min": 7123
|
|
},
|
|
{
|
|
"epoch": 2.1528998242530757,
|
|
"grad_norm": 0.4471021182585841,
|
|
"learning_rate": 3.499433508237607e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3014095723628998,
|
|
"step": 1225,
|
|
"valid_targets_mean": 7605.4,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 2.161687170474517,
|
|
"grad_norm": 0.39860058420156974,
|
|
"learning_rate": 3.493618409114473e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312033474445343,
|
|
"step": 1230,
|
|
"valid_targets_mean": 7788.5,
|
|
"valid_targets_min": 7179
|
|
},
|
|
{
|
|
"epoch": 2.1704745166959576,
|
|
"grad_norm": 0.4467572727911706,
|
|
"learning_rate": 3.487774619187603e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30640843510627747,
|
|
"step": 1235,
|
|
"valid_targets_mean": 7773.8,
|
|
"valid_targets_min": 7052
|
|
},
|
|
{
|
|
"epoch": 2.1792618629173988,
|
|
"grad_norm": 0.3997396765921527,
|
|
"learning_rate": 3.4819022507099184e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31913310289382935,
|
|
"step": 1240,
|
|
"valid_targets_mean": 8639.9,
|
|
"valid_targets_min": 7285
|
|
},
|
|
{
|
|
"epoch": 2.18804920913884,
|
|
"grad_norm": 0.4000641776955986,
|
|
"learning_rate": 3.476001416483304e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31555551290512085,
|
|
"step": 1245,
|
|
"valid_targets_mean": 9298.2,
|
|
"valid_targets_min": 7236
|
|
},
|
|
{
|
|
"epoch": 2.196836555360281,
|
|
"grad_norm": 0.41208383607345306,
|
|
"learning_rate": 3.470072229856441e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31802457571029663,
|
|
"step": 1250,
|
|
"valid_targets_mean": 8020.3,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 2.2056239015817223,
|
|
"grad_norm": 0.4195318237967421,
|
|
"learning_rate": 3.4641148047226296e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073643445968628,
|
|
"step": 1255,
|
|
"valid_targets_mean": 7782.1,
|
|
"valid_targets_min": 6665
|
|
},
|
|
{
|
|
"epoch": 2.2144112478031635,
|
|
"grad_norm": 0.40213398462418887,
|
|
"learning_rate": 3.458129255517601e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027478754520416,
|
|
"step": 1260,
|
|
"valid_targets_mean": 7899.5,
|
|
"valid_targets_min": 4269
|
|
},
|
|
{
|
|
"epoch": 2.2231985940246046,
|
|
"grad_norm": 0.42889700659379176,
|
|
"learning_rate": 3.45211569721732e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3163566589355469,
|
|
"step": 1265,
|
|
"valid_targets_mean": 7810.5,
|
|
"valid_targets_min": 7187
|
|
},
|
|
{
|
|
"epoch": 2.231985940246046,
|
|
"grad_norm": 0.46094118123987604,
|
|
"learning_rate": 3.446074245335775e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999768853187561,
|
|
"step": 1270,
|
|
"valid_targets_mean": 7666.0,
|
|
"valid_targets_min": 7186
|
|
},
|
|
{
|
|
"epoch": 2.240773286467487,
|
|
"grad_norm": 0.3786878856595145,
|
|
"learning_rate": 3.440005015922764e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929559350013733,
|
|
"step": 1275,
|
|
"valid_targets_mean": 7560.2,
|
|
"valid_targets_min": 6789
|
|
},
|
|
{
|
|
"epoch": 2.249560632688928,
|
|
"grad_norm": 0.40495935423491064,
|
|
"learning_rate": 3.433908125561655e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30050337314605713,
|
|
"step": 1280,
|
|
"valid_targets_mean": 8417.1,
|
|
"valid_targets_min": 5375
|
|
},
|
|
{
|
|
"epoch": 2.2583479789103693,
|
|
"grad_norm": 0.4933512810932113,
|
|
"learning_rate": 3.427783691367158e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3097958564758301,
|
|
"step": 1285,
|
|
"valid_targets_mean": 7632.8,
|
|
"valid_targets_min": 6582
|
|
},
|
|
{
|
|
"epoch": 2.26713532513181,
|
|
"grad_norm": 0.45052501289258173,
|
|
"learning_rate": 3.4216318309830695e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31090953946113586,
|
|
"step": 1290,
|
|
"valid_targets_mean": 7552.1,
|
|
"valid_targets_min": 6602
|
|
},
|
|
{
|
|
"epoch": 2.275922671353251,
|
|
"grad_norm": 0.40827626046366344,
|
|
"learning_rate": 3.4154526625800136e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316741943359375,
|
|
"step": 1295,
|
|
"valid_targets_mean": 8403.2,
|
|
"valid_targets_min": 7188
|
|
},
|
|
{
|
|
"epoch": 2.2847100175746924,
|
|
"grad_norm": 0.3901372214673228,
|
|
"learning_rate": 3.40924630485317e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291498601436615,
|
|
"step": 1300,
|
|
"valid_targets_mean": 8731.1,
|
|
"valid_targets_min": 6914
|
|
},
|
|
{
|
|
"epoch": 2.2934973637961336,
|
|
"grad_norm": 0.4607594936052616,
|
|
"learning_rate": 3.4030128770199986e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31470581889152527,
|
|
"step": 1305,
|
|
"valid_targets_mean": 7488.9,
|
|
"valid_targets_min": 6678
|
|
},
|
|
{
|
|
"epoch": 2.3022847100175747,
|
|
"grad_norm": 0.4353904042040713,
|
|
"learning_rate": 3.3967524988179463e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3008217513561249,
|
|
"step": 1310,
|
|
"valid_targets_mean": 7801.6,
|
|
"valid_targets_min": 7101
|
|
},
|
|
{
|
|
"epoch": 2.311072056239016,
|
|
"grad_norm": 0.3982786148979641,
|
|
"learning_rate": 3.390465290502146e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3014106750488281,
|
|
"step": 1315,
|
|
"valid_targets_mean": 8752.8,
|
|
"valid_targets_min": 7398
|
|
},
|
|
{
|
|
"epoch": 2.319859402460457,
|
|
"grad_norm": 0.3931812519197794,
|
|
"learning_rate": 3.3841513728431116e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3188680410385132,
|
|
"step": 1320,
|
|
"valid_targets_mean": 8942.9,
|
|
"valid_targets_min": 7262
|
|
},
|
|
{
|
|
"epoch": 2.3286467486818982,
|
|
"grad_norm": 0.34925229293914345,
|
|
"learning_rate": 3.3778108671244117e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098894953727722,
|
|
"step": 1325,
|
|
"valid_targets_mean": 9421.9,
|
|
"valid_targets_min": 7103
|
|
},
|
|
{
|
|
"epoch": 2.3374340949033394,
|
|
"grad_norm": 0.4079734971148553,
|
|
"learning_rate": 3.371443895140343e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31103235483169556,
|
|
"step": 1330,
|
|
"valid_targets_mean": 7780.8,
|
|
"valid_targets_min": 7072
|
|
},
|
|
{
|
|
"epoch": 2.34622144112478,
|
|
"grad_norm": 0.4403653941114043,
|
|
"learning_rate": 3.365050579193594e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30240386724472046,
|
|
"step": 1335,
|
|
"valid_targets_mean": 7246.3,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 2.3550087873462213,
|
|
"grad_norm": 0.4258857128840449,
|
|
"learning_rate": 3.3586310420928884e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061680793762207,
|
|
"step": 1340,
|
|
"valid_targets_mean": 7177.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 2.3637961335676625,
|
|
"grad_norm": 0.39912825352364323,
|
|
"learning_rate": 3.352185407150632e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30649226903915405,
|
|
"step": 1345,
|
|
"valid_targets_mean": 7716.2,
|
|
"valid_targets_min": 7084
|
|
},
|
|
{
|
|
"epoch": 2.3725834797891037,
|
|
"grad_norm": 0.38375183285614406,
|
|
"learning_rate": 3.3457137981805427e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3068873882293701,
|
|
"step": 1350,
|
|
"valid_targets_mean": 7820.7,
|
|
"valid_targets_min": 6638
|
|
},
|
|
{
|
|
"epoch": 2.381370826010545,
|
|
"grad_norm": 0.4734888672679818,
|
|
"learning_rate": 3.3392163394952694e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996982932090759,
|
|
"step": 1355,
|
|
"valid_targets_mean": 7900.8,
|
|
"valid_targets_min": 7090
|
|
},
|
|
{
|
|
"epoch": 2.390158172231986,
|
|
"grad_norm": 0.4257074824478925,
|
|
"learning_rate": 3.3326931559040084e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3033141493797302,
|
|
"step": 1360,
|
|
"valid_targets_mean": 8719.5,
|
|
"valid_targets_min": 6891
|
|
},
|
|
{
|
|
"epoch": 2.398945518453427,
|
|
"grad_norm": 0.3983063674915521,
|
|
"learning_rate": 3.326144372710102e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3087483048439026,
|
|
"step": 1365,
|
|
"valid_targets_mean": 8207.4,
|
|
"valid_targets_min": 5826
|
|
},
|
|
{
|
|
"epoch": 2.4077328646748684,
|
|
"grad_norm": 0.38937049144098784,
|
|
"learning_rate": 3.3195701157086354e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29816991090774536,
|
|
"step": 1370,
|
|
"valid_targets_mean": 7648.9,
|
|
"valid_targets_min": 7040
|
|
},
|
|
{
|
|
"epoch": 2.4165202108963095,
|
|
"grad_norm": 0.41374920307885404,
|
|
"learning_rate": 3.312970511184017e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29219090938568115,
|
|
"step": 1375,
|
|
"valid_targets_mean": 7774.8,
|
|
"valid_targets_min": 6954
|
|
},
|
|
{
|
|
"epoch": 2.4253075571177503,
|
|
"grad_norm": 0.39068918900663724,
|
|
"learning_rate": 3.306345685907553e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.305525004863739,
|
|
"step": 1380,
|
|
"valid_targets_mean": 7558.7,
|
|
"valid_targets_min": 6434
|
|
},
|
|
{
|
|
"epoch": 2.4340949033391914,
|
|
"grad_norm": 0.371881435123146,
|
|
"learning_rate": 3.299695767135016e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3058660626411438,
|
|
"step": 1385,
|
|
"valid_targets_mean": 7739.2,
|
|
"valid_targets_min": 6924
|
|
},
|
|
{
|
|
"epoch": 2.4428822495606326,
|
|
"grad_norm": 0.38458825321625745,
|
|
"learning_rate": 3.293020882604194e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30228298902511597,
|
|
"step": 1390,
|
|
"valid_targets_mean": 8752.3,
|
|
"valid_targets_min": 6899
|
|
},
|
|
{
|
|
"epoch": 2.4516695957820738,
|
|
"grad_norm": 0.43595201707714715,
|
|
"learning_rate": 3.2863211605324416e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30637264251708984,
|
|
"step": 1395,
|
|
"valid_targets_mean": 8108.2,
|
|
"valid_targets_min": 6679
|
|
},
|
|
{
|
|
"epoch": 2.460456942003515,
|
|
"grad_norm": 0.3837177685039735,
|
|
"learning_rate": 3.2795967296142185e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002673387527466,
|
|
"step": 1400,
|
|
"valid_targets_mean": 8512.9,
|
|
"valid_targets_min": 7227
|
|
},
|
|
{
|
|
"epoch": 2.469244288224956,
|
|
"grad_norm": 0.38223835626371655,
|
|
"learning_rate": 3.272847719018612e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972228527069092,
|
|
"step": 1405,
|
|
"valid_targets_mean": 8684.6,
|
|
"valid_targets_min": 7125
|
|
},
|
|
{
|
|
"epoch": 2.4780316344463973,
|
|
"grad_norm": 0.3777714976567161,
|
|
"learning_rate": 3.2660742583868585e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080081045627594,
|
|
"step": 1410,
|
|
"valid_targets_mean": 8428.3,
|
|
"valid_targets_min": 6596
|
|
},
|
|
{
|
|
"epoch": 2.4868189806678385,
|
|
"grad_norm": 0.4311154393993648,
|
|
"learning_rate": 3.259276477829855e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28569990396499634,
|
|
"step": 1415,
|
|
"valid_targets_mean": 7225.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 2.4956063268892796,
|
|
"grad_norm": 0.3830582502669011,
|
|
"learning_rate": 3.2524545079256554e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976120114326477,
|
|
"step": 1420,
|
|
"valid_targets_mean": 7598.9,
|
|
"valid_targets_min": 6837
|
|
},
|
|
{
|
|
"epoch": 2.5043936731107204,
|
|
"grad_norm": 0.4094112468037244,
|
|
"learning_rate": 3.2456084797169695e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30441173911094666,
|
|
"step": 1425,
|
|
"valid_targets_mean": 7598.8,
|
|
"valid_targets_min": 6490
|
|
},
|
|
{
|
|
"epoch": 2.5131810193321615,
|
|
"grad_norm": 0.38103563079986086,
|
|
"learning_rate": 3.238738524708636e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29411470890045166,
|
|
"step": 1430,
|
|
"valid_targets_mean": 8039.4,
|
|
"valid_targets_min": 6835
|
|
},
|
|
{
|
|
"epoch": 2.5219683655536027,
|
|
"grad_norm": 0.35589307131604775,
|
|
"learning_rate": 3.2318447748651054e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29991233348846436,
|
|
"step": 1435,
|
|
"valid_targets_mean": 7943.8,
|
|
"valid_targets_min": 7488
|
|
},
|
|
{
|
|
"epoch": 2.530755711775044,
|
|
"grad_norm": 0.43714729308284495,
|
|
"learning_rate": 3.2249273626079005e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3009275794029236,
|
|
"step": 1440,
|
|
"valid_targets_mean": 7560.6,
|
|
"valid_targets_min": 6313
|
|
},
|
|
{
|
|
"epoch": 2.539543057996485,
|
|
"grad_norm": 0.41461305927012404,
|
|
"learning_rate": 3.217986420813073e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30489039421081543,
|
|
"step": 1445,
|
|
"valid_targets_mean": 7940.9,
|
|
"valid_targets_min": 7048
|
|
},
|
|
{
|
|
"epoch": 2.5483304042179262,
|
|
"grad_norm": 0.39013671195410066,
|
|
"learning_rate": 3.211022082808652e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3109816908836365,
|
|
"step": 1450,
|
|
"valid_targets_mean": 7563.9,
|
|
"valid_targets_min": 6580
|
|
},
|
|
{
|
|
"epoch": 2.5571177504393674,
|
|
"grad_norm": 0.4147791452090644,
|
|
"learning_rate": 3.204034482372081e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3086916506290436,
|
|
"step": 1455,
|
|
"valid_targets_mean": 7745.0,
|
|
"valid_targets_min": 7157
|
|
},
|
|
{
|
|
"epoch": 2.5659050966608086,
|
|
"grad_norm": 0.3864245220712862,
|
|
"learning_rate": 3.197023753727655e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015146255493164,
|
|
"step": 1460,
|
|
"valid_targets_mean": 8278.2,
|
|
"valid_targets_min": 6836
|
|
},
|
|
{
|
|
"epoch": 2.5746924428822497,
|
|
"grad_norm": 0.3974199065384782,
|
|
"learning_rate": 3.18999003154393e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117097020149231,
|
|
"step": 1465,
|
|
"valid_targets_mean": 7764.9,
|
|
"valid_targets_min": 6630
|
|
},
|
|
{
|
|
"epoch": 2.5834797891036905,
|
|
"grad_norm": 0.4243565407207841,
|
|
"learning_rate": 3.1829334509311496e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30094391107559204,
|
|
"step": 1470,
|
|
"valid_targets_mean": 7812.8,
|
|
"valid_targets_min": 6861
|
|
},
|
|
{
|
|
"epoch": 2.5922671353251316,
|
|
"grad_norm": 0.40284583779979793,
|
|
"learning_rate": 3.175854147438638e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32183435559272766,
|
|
"step": 1475,
|
|
"valid_targets_mean": 8270.9,
|
|
"valid_targets_min": 6766
|
|
},
|
|
{
|
|
"epoch": 2.601054481546573,
|
|
"grad_norm": 0.39808650070592766,
|
|
"learning_rate": 3.1687522570522055e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073437213897705,
|
|
"step": 1480,
|
|
"valid_targets_mean": 7517.8,
|
|
"valid_targets_min": 3926
|
|
},
|
|
{
|
|
"epoch": 2.609841827768014,
|
|
"grad_norm": 0.4603673602872501,
|
|
"learning_rate": 3.1616279161915295e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31150487065315247,
|
|
"step": 1485,
|
|
"valid_targets_mean": 7706.1,
|
|
"valid_targets_min": 6610
|
|
},
|
|
{
|
|
"epoch": 2.618629173989455,
|
|
"grad_norm": 0.38907395491747443,
|
|
"learning_rate": 3.1544812617075384e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30694928765296936,
|
|
"step": 1490,
|
|
"valid_targets_mean": 7400.9,
|
|
"valid_targets_min": 6690
|
|
},
|
|
{
|
|
"epoch": 2.6274165202108963,
|
|
"grad_norm": 0.37857824502065196,
|
|
"learning_rate": 3.14731243087978e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30862754583358765,
|
|
"step": 1495,
|
|
"valid_targets_mean": 7636.0,
|
|
"valid_targets_min": 6961
|
|
},
|
|
{
|
|
"epoch": 2.6362038664323375,
|
|
"grad_norm": 0.37364134233828533,
|
|
"learning_rate": 3.140121561413789e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982473373413086,
|
|
"step": 1500,
|
|
"valid_targets_mean": 8630.9,
|
|
"valid_targets_min": 7356
|
|
},
|
|
{
|
|
"epoch": 2.6449912126537787,
|
|
"grad_norm": 0.37894045420149564,
|
|
"learning_rate": 3.132908791438434e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.311771959066391,
|
|
"step": 1505,
|
|
"valid_targets_mean": 7846.6,
|
|
"valid_targets_min": 7249
|
|
},
|
|
{
|
|
"epoch": 2.65377855887522,
|
|
"grad_norm": 0.39359306830855695,
|
|
"learning_rate": 3.125674259503273e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3034271001815796,
|
|
"step": 1510,
|
|
"valid_targets_mean": 7687.1,
|
|
"valid_targets_min": 6152
|
|
},
|
|
{
|
|
"epoch": 2.6625659050966606,
|
|
"grad_norm": 0.3198160782063454,
|
|
"learning_rate": 3.1184181045758854e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969781160354614,
|
|
"step": 1515,
|
|
"valid_targets_mean": 8732.8,
|
|
"valid_targets_min": 6926
|
|
},
|
|
{
|
|
"epoch": 2.671353251318102,
|
|
"grad_norm": 0.43721287392537767,
|
|
"learning_rate": 3.111140466039205e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31991803646087646,
|
|
"step": 1520,
|
|
"valid_targets_mean": 8241.2,
|
|
"valid_targets_min": 7027
|
|
},
|
|
{
|
|
"epoch": 2.680140597539543,
|
|
"grad_norm": 0.3759484607611042,
|
|
"learning_rate": 3.103841483688844e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31809115409851074,
|
|
"step": 1525,
|
|
"valid_targets_mean": 7407.9,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 2.688927943760984,
|
|
"grad_norm": 0.38227597925355516,
|
|
"learning_rate": 3.096521297730406e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.296236515045166,
|
|
"step": 1530,
|
|
"valid_targets_mean": 7841.4,
|
|
"valid_targets_min": 7067
|
|
},
|
|
{
|
|
"epoch": 2.6977152899824253,
|
|
"grad_norm": 0.4440775072110787,
|
|
"learning_rate": 3.089180048776792e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31530511379241943,
|
|
"step": 1535,
|
|
"valid_targets_mean": 7497.6,
|
|
"valid_targets_min": 6947
|
|
},
|
|
{
|
|
"epoch": 2.7065026362038664,
|
|
"grad_norm": 0.4044154429897798,
|
|
"learning_rate": 3.081817877845502e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.313349187374115,
|
|
"step": 1540,
|
|
"valid_targets_mean": 7869.1,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 2.7152899824253076,
|
|
"grad_norm": 0.35095011195614556,
|
|
"learning_rate": 3.074434926355923e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30906468629837036,
|
|
"step": 1545,
|
|
"valid_targets_mean": 7672.9,
|
|
"valid_targets_min": 6969
|
|
},
|
|
{
|
|
"epoch": 2.724077328646749,
|
|
"grad_norm": 0.37377485319005543,
|
|
"learning_rate": 3.0670313361266165e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3094746768474579,
|
|
"step": 1550,
|
|
"valid_targets_mean": 9361.6,
|
|
"valid_targets_min": 6725
|
|
},
|
|
{
|
|
"epoch": 2.73286467486819,
|
|
"grad_norm": 0.39609784345383675,
|
|
"learning_rate": 3.05960724937259e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3020004630088806,
|
|
"step": 1555,
|
|
"valid_targets_mean": 7593.3,
|
|
"valid_targets_min": 6153
|
|
},
|
|
{
|
|
"epoch": 2.7416520210896307,
|
|
"grad_norm": 0.39372450689875543,
|
|
"learning_rate": 3.0521628087025675e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30635249614715576,
|
|
"step": 1560,
|
|
"valid_targets_mean": 7280.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.7504393673110723,
|
|
"grad_norm": 0.4546147869833877,
|
|
"learning_rate": 3.04469815711625e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31082868576049805,
|
|
"step": 1565,
|
|
"valid_targets_mean": 7271.9,
|
|
"valid_targets_min": 2103
|
|
},
|
|
{
|
|
"epoch": 2.759226713532513,
|
|
"grad_norm": 0.385653663876873,
|
|
"learning_rate": 3.037213438001569e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3087978959083557,
|
|
"step": 1570,
|
|
"valid_targets_mean": 7855.4,
|
|
"valid_targets_min": 6830
|
|
},
|
|
{
|
|
"epoch": 2.768014059753954,
|
|
"grad_norm": 0.3638152210796052,
|
|
"learning_rate": 3.0297087951319315e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870416045188904,
|
|
"step": 1575,
|
|
"valid_targets_mean": 8318.2,
|
|
"valid_targets_min": 7204
|
|
},
|
|
{
|
|
"epoch": 2.7768014059753954,
|
|
"grad_norm": 0.40554492088420635,
|
|
"learning_rate": 3.0221843726634587e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3099740445613861,
|
|
"step": 1580,
|
|
"valid_targets_mean": 7761.8,
|
|
"valid_targets_min": 7044
|
|
},
|
|
{
|
|
"epoch": 2.7855887521968365,
|
|
"grad_norm": 0.4369178667308916,
|
|
"learning_rate": 3.0146403151322156e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3091489374637604,
|
|
"step": 1585,
|
|
"valid_targets_mean": 7639.2,
|
|
"valid_targets_min": 7064
|
|
},
|
|
{
|
|
"epoch": 2.7943760984182777,
|
|
"grad_norm": 0.3801491855074197,
|
|
"learning_rate": 3.0070767674514355e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32035768032073975,
|
|
"step": 1590,
|
|
"valid_targets_mean": 7785.9,
|
|
"valid_targets_min": 7097
|
|
},
|
|
{
|
|
"epoch": 2.803163444639719,
|
|
"grad_norm": 0.3575958406948729,
|
|
"learning_rate": 2.999493874908738e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2941886782646179,
|
|
"step": 1595,
|
|
"valid_targets_mean": 8453.9,
|
|
"valid_targets_min": 6573
|
|
},
|
|
{
|
|
"epoch": 2.81195079086116,
|
|
"grad_norm": 0.40910005037027813,
|
|
"learning_rate": 2.9918917831633362e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30973154306411743,
|
|
"step": 1600,
|
|
"valid_targets_mean": 7694.0,
|
|
"valid_targets_min": 7177
|
|
},
|
|
{
|
|
"epoch": 2.820738137082601,
|
|
"grad_norm": 0.40322666910100285,
|
|
"learning_rate": 2.9842706382432375e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833307385444641,
|
|
"step": 1605,
|
|
"valid_targets_mean": 7846.4,
|
|
"valid_targets_min": 6911
|
|
},
|
|
{
|
|
"epoch": 2.8295254833040424,
|
|
"grad_norm": 0.4328858344953751,
|
|
"learning_rate": 2.9766305865424435e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29956915974617004,
|
|
"step": 1610,
|
|
"valid_targets_mean": 7706.2,
|
|
"valid_targets_min": 6440
|
|
},
|
|
{
|
|
"epoch": 2.838312829525483,
|
|
"grad_norm": 0.3862746865257901,
|
|
"learning_rate": 2.9689717748181313e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30747565627098083,
|
|
"step": 1615,
|
|
"valid_targets_mean": 7839.2,
|
|
"valid_targets_min": 6714
|
|
},
|
|
{
|
|
"epoch": 2.8471001757469243,
|
|
"grad_norm": 0.35814076275660855,
|
|
"learning_rate": 2.9612943501878412e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956044673919678,
|
|
"step": 1620,
|
|
"valid_targets_mean": 8089.6,
|
|
"valid_targets_min": 6923
|
|
},
|
|
{
|
|
"epoch": 2.8558875219683655,
|
|
"grad_norm": 0.3575476615779183,
|
|
"learning_rate": 2.953598460126645e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984451949596405,
|
|
"step": 1625,
|
|
"valid_targets_mean": 7583.4,
|
|
"valid_targets_min": 5811
|
|
},
|
|
{
|
|
"epoch": 2.8646748681898067,
|
|
"grad_norm": 0.364251095654717,
|
|
"learning_rate": 2.945884252464316e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29613763093948364,
|
|
"step": 1630,
|
|
"valid_targets_mean": 7839.7,
|
|
"valid_targets_min": 7332
|
|
},
|
|
{
|
|
"epoch": 2.873462214411248,
|
|
"grad_norm": 0.37913775865436633,
|
|
"learning_rate": 2.938151875382489e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077976107597351,
|
|
"step": 1635,
|
|
"valid_targets_mean": 7706.9,
|
|
"valid_targets_min": 6864
|
|
},
|
|
{
|
|
"epoch": 2.882249560632689,
|
|
"grad_norm": 0.3502794052061173,
|
|
"learning_rate": 2.9304014774118153e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29528719186782837,
|
|
"step": 1640,
|
|
"valid_targets_mean": 9250.9,
|
|
"valid_targets_min": 6288
|
|
},
|
|
{
|
|
"epoch": 2.89103690685413,
|
|
"grad_norm": 0.40036405725266105,
|
|
"learning_rate": 2.922633207429106e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30659887194633484,
|
|
"step": 1645,
|
|
"valid_targets_mean": 7710.6,
|
|
"valid_targets_min": 7033
|
|
},
|
|
{
|
|
"epoch": 2.899824253075571,
|
|
"grad_norm": 0.3954524870419218,
|
|
"learning_rate": 2.9148472146544757e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851497232913971,
|
|
"step": 1650,
|
|
"valid_targets_mean": 7339.9,
|
|
"valid_targets_min": 2858
|
|
},
|
|
{
|
|
"epoch": 2.9086115992970125,
|
|
"grad_norm": 0.4016749884162129,
|
|
"learning_rate": 2.907043648648474e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3091953992843628,
|
|
"step": 1655,
|
|
"valid_targets_mean": 8124.5,
|
|
"valid_targets_min": 7189
|
|
},
|
|
{
|
|
"epoch": 2.9173989455184532,
|
|
"grad_norm": 0.34302336945820733,
|
|
"learning_rate": 2.8992226593092135e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290097177028656,
|
|
"step": 1660,
|
|
"valid_targets_mean": 8428.9,
|
|
"valid_targets_min": 6961
|
|
},
|
|
{
|
|
"epoch": 2.9261862917398944,
|
|
"grad_norm": 0.3689837782516461,
|
|
"learning_rate": 2.8913843968694907e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28554922342300415,
|
|
"step": 1665,
|
|
"valid_targets_mean": 7704.6,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 2.9349736379613356,
|
|
"grad_norm": 0.3737483155346716,
|
|
"learning_rate": 2.8835290118938995e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31530293822288513,
|
|
"step": 1670,
|
|
"valid_targets_mean": 8266.5,
|
|
"valid_targets_min": 5479
|
|
},
|
|
{
|
|
"epoch": 2.9437609841827768,
|
|
"grad_norm": 0.37716353251821727,
|
|
"learning_rate": 2.8756566552759392e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30051523447036743,
|
|
"step": 1675,
|
|
"valid_targets_mean": 8571.9,
|
|
"valid_targets_min": 6853
|
|
},
|
|
{
|
|
"epoch": 2.952548330404218,
|
|
"grad_norm": 0.3436480692484697,
|
|
"learning_rate": 2.8677674782351164e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3064003586769104,
|
|
"step": 1680,
|
|
"valid_targets_mean": 8446.5,
|
|
"valid_targets_min": 6955
|
|
},
|
|
{
|
|
"epoch": 2.961335676625659,
|
|
"grad_norm": 0.36327514641467545,
|
|
"learning_rate": 2.85986163231404e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.323203444480896,
|
|
"step": 1685,
|
|
"valid_targets_mean": 8383.3,
|
|
"valid_targets_min": 6713
|
|
},
|
|
{
|
|
"epoch": 2.9701230228471003,
|
|
"grad_norm": 0.36618297685578105,
|
|
"learning_rate": 2.85193926937551e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3078553378582001,
|
|
"step": 1690,
|
|
"valid_targets_mean": 7677.2,
|
|
"valid_targets_min": 6942
|
|
},
|
|
{
|
|
"epoch": 2.9789103690685415,
|
|
"grad_norm": 0.4015501740277631,
|
|
"learning_rate": 2.8440005415996e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30818092823028564,
|
|
"step": 1695,
|
|
"valid_targets_mean": 8363.5,
|
|
"valid_targets_min": 7110
|
|
},
|
|
{
|
|
"epoch": 2.9876977152899826,
|
|
"grad_norm": 0.37457441630305677,
|
|
"learning_rate": 2.8360456014807353e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29534947872161865,
|
|
"step": 1700,
|
|
"valid_targets_mean": 7426.0,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.9964850615114234,
|
|
"grad_norm": 0.3589323367469997,
|
|
"learning_rate": 2.828074601824762e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29085928201675415,
|
|
"step": 1705,
|
|
"valid_targets_mean": 7558.2,
|
|
"valid_targets_min": 6764
|
|
},
|
|
{
|
|
"epoch": 3.0052724077328645,
|
|
"grad_norm": 0.3843537170537307,
|
|
"learning_rate": 2.820087695746015e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022106885910034,
|
|
"step": 1710,
|
|
"valid_targets_mean": 7672.3,
|
|
"valid_targets_min": 6491
|
|
},
|
|
{
|
|
"epoch": 3.0140597539543057,
|
|
"grad_norm": 0.39461466590743977,
|
|
"learning_rate": 2.8120850366643707e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965931296348572,
|
|
"step": 1715,
|
|
"valid_targets_mean": 7666.1,
|
|
"valid_targets_min": 7020
|
|
},
|
|
{
|
|
"epoch": 3.022847100175747,
|
|
"grad_norm": 0.3823839208720008,
|
|
"learning_rate": 2.804066778302307e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29734212160110474,
|
|
"step": 1720,
|
|
"valid_targets_mean": 7955.2,
|
|
"valid_targets_min": 6914
|
|
},
|
|
{
|
|
"epoch": 3.031634446397188,
|
|
"grad_norm": 0.37794281371658167,
|
|
"learning_rate": 2.7960330746819462e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3050759434700012,
|
|
"step": 1725,
|
|
"valid_targets_mean": 7589.2,
|
|
"valid_targets_min": 6396
|
|
},
|
|
{
|
|
"epoch": 3.040421792618629,
|
|
"grad_norm": 0.3834396543175821,
|
|
"learning_rate": 2.7879840801220967e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29034867882728577,
|
|
"step": 1730,
|
|
"valid_targets_mean": 7920.9,
|
|
"valid_targets_min": 7403
|
|
},
|
|
{
|
|
"epoch": 3.0492091388400704,
|
|
"grad_norm": 0.39698828721280216,
|
|
"learning_rate": 2.7799199492352893e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966341972351074,
|
|
"step": 1735,
|
|
"valid_targets_mean": 7744.3,
|
|
"valid_targets_min": 7105
|
|
},
|
|
{
|
|
"epoch": 3.0579964850615116,
|
|
"grad_norm": 0.3324961667234077,
|
|
"learning_rate": 2.7718408369248093e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905833125114441,
|
|
"step": 1740,
|
|
"valid_targets_mean": 7800.8,
|
|
"valid_targets_min": 6576
|
|
},
|
|
{
|
|
"epoch": 3.0667838312829527,
|
|
"grad_norm": 0.3824356525112284,
|
|
"learning_rate": 2.7637468983817167e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963569462299347,
|
|
"step": 1745,
|
|
"valid_targets_mean": 7915.6,
|
|
"valid_targets_min": 7207
|
|
},
|
|
{
|
|
"epoch": 3.0755711775043935,
|
|
"grad_norm": 0.40122996258740906,
|
|
"learning_rate": 2.7556382890818682e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929661273956299,
|
|
"step": 1750,
|
|
"valid_targets_mean": 7726.7,
|
|
"valid_targets_min": 7103
|
|
},
|
|
{
|
|
"epoch": 3.0843585237258346,
|
|
"grad_norm": 0.3492966408385829,
|
|
"learning_rate": 2.7475151647829314e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30222034454345703,
|
|
"step": 1755,
|
|
"valid_targets_mean": 7625.8,
|
|
"valid_targets_min": 7081
|
|
},
|
|
{
|
|
"epoch": 3.093145869947276,
|
|
"grad_norm": 0.35717867792128094,
|
|
"learning_rate": 2.7393776815213903e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30564767122268677,
|
|
"step": 1760,
|
|
"valid_targets_mean": 8366.2,
|
|
"valid_targets_min": 7331
|
|
},
|
|
{
|
|
"epoch": 3.101933216168717,
|
|
"grad_norm": 0.3836591053710375,
|
|
"learning_rate": 2.731225995609548e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30902570486068726,
|
|
"step": 1765,
|
|
"valid_targets_mean": 7731.9,
|
|
"valid_targets_min": 6950
|
|
},
|
|
{
|
|
"epoch": 3.110720562390158,
|
|
"grad_norm": 0.37827048413037573,
|
|
"learning_rate": 2.723060263632527e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3042601943016052,
|
|
"step": 1770,
|
|
"valid_targets_mean": 7609.2,
|
|
"valid_targets_min": 6740
|
|
},
|
|
{
|
|
"epoch": 3.1195079086115993,
|
|
"grad_norm": 0.4074776201300159,
|
|
"learning_rate": 2.714880642445259e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30564433336257935,
|
|
"step": 1775,
|
|
"valid_targets_mean": 7801.8,
|
|
"valid_targets_min": 6795
|
|
},
|
|
{
|
|
"epoch": 3.1282952548330405,
|
|
"grad_norm": 0.3994404944539335,
|
|
"learning_rate": 2.706687289169472e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061908185482025,
|
|
"step": 1780,
|
|
"valid_targets_mean": 7779.4,
|
|
"valid_targets_min": 7008
|
|
},
|
|
{
|
|
"epoch": 3.1370826010544817,
|
|
"grad_norm": 0.36913628829380746,
|
|
"learning_rate": 2.698480361190674e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005139231681824,
|
|
"step": 1785,
|
|
"valid_targets_mean": 7761.8,
|
|
"valid_targets_min": 6357
|
|
},
|
|
{
|
|
"epoch": 3.145869947275923,
|
|
"grad_norm": 0.3804227293169359,
|
|
"learning_rate": 2.6902600161551273e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909207046031952,
|
|
"step": 1790,
|
|
"valid_targets_mean": 7628.8,
|
|
"valid_targets_min": 6808
|
|
},
|
|
{
|
|
"epoch": 3.1546572934973636,
|
|
"grad_norm": 0.38125704699521695,
|
|
"learning_rate": 2.6820264119668215e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2874659299850464,
|
|
"step": 1795,
|
|
"valid_targets_mean": 7767.1,
|
|
"valid_targets_min": 7159
|
|
},
|
|
{
|
|
"epoch": 3.1634446397188047,
|
|
"grad_norm": 0.3894373804595348,
|
|
"learning_rate": 2.6737797067844403e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2974281907081604,
|
|
"step": 1800,
|
|
"valid_targets_mean": 7478.9,
|
|
"valid_targets_min": 6390
|
|
},
|
|
{
|
|
"epoch": 3.172231985940246,
|
|
"grad_norm": 0.4032779429245858,
|
|
"learning_rate": 2.6655200590183235e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894044816493988,
|
|
"step": 1805,
|
|
"valid_targets_mean": 7810.5,
|
|
"valid_targets_min": 6847
|
|
},
|
|
{
|
|
"epoch": 3.181019332161687,
|
|
"grad_norm": 0.39036275005771637,
|
|
"learning_rate": 2.6572476273274253e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30358803272247314,
|
|
"step": 1810,
|
|
"valid_targets_mean": 7659.1,
|
|
"valid_targets_min": 6626
|
|
},
|
|
{
|
|
"epoch": 3.1898066783831283,
|
|
"grad_norm": 0.4033198742934059,
|
|
"learning_rate": 2.6489625706162626e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30405354499816895,
|
|
"step": 1815,
|
|
"valid_targets_mean": 7780.9,
|
|
"valid_targets_min": 6679
|
|
},
|
|
{
|
|
"epoch": 3.1985940246045694,
|
|
"grad_norm": 0.3896938362514825,
|
|
"learning_rate": 2.6406650480318675e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3113795518875122,
|
|
"step": 1820,
|
|
"valid_targets_mean": 7647.8,
|
|
"valid_targets_min": 7075
|
|
},
|
|
{
|
|
"epoch": 3.2073813708260106,
|
|
"grad_norm": 0.3622062948493469,
|
|
"learning_rate": 2.6323552189607277e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.306094765663147,
|
|
"step": 1825,
|
|
"valid_targets_mean": 8025.8,
|
|
"valid_targets_min": 6326
|
|
},
|
|
{
|
|
"epoch": 3.2161687170474518,
|
|
"grad_norm": 0.4064555280023506,
|
|
"learning_rate": 2.6240332430257254e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2962179183959961,
|
|
"step": 1830,
|
|
"valid_targets_mean": 7738.6,
|
|
"valid_targets_min": 7306
|
|
},
|
|
{
|
|
"epoch": 3.224956063268893,
|
|
"grad_norm": 0.3633555051355748,
|
|
"learning_rate": 2.61569928008307e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3019287884235382,
|
|
"step": 1835,
|
|
"valid_targets_mean": 8410.9,
|
|
"valid_targets_min": 6567
|
|
},
|
|
{
|
|
"epoch": 3.233743409490334,
|
|
"grad_norm": 0.40839867807158176,
|
|
"learning_rate": 2.6073534902192295e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952163517475128,
|
|
"step": 1840,
|
|
"valid_targets_mean": 7896.7,
|
|
"valid_targets_min": 7093
|
|
},
|
|
{
|
|
"epoch": 3.242530755711775,
|
|
"grad_norm": 0.38608334234814473,
|
|
"learning_rate": 2.598996033747855e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31258517503738403,
|
|
"step": 1845,
|
|
"valid_targets_mean": 8522.1,
|
|
"valid_targets_min": 7245
|
|
},
|
|
{
|
|
"epoch": 3.251318101933216,
|
|
"grad_norm": 0.37127080551402336,
|
|
"learning_rate": 2.5906270712066972e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096095621585846,
|
|
"step": 1850,
|
|
"valid_targets_mean": 8455.1,
|
|
"valid_targets_min": 7211
|
|
},
|
|
{
|
|
"epoch": 3.260105448154657,
|
|
"grad_norm": 0.40216557395027774,
|
|
"learning_rate": 2.5822467633545305e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29827120900154114,
|
|
"step": 1855,
|
|
"valid_targets_mean": 7628.1,
|
|
"valid_targets_min": 6542
|
|
},
|
|
{
|
|
"epoch": 3.2688927943760984,
|
|
"grad_norm": 0.42644338680649757,
|
|
"learning_rate": 2.5738552711680575e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996917963027954,
|
|
"step": 1860,
|
|
"valid_targets_mean": 7396.6,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 3.2776801405975395,
|
|
"grad_norm": 0.3822821646569032,
|
|
"learning_rate": 2.5654527558388212e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30134713649749756,
|
|
"step": 1865,
|
|
"valid_targets_mean": 7653.9,
|
|
"valid_targets_min": 6719
|
|
},
|
|
{
|
|
"epoch": 3.2864674868189807,
|
|
"grad_norm": 0.37847842670001375,
|
|
"learning_rate": 2.5570393787701063e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3066447973251343,
|
|
"step": 1870,
|
|
"valid_targets_mean": 7816.2,
|
|
"valid_targets_min": 6870
|
|
},
|
|
{
|
|
"epoch": 3.295254833040422,
|
|
"grad_norm": 0.3655419814859327,
|
|
"learning_rate": 2.5486153015738412e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025107979774475,
|
|
"step": 1875,
|
|
"valid_targets_mean": 9417.5,
|
|
"valid_targets_min": 6596
|
|
},
|
|
{
|
|
"epoch": 3.304042179261863,
|
|
"grad_norm": 0.35812507396382093,
|
|
"learning_rate": 2.5401806860674912e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30119413137435913,
|
|
"step": 1880,
|
|
"valid_targets_mean": 8851.4,
|
|
"valid_targets_min": 7650
|
|
},
|
|
{
|
|
"epoch": 3.3128295254833042,
|
|
"grad_norm": 0.33731159764531526,
|
|
"learning_rate": 2.531735694270951e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901797890663147,
|
|
"step": 1885,
|
|
"valid_targets_mean": 8558.3,
|
|
"valid_targets_min": 7196
|
|
},
|
|
{
|
|
"epoch": 3.3216168717047454,
|
|
"grad_norm": 0.35924506975423315,
|
|
"learning_rate": 2.5232804884034336e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28924497961997986,
|
|
"step": 1890,
|
|
"valid_targets_mean": 7621.5,
|
|
"valid_targets_min": 5768
|
|
},
|
|
{
|
|
"epoch": 3.330404217926186,
|
|
"grad_norm": 0.4077167610712946,
|
|
"learning_rate": 2.514815230880353e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894204258918762,
|
|
"step": 1895,
|
|
"valid_targets_mean": 7784.2,
|
|
"valid_targets_min": 7082
|
|
},
|
|
{
|
|
"epoch": 3.3391915641476273,
|
|
"grad_norm": 0.40138393301127867,
|
|
"learning_rate": 2.506340084310204e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30550941824913025,
|
|
"step": 1900,
|
|
"valid_targets_mean": 8926.8,
|
|
"valid_targets_min": 7692
|
|
},
|
|
{
|
|
"epoch": 3.3479789103690685,
|
|
"grad_norm": 0.42423632974581116,
|
|
"learning_rate": 2.497855211491441e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958449423313141,
|
|
"step": 1905,
|
|
"valid_targets_mean": 7398.2,
|
|
"valid_targets_min": 6157
|
|
},
|
|
{
|
|
"epoch": 3.3567662565905096,
|
|
"grad_norm": 0.39701449366081437,
|
|
"learning_rate": 2.4893607754093467e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3140870928764343,
|
|
"step": 1910,
|
|
"valid_targets_mean": 7346.4,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 3.365553602811951,
|
|
"grad_norm": 0.4032970315145587,
|
|
"learning_rate": 2.4808569392329066e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921169400215149,
|
|
"step": 1915,
|
|
"valid_targets_mean": 7636.4,
|
|
"valid_targets_min": 7047
|
|
},
|
|
{
|
|
"epoch": 3.374340949033392,
|
|
"grad_norm": 0.37613825543264184,
|
|
"learning_rate": 2.4723438663116692e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972247004508972,
|
|
"step": 1920,
|
|
"valid_targets_mean": 7798.5,
|
|
"valid_targets_min": 7169
|
|
},
|
|
{
|
|
"epoch": 3.383128295254833,
|
|
"grad_norm": 0.3473944568774364,
|
|
"learning_rate": 2.4638217201726128e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29673564434051514,
|
|
"step": 1925,
|
|
"valid_targets_mean": 8248.6,
|
|
"valid_targets_min": 7295
|
|
},
|
|
{
|
|
"epoch": 3.3919156414762743,
|
|
"grad_norm": 0.37921512732872087,
|
|
"learning_rate": 2.4552906645170024e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3123595714569092,
|
|
"step": 1930,
|
|
"valid_targets_mean": 7522.9,
|
|
"valid_targets_min": 6181
|
|
},
|
|
{
|
|
"epoch": 3.4007029876977155,
|
|
"grad_norm": 0.4060893190215372,
|
|
"learning_rate": 2.4467508632172443e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30219995975494385,
|
|
"step": 1935,
|
|
"valid_targets_mean": 7820.9,
|
|
"valid_targets_min": 7374
|
|
},
|
|
{
|
|
"epoch": 3.4094903339191562,
|
|
"grad_norm": 0.38638436375125973,
|
|
"learning_rate": 2.4382024803137396e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929654121398926,
|
|
"step": 1940,
|
|
"valid_targets_mean": 7747.4,
|
|
"valid_targets_min": 6818
|
|
},
|
|
{
|
|
"epoch": 3.4182776801405974,
|
|
"grad_norm": 0.3953451484079772,
|
|
"learning_rate": 2.429645680011733e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302202969789505,
|
|
"step": 1945,
|
|
"valid_targets_mean": 8351.3,
|
|
"valid_targets_min": 6862
|
|
},
|
|
{
|
|
"epoch": 3.4270650263620386,
|
|
"grad_norm": 0.3751593823197104,
|
|
"learning_rate": 2.4210806266781594e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3071412146091461,
|
|
"step": 1950,
|
|
"valid_targets_mean": 7740.3,
|
|
"valid_targets_min": 6935
|
|
},
|
|
{
|
|
"epoch": 3.4358523725834798,
|
|
"grad_norm": 0.36196563194913356,
|
|
"learning_rate": 2.412507484838482e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920036315917969,
|
|
"step": 1955,
|
|
"valid_targets_mean": 7653.2,
|
|
"valid_targets_min": 6818
|
|
},
|
|
{
|
|
"epoch": 3.444639718804921,
|
|
"grad_norm": 0.37091777373640833,
|
|
"learning_rate": 2.4039264191735395e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2962215542793274,
|
|
"step": 1960,
|
|
"valid_targets_mean": 7831.8,
|
|
"valid_targets_min": 6595
|
|
},
|
|
{
|
|
"epoch": 3.453427065026362,
|
|
"grad_norm": 0.390591605400152,
|
|
"learning_rate": 2.395337594516377e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922114133834839,
|
|
"step": 1965,
|
|
"valid_targets_mean": 7379.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.4622144112478033,
|
|
"grad_norm": 0.369413337310835,
|
|
"learning_rate": 2.3867411758490816e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982785403728485,
|
|
"step": 1970,
|
|
"valid_targets_mean": 7606.5,
|
|
"valid_targets_min": 4247
|
|
},
|
|
{
|
|
"epoch": 3.4710017574692444,
|
|
"grad_norm": 0.405455002612328,
|
|
"learning_rate": 2.378137328299613e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30318358540534973,
|
|
"step": 1975,
|
|
"valid_targets_mean": 7526.5,
|
|
"valid_targets_min": 7074
|
|
},
|
|
{
|
|
"epoch": 3.4797891036906856,
|
|
"grad_norm": 0.3634587702657081,
|
|
"learning_rate": 2.3695262171386318e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3018918037414551,
|
|
"step": 1980,
|
|
"valid_targets_mean": 7679.5,
|
|
"valid_targets_min": 6946
|
|
},
|
|
{
|
|
"epoch": 3.4885764499121263,
|
|
"grad_norm": 0.34154368112209427,
|
|
"learning_rate": 2.3609080077763258e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2998007535934448,
|
|
"step": 1985,
|
|
"valid_targets_mean": 8390.5,
|
|
"valid_targets_min": 7163
|
|
},
|
|
{
|
|
"epoch": 3.4973637961335675,
|
|
"grad_norm": 0.3511764990003454,
|
|
"learning_rate": 2.3522828657592295e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29999998211860657,
|
|
"step": 1990,
|
|
"valid_targets_mean": 7749.6,
|
|
"valid_targets_min": 6803
|
|
},
|
|
{
|
|
"epoch": 3.5061511423550087,
|
|
"grad_norm": 0.34837512795153164,
|
|
"learning_rate": 2.3436509567670483e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29551586508750916,
|
|
"step": 1995,
|
|
"valid_targets_mean": 8445.1,
|
|
"valid_targets_min": 6882
|
|
},
|
|
{
|
|
"epoch": 3.51493848857645,
|
|
"grad_norm": 0.3593792510717582,
|
|
"learning_rate": 2.335012446609473e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29875677824020386,
|
|
"step": 2000,
|
|
"valid_targets_mean": 7767.0,
|
|
"valid_targets_min": 6766
|
|
},
|
|
{
|
|
"epoch": 3.523725834797891,
|
|
"grad_norm": 0.34649869648554,
|
|
"learning_rate": 2.326367501222996e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3036854565143585,
|
|
"step": 2005,
|
|
"valid_targets_mean": 7762.8,
|
|
"valid_targets_min": 7027
|
|
},
|
|
{
|
|
"epoch": 3.532513181019332,
|
|
"grad_norm": 0.3444149291424148,
|
|
"learning_rate": 2.317716286667723e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285929799079895,
|
|
"step": 2010,
|
|
"valid_targets_mean": 8649.1,
|
|
"valid_targets_min": 7412
|
|
},
|
|
{
|
|
"epoch": 3.5413005272407734,
|
|
"grad_norm": 0.3626950990864704,
|
|
"learning_rate": 2.3090589691241842e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3122712969779968,
|
|
"step": 2015,
|
|
"valid_targets_mean": 8208.6,
|
|
"valid_targets_min": 7202
|
|
},
|
|
{
|
|
"epoch": 3.5500878734622145,
|
|
"grad_norm": 0.40986706588615096,
|
|
"learning_rate": 2.300395714890142e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29588234424591064,
|
|
"step": 2020,
|
|
"valid_targets_mean": 8427.4,
|
|
"valid_targets_min": 6603
|
|
},
|
|
{
|
|
"epoch": 3.5588752196836557,
|
|
"grad_norm": 0.44018019011979087,
|
|
"learning_rate": 2.291726690377394e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984328866004944,
|
|
"step": 2025,
|
|
"valid_targets_mean": 7692.3,
|
|
"valid_targets_min": 4237
|
|
},
|
|
{
|
|
"epoch": 3.5676625659050965,
|
|
"grad_norm": 0.33525487653600955,
|
|
"learning_rate": 2.283052062108582e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2910841107368469,
|
|
"step": 2030,
|
|
"valid_targets_mean": 7992.1,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 3.5764499121265376,
|
|
"grad_norm": 0.41470532934154464,
|
|
"learning_rate": 2.2743719967139884e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30168616771698,
|
|
"step": 2035,
|
|
"valid_targets_mean": 7738.9,
|
|
"valid_targets_min": 6976
|
|
},
|
|
{
|
|
"epoch": 3.585237258347979,
|
|
"grad_norm": 0.3816942129631596,
|
|
"learning_rate": 2.265686660928336e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30841612815856934,
|
|
"step": 2040,
|
|
"valid_targets_mean": 7594.2,
|
|
"valid_targets_min": 6528
|
|
},
|
|
{
|
|
"epoch": 3.59402460456942,
|
|
"grad_norm": 0.3922269375009551,
|
|
"learning_rate": 2.2569962215875866e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996993064880371,
|
|
"step": 2045,
|
|
"valid_targets_mean": 7590.4,
|
|
"valid_targets_min": 6690
|
|
},
|
|
{
|
|
"epoch": 3.602811950790861,
|
|
"grad_norm": 0.3980707323221537,
|
|
"learning_rate": 2.248300845625737e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303586483001709,
|
|
"step": 2050,
|
|
"valid_targets_mean": 7318.8,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 3.6115992970123023,
|
|
"grad_norm": 0.3961947116955539,
|
|
"learning_rate": 2.2396007000716105e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29648107290267944,
|
|
"step": 2055,
|
|
"valid_targets_mean": 7136.8,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 3.6203866432337435,
|
|
"grad_norm": 0.3702084207366189,
|
|
"learning_rate": 2.2308959520456485e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861528992652893,
|
|
"step": 2060,
|
|
"valid_targets_mean": 7680.1,
|
|
"valid_targets_min": 7175
|
|
},
|
|
{
|
|
"epoch": 3.6291739894551847,
|
|
"grad_norm": 0.3562202580176658,
|
|
"learning_rate": 2.2221867687567012e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2883298993110657,
|
|
"step": 2065,
|
|
"valid_targets_mean": 7557.6,
|
|
"valid_targets_min": 6670
|
|
},
|
|
{
|
|
"epoch": 3.637961335676626,
|
|
"grad_norm": 0.34916487177672517,
|
|
"learning_rate": 2.2134733174988164e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074985146522522,
|
|
"step": 2070,
|
|
"valid_targets_mean": 7415.9,
|
|
"valid_targets_min": 5588
|
|
},
|
|
{
|
|
"epoch": 3.6467486818980666,
|
|
"grad_norm": 0.328771161047517,
|
|
"learning_rate": 2.204755765648024e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930622100830078,
|
|
"step": 2075,
|
|
"valid_targets_mean": 8362.1,
|
|
"valid_targets_min": 6188
|
|
},
|
|
{
|
|
"epoch": 3.655536028119508,
|
|
"grad_norm": 0.32265693739853746,
|
|
"learning_rate": 2.196034280659122e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902243733406067,
|
|
"step": 2080,
|
|
"valid_targets_mean": 8383.9,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 3.664323374340949,
|
|
"grad_norm": 0.34293162883651207,
|
|
"learning_rate": 2.187309030062459e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29204216599464417,
|
|
"step": 2085,
|
|
"valid_targets_mean": 7703.4,
|
|
"valid_targets_min": 6982
|
|
},
|
|
{
|
|
"epoch": 3.67311072056239,
|
|
"grad_norm": 0.368430645097028,
|
|
"learning_rate": 2.17858018146072e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30315840244293213,
|
|
"step": 2090,
|
|
"valid_targets_mean": 7791.8,
|
|
"valid_targets_min": 6947
|
|
},
|
|
{
|
|
"epoch": 3.6818980667838312,
|
|
"grad_norm": 0.36604149908644223,
|
|
"learning_rate": 2.1698479025256996e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964887022972107,
|
|
"step": 2095,
|
|
"valid_targets_mean": 7566.7,
|
|
"valid_targets_min": 7005
|
|
},
|
|
{
|
|
"epoch": 3.6906854130052724,
|
|
"grad_norm": 0.3479378144053659,
|
|
"learning_rate": 2.1611123609950885e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27931052446365356,
|
|
"step": 2100,
|
|
"valid_targets_mean": 7593.7,
|
|
"valid_targets_min": 6713
|
|
},
|
|
{
|
|
"epoch": 3.6994727592267136,
|
|
"grad_norm": 0.3561427535078435,
|
|
"learning_rate": 2.1523737246692467e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2985917925834656,
|
|
"step": 2105,
|
|
"valid_targets_mean": 7797.1,
|
|
"valid_targets_min": 6688
|
|
},
|
|
{
|
|
"epoch": 3.7082601054481548,
|
|
"grad_norm": 0.34500911426625497,
|
|
"learning_rate": 2.1436321614079833e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996689975261688,
|
|
"step": 2110,
|
|
"valid_targets_mean": 7744.1,
|
|
"valid_targets_min": 6637
|
|
},
|
|
{
|
|
"epoch": 3.717047451669596,
|
|
"grad_norm": 0.35773921165040784,
|
|
"learning_rate": 2.134887839127328e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833252251148224,
|
|
"step": 2115,
|
|
"valid_targets_mean": 7583.2,
|
|
"valid_targets_min": 6776
|
|
},
|
|
{
|
|
"epoch": 3.7258347978910367,
|
|
"grad_norm": 0.3553759973101524,
|
|
"learning_rate": 2.1261409257963122e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27806031703948975,
|
|
"step": 2120,
|
|
"valid_targets_mean": 7864.3,
|
|
"valid_targets_min": 6850
|
|
},
|
|
{
|
|
"epoch": 3.7346221441124783,
|
|
"grad_norm": 0.34050580593703944,
|
|
"learning_rate": 2.1173915894337357e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3134038746356964,
|
|
"step": 2125,
|
|
"valid_targets_mean": 8492.7,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 3.743409490333919,
|
|
"grad_norm": 0.34663435278517646,
|
|
"learning_rate": 2.1086399981049422e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2960328757762909,
|
|
"step": 2130,
|
|
"valid_targets_mean": 8056.3,
|
|
"valid_targets_min": 7053
|
|
},
|
|
{
|
|
"epoch": 3.75219683655536,
|
|
"grad_norm": 0.3460448048393109,
|
|
"learning_rate": 2.0998863199185925e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29417964816093445,
|
|
"step": 2135,
|
|
"valid_targets_mean": 7704.6,
|
|
"valid_targets_min": 6840
|
|
},
|
|
{
|
|
"epoch": 3.7609841827768014,
|
|
"grad_norm": 0.3597630093679228,
|
|
"learning_rate": 2.0911307230234326e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29962360858917236,
|
|
"step": 2140,
|
|
"valid_targets_mean": 8447.3,
|
|
"valid_targets_min": 7102
|
|
},
|
|
{
|
|
"epoch": 3.7697715289982425,
|
|
"grad_norm": 0.3472767049797148,
|
|
"learning_rate": 2.0823733756050654e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956916093826294,
|
|
"step": 2145,
|
|
"valid_targets_mean": 8690.3,
|
|
"valid_targets_min": 6901
|
|
},
|
|
{
|
|
"epoch": 3.7785588752196837,
|
|
"grad_norm": 0.3467387362608841,
|
|
"learning_rate": 2.073614445882718e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29842609167099,
|
|
"step": 2150,
|
|
"valid_targets_mean": 8387.1,
|
|
"valid_targets_min": 7325
|
|
},
|
|
{
|
|
"epoch": 3.787346221441125,
|
|
"grad_norm": 0.33445920251252786,
|
|
"learning_rate": 2.064854102106014e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935226261615753,
|
|
"step": 2155,
|
|
"valid_targets_mean": 7921.4,
|
|
"valid_targets_min": 5375
|
|
},
|
|
{
|
|
"epoch": 3.796133567662566,
|
|
"grad_norm": 0.40128928390340646,
|
|
"learning_rate": 2.056092512551738e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3062954843044281,
|
|
"step": 2160,
|
|
"valid_targets_mean": 7697.6,
|
|
"valid_targets_min": 6973
|
|
},
|
|
{
|
|
"epoch": 3.8049209138840068,
|
|
"grad_norm": 0.3338208595633261,
|
|
"learning_rate": 2.0473298455206057e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3123404383659363,
|
|
"step": 2165,
|
|
"valid_targets_mean": 10194.6,
|
|
"valid_targets_min": 7270
|
|
},
|
|
{
|
|
"epoch": 3.8137082601054484,
|
|
"grad_norm": 0.36818192579005726,
|
|
"learning_rate": 2.0385662693340286e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312699556350708,
|
|
"step": 2170,
|
|
"valid_targets_mean": 8341.1,
|
|
"valid_targets_min": 6541
|
|
},
|
|
{
|
|
"epoch": 3.822495606326889,
|
|
"grad_norm": 0.38235082701821926,
|
|
"learning_rate": 2.0298019523308827e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968581020832062,
|
|
"step": 2175,
|
|
"valid_targets_mean": 7806.9,
|
|
"valid_targets_min": 6590
|
|
},
|
|
{
|
|
"epoch": 3.8312829525483303,
|
|
"grad_norm": 0.35616734722609394,
|
|
"learning_rate": 2.0210370628642758e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092958927154541,
|
|
"step": 2180,
|
|
"valid_targets_mean": 8949.9,
|
|
"valid_targets_min": 6994
|
|
},
|
|
{
|
|
"epoch": 3.8400702987697715,
|
|
"grad_norm": 0.36230882317460195,
|
|
"learning_rate": 2.0122717692983095e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30545303225517273,
|
|
"step": 2185,
|
|
"valid_targets_mean": 8836.3,
|
|
"valid_targets_min": 7478
|
|
},
|
|
{
|
|
"epoch": 3.8488576449912126,
|
|
"grad_norm": 0.369585223854352,
|
|
"learning_rate": 2.0035062400048487e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29880499839782715,
|
|
"step": 2190,
|
|
"valid_targets_mean": 7757.9,
|
|
"valid_targets_min": 7021
|
|
},
|
|
{
|
|
"epoch": 3.857644991212654,
|
|
"grad_norm": 0.355006341475912,
|
|
"learning_rate": 1.9947406433602875e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3043487071990967,
|
|
"step": 2195,
|
|
"valid_targets_mean": 8424.7,
|
|
"valid_targets_min": 7119
|
|
},
|
|
{
|
|
"epoch": 3.866432337434095,
|
|
"grad_norm": 0.35860000458499863,
|
|
"learning_rate": 1.985975147742313e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29896068572998047,
|
|
"step": 2200,
|
|
"valid_targets_mean": 7631.4,
|
|
"valid_targets_min": 6526
|
|
},
|
|
{
|
|
"epoch": 3.875219683655536,
|
|
"grad_norm": 0.3595590287255001,
|
|
"learning_rate": 1.9772099215266716e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074376583099365,
|
|
"step": 2205,
|
|
"valid_targets_mean": 8199.1,
|
|
"valid_targets_min": 6673
|
|
},
|
|
{
|
|
"epoch": 3.884007029876977,
|
|
"grad_norm": 0.3337123618202419,
|
|
"learning_rate": 1.9684451330839336e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3159179091453552,
|
|
"step": 2210,
|
|
"valid_targets_mean": 8646.1,
|
|
"valid_targets_min": 7196
|
|
},
|
|
{
|
|
"epoch": 3.8927943760984185,
|
|
"grad_norm": 0.3375866724477434,
|
|
"learning_rate": 1.959680950776262e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30195510387420654,
|
|
"step": 2215,
|
|
"valid_targets_mean": 8551.9,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 3.9015817223198592,
|
|
"grad_norm": 0.3917560295159304,
|
|
"learning_rate": 1.950917542954176e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983649671077728,
|
|
"step": 2220,
|
|
"valid_targets_mean": 8389.0,
|
|
"valid_targets_min": 6800
|
|
},
|
|
{
|
|
"epoch": 3.9103690685413004,
|
|
"grad_norm": 0.32762131645955955,
|
|
"learning_rate": 1.9421550779533173e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28323426842689514,
|
|
"step": 2225,
|
|
"valid_targets_mean": 7616.5,
|
|
"valid_targets_min": 7052
|
|
},
|
|
{
|
|
"epoch": 3.9191564147627416,
|
|
"grad_norm": 0.3590043216289149,
|
|
"learning_rate": 1.933393724091218e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843273878097534,
|
|
"step": 2230,
|
|
"valid_targets_mean": 7437.1,
|
|
"valid_targets_min": 6760
|
|
},
|
|
{
|
|
"epoch": 3.9279437609841827,
|
|
"grad_norm": 0.3377129248179601,
|
|
"learning_rate": 1.924633649664065e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3038652539253235,
|
|
"step": 2235,
|
|
"valid_targets_mean": 7922.2,
|
|
"valid_targets_min": 6763
|
|
},
|
|
{
|
|
"epoch": 3.936731107205624,
|
|
"grad_norm": 0.3795385389616872,
|
|
"learning_rate": 1.9158750229434703e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3075161576271057,
|
|
"step": 2240,
|
|
"valid_targets_mean": 7685.2,
|
|
"valid_targets_min": 6322
|
|
},
|
|
{
|
|
"epoch": 3.945518453427065,
|
|
"grad_norm": 0.34801917030646934,
|
|
"learning_rate": 1.9071180121732356e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30163663625717163,
|
|
"step": 2245,
|
|
"valid_targets_mean": 7734.0,
|
|
"valid_targets_min": 5781
|
|
},
|
|
{
|
|
"epoch": 3.9543057996485063,
|
|
"grad_norm": 0.3710316888949028,
|
|
"learning_rate": 1.898362785566122e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2933363914489746,
|
|
"step": 2250,
|
|
"valid_targets_mean": 7705.3,
|
|
"valid_targets_min": 7006
|
|
},
|
|
{
|
|
"epoch": 3.9630931458699474,
|
|
"grad_norm": 0.36369463382133627,
|
|
"learning_rate": 1.8896095113006203e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916035056114197,
|
|
"step": 2255,
|
|
"valid_targets_mean": 7839.6,
|
|
"valid_targets_min": 7064
|
|
},
|
|
{
|
|
"epoch": 3.9718804920913886,
|
|
"grad_norm": 0.3602525091263,
|
|
"learning_rate": 1.8808583575177172e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2959211468696594,
|
|
"step": 2260,
|
|
"valid_targets_mean": 7561.2,
|
|
"valid_targets_min": 6388
|
|
},
|
|
{
|
|
"epoch": 3.9806678383128293,
|
|
"grad_norm": 0.3915867253300481,
|
|
"learning_rate": 1.8721094923176676e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294384241104126,
|
|
"step": 2265,
|
|
"valid_targets_mean": 7656.9,
|
|
"valid_targets_min": 7114
|
|
},
|
|
{
|
|
"epoch": 3.9894551845342705,
|
|
"grad_norm": 0.33401393363437887,
|
|
"learning_rate": 1.863363083756766e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978602647781372,
|
|
"step": 2270,
|
|
"valid_targets_mean": 7726.6,
|
|
"valid_targets_min": 5183
|
|
},
|
|
{
|
|
"epoch": 3.9982425307557117,
|
|
"grad_norm": 0.3565162475058981,
|
|
"learning_rate": 1.854619299844117e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30783626437187195,
|
|
"step": 2275,
|
|
"valid_targets_mean": 7563.9,
|
|
"valid_targets_min": 6744
|
|
},
|
|
{
|
|
"epoch": 4.007029876977153,
|
|
"grad_norm": 0.3605847836372147,
|
|
"learning_rate": 1.845878308538408e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928239107131958,
|
|
"step": 2280,
|
|
"valid_targets_mean": 7678.9,
|
|
"valid_targets_min": 6670
|
|
},
|
|
{
|
|
"epoch": 4.015817223198594,
|
|
"grad_norm": 0.3622677156022337,
|
|
"learning_rate": 1.8371402777446833e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29661375284194946,
|
|
"step": 2285,
|
|
"valid_targets_mean": 7731.3,
|
|
"valid_targets_min": 6753
|
|
},
|
|
{
|
|
"epoch": 4.024604569420035,
|
|
"grad_norm": 0.3476786557845217,
|
|
"learning_rate": 1.8284053753111205e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29199156165122986,
|
|
"step": 2290,
|
|
"valid_targets_mean": 8399.1,
|
|
"valid_targets_min": 6665
|
|
},
|
|
{
|
|
"epoch": 4.033391915641476,
|
|
"grad_norm": 0.36611453493835344,
|
|
"learning_rate": 1.8196737690258034e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935590147972107,
|
|
"step": 2295,
|
|
"valid_targets_mean": 7679.2,
|
|
"valid_targets_min": 5811
|
|
},
|
|
{
|
|
"epoch": 4.042179261862917,
|
|
"grad_norm": 0.3691938793291058,
|
|
"learning_rate": 1.810945626613501e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29997485876083374,
|
|
"step": 2300,
|
|
"valid_targets_mean": 7750.9,
|
|
"valid_targets_min": 6803
|
|
},
|
|
{
|
|
"epoch": 4.050966608084359,
|
|
"grad_norm": 0.31535572991582766,
|
|
"learning_rate": 1.8022211157324446e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30525609850883484,
|
|
"step": 2305,
|
|
"valid_targets_mean": 9915.7,
|
|
"valid_targets_min": 6960
|
|
},
|
|
{
|
|
"epoch": 4.059753954305799,
|
|
"grad_norm": 0.312524298994893,
|
|
"learning_rate": 1.7935004039711078e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303733229637146,
|
|
"step": 2310,
|
|
"valid_targets_mean": 8488.7,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 4.068541300527241,
|
|
"grad_norm": 0.3380666197036135,
|
|
"learning_rate": 1.7847836588449873e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29684263467788696,
|
|
"step": 2315,
|
|
"valid_targets_mean": 7753.6,
|
|
"valid_targets_min": 6947
|
|
},
|
|
{
|
|
"epoch": 4.077328646748682,
|
|
"grad_norm": 0.3491856380684149,
|
|
"learning_rate": 1.7760710477933846e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.296021044254303,
|
|
"step": 2320,
|
|
"valid_targets_mean": 8416.9,
|
|
"valid_targets_min": 6714
|
|
},
|
|
{
|
|
"epoch": 4.086115992970123,
|
|
"grad_norm": 0.3377305735422485,
|
|
"learning_rate": 1.7673627381761907e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817150056362152,
|
|
"step": 2325,
|
|
"valid_targets_mean": 8393.1,
|
|
"valid_targets_min": 6642
|
|
},
|
|
{
|
|
"epoch": 4.094903339191564,
|
|
"grad_norm": 0.3585848827276999,
|
|
"learning_rate": 1.7586588972706703e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930801510810852,
|
|
"step": 2330,
|
|
"valid_targets_mean": 7160.6,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 4.103690685413005,
|
|
"grad_norm": 0.3369104092071973,
|
|
"learning_rate": 1.749959692268249e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29731500148773193,
|
|
"step": 2335,
|
|
"valid_targets_mean": 8363.5,
|
|
"valid_targets_min": 6850
|
|
},
|
|
{
|
|
"epoch": 4.1124780316344465,
|
|
"grad_norm": 0.3539275994219718,
|
|
"learning_rate": 1.741265290271302e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28852784633636475,
|
|
"step": 2340,
|
|
"valid_targets_mean": 7780.0,
|
|
"valid_targets_min": 7083
|
|
},
|
|
{
|
|
"epoch": 4.121265377855887,
|
|
"grad_norm": 0.3607445678030254,
|
|
"learning_rate": 1.7325758582899442e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3095434010028839,
|
|
"step": 2345,
|
|
"valid_targets_mean": 7643.3,
|
|
"valid_targets_min": 6791
|
|
},
|
|
{
|
|
"epoch": 4.130052724077329,
|
|
"grad_norm": 0.3961086551803289,
|
|
"learning_rate": 1.7238915632388198e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3048168420791626,
|
|
"step": 2350,
|
|
"valid_targets_mean": 7675.9,
|
|
"valid_targets_min": 6670
|
|
},
|
|
{
|
|
"epoch": 4.1388400702987695,
|
|
"grad_norm": 0.3293769346899659,
|
|
"learning_rate": 1.7152125719339e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3010752201080322,
|
|
"step": 2355,
|
|
"valid_targets_mean": 9228.5,
|
|
"valid_targets_min": 6075
|
|
},
|
|
{
|
|
"epoch": 4.147627416520211,
|
|
"grad_norm": 0.35490134598296946,
|
|
"learning_rate": 1.7065390510892767e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289789080619812,
|
|
"step": 2360,
|
|
"valid_targets_mean": 7724.8,
|
|
"valid_targets_min": 6916
|
|
},
|
|
{
|
|
"epoch": 4.156414762741652,
|
|
"grad_norm": 0.35645129004789955,
|
|
"learning_rate": 1.69787116731396e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860274314880371,
|
|
"step": 2365,
|
|
"valid_targets_mean": 7424.2,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 4.1652021089630935,
|
|
"grad_norm": 0.36641430970748606,
|
|
"learning_rate": 1.6892090871086772e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29338160157203674,
|
|
"step": 2370,
|
|
"valid_targets_mean": 7637.6,
|
|
"valid_targets_min": 7054
|
|
},
|
|
{
|
|
"epoch": 4.173989455184534,
|
|
"grad_norm": 0.3582363581783471,
|
|
"learning_rate": 1.680552976862676e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29446232318878174,
|
|
"step": 2375,
|
|
"valid_targets_mean": 7540.2,
|
|
"valid_targets_min": 6573
|
|
},
|
|
{
|
|
"epoch": 4.182776801405975,
|
|
"grad_norm": 0.3343312433323364,
|
|
"learning_rate": 1.671903002850528e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29724544286727905,
|
|
"step": 2380,
|
|
"valid_targets_mean": 7826.4,
|
|
"valid_targets_min": 7064
|
|
},
|
|
{
|
|
"epoch": 4.191564147627417,
|
|
"grad_norm": 0.34146365708841836,
|
|
"learning_rate": 1.6632593312289326e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28881704807281494,
|
|
"step": 2385,
|
|
"valid_targets_mean": 8713.6,
|
|
"valid_targets_min": 7182
|
|
},
|
|
{
|
|
"epoch": 4.200351493848857,
|
|
"grad_norm": 0.3793519885366359,
|
|
"learning_rate": 1.6546221280335283e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3088436424732208,
|
|
"step": 2390,
|
|
"valid_targets_mean": 7500.3,
|
|
"valid_targets_min": 6973
|
|
},
|
|
{
|
|
"epoch": 4.209138840070299,
|
|
"grad_norm": 0.3261860918301623,
|
|
"learning_rate": 1.6459915591757026e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2880277633666992,
|
|
"step": 2395,
|
|
"valid_targets_mean": 8629.6,
|
|
"valid_targets_min": 6451
|
|
},
|
|
{
|
|
"epoch": 4.21792618629174,
|
|
"grad_norm": 0.33627803896949293,
|
|
"learning_rate": 1.6373677904394038e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29634952545166016,
|
|
"step": 2400,
|
|
"valid_targets_mean": 8985.0,
|
|
"valid_targets_min": 6850
|
|
},
|
|
{
|
|
"epoch": 4.226713532513181,
|
|
"grad_norm": 0.35027190738818986,
|
|
"learning_rate": 1.628750987477957e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28951579332351685,
|
|
"step": 2405,
|
|
"valid_targets_mean": 7849.4,
|
|
"valid_targets_min": 7153
|
|
},
|
|
{
|
|
"epoch": 4.235500878734622,
|
|
"grad_norm": 0.3371494119478246,
|
|
"learning_rate": 1.6201413158108823e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29981040954589844,
|
|
"step": 2410,
|
|
"valid_targets_mean": 7603.4,
|
|
"valid_targets_min": 6746
|
|
},
|
|
{
|
|
"epoch": 4.244288224956064,
|
|
"grad_norm": 0.32995317258621537,
|
|
"learning_rate": 1.6115389408207168e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2904054522514343,
|
|
"step": 2415,
|
|
"valid_targets_mean": 8291.9,
|
|
"valid_targets_min": 6637
|
|
},
|
|
{
|
|
"epoch": 4.253075571177504,
|
|
"grad_norm": 0.3609691770401668,
|
|
"learning_rate": 1.6029440277498333e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953414022922516,
|
|
"step": 2420,
|
|
"valid_targets_mean": 7441.1,
|
|
"valid_targets_min": 4269
|
|
},
|
|
{
|
|
"epoch": 4.261862917398945,
|
|
"grad_norm": 0.34173193821882997,
|
|
"learning_rate": 1.594356741697271e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966964840888977,
|
|
"step": 2425,
|
|
"valid_targets_mean": 8248.4,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 4.270650263620387,
|
|
"grad_norm": 0.37725438835202146,
|
|
"learning_rate": 1.5857772476155634e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29200655221939087,
|
|
"step": 2430,
|
|
"valid_targets_mean": 7750.5,
|
|
"valid_targets_min": 6770
|
|
},
|
|
{
|
|
"epoch": 4.279437609841827,
|
|
"grad_norm": 0.3496762203635566,
|
|
"learning_rate": 1.577205710307566e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2777150869369507,
|
|
"step": 2435,
|
|
"valid_targets_mean": 7676.3,
|
|
"valid_targets_min": 7223
|
|
},
|
|
{
|
|
"epoch": 4.288224956063269,
|
|
"grad_norm": 0.3885029388879682,
|
|
"learning_rate": 1.5686422944232952e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29064977169036865,
|
|
"step": 2440,
|
|
"valid_targets_mean": 7624.4,
|
|
"valid_targets_min": 6504
|
|
},
|
|
{
|
|
"epoch": 4.29701230228471,
|
|
"grad_norm": 0.34950891257387595,
|
|
"learning_rate": 1.5600871644567633e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3030955493450165,
|
|
"step": 2445,
|
|
"valid_targets_mean": 8336.4,
|
|
"valid_targets_min": 7183
|
|
},
|
|
{
|
|
"epoch": 4.305799648506151,
|
|
"grad_norm": 0.3758714793081841,
|
|
"learning_rate": 1.5515404847428185e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3020630478858948,
|
|
"step": 2450,
|
|
"valid_targets_mean": 7686.5,
|
|
"valid_targets_min": 6825
|
|
},
|
|
{
|
|
"epoch": 4.314586994727592,
|
|
"grad_norm": 0.3640031717897144,
|
|
"learning_rate": 1.5430024194539882e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913421392440796,
|
|
"step": 2455,
|
|
"valid_targets_mean": 8199.2,
|
|
"valid_targets_min": 6796
|
|
},
|
|
{
|
|
"epoch": 4.323374340949034,
|
|
"grad_norm": 0.34866882466970534,
|
|
"learning_rate": 1.534473132597327e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2923296093940735,
|
|
"step": 2460,
|
|
"valid_targets_mean": 7538.4,
|
|
"valid_targets_min": 6140
|
|
},
|
|
{
|
|
"epoch": 4.3321616871704745,
|
|
"grad_norm": 0.33398689156760114,
|
|
"learning_rate": 1.5259527880112656e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966546416282654,
|
|
"step": 2465,
|
|
"valid_targets_mean": 7731.1,
|
|
"valid_targets_min": 6753
|
|
},
|
|
{
|
|
"epoch": 4.340949033391915,
|
|
"grad_norm": 0.3911676097472629,
|
|
"learning_rate": 1.5174415493624621e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30364346504211426,
|
|
"step": 2470,
|
|
"valid_targets_mean": 7488.8,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 4.349736379613357,
|
|
"grad_norm": 0.36769878489272523,
|
|
"learning_rate": 1.5089395801426596e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28855615854263306,
|
|
"step": 2475,
|
|
"valid_targets_mean": 7546.9,
|
|
"valid_targets_min": 6861
|
|
},
|
|
{
|
|
"epoch": 4.3585237258347975,
|
|
"grad_norm": 0.3412416489679569,
|
|
"learning_rate": 1.500447043665546e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983751893043518,
|
|
"step": 2480,
|
|
"valid_targets_mean": 8474.9,
|
|
"valid_targets_min": 5952
|
|
},
|
|
{
|
|
"epoch": 4.367311072056239,
|
|
"grad_norm": 0.33005482718780915,
|
|
"learning_rate": 1.4919641030636171e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833927869796753,
|
|
"step": 2485,
|
|
"valid_targets_mean": 8681.6,
|
|
"valid_targets_min": 7316
|
|
},
|
|
{
|
|
"epoch": 4.37609841827768,
|
|
"grad_norm": 0.3533106228979608,
|
|
"learning_rate": 1.4834909212850393e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28979846835136414,
|
|
"step": 2490,
|
|
"valid_targets_mean": 7579.9,
|
|
"valid_targets_min": 6740
|
|
},
|
|
{
|
|
"epoch": 4.3848857644991215,
|
|
"grad_norm": 0.3399076207790794,
|
|
"learning_rate": 1.475027661090525e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28529787063598633,
|
|
"step": 2495,
|
|
"valid_targets_mean": 7799.6,
|
|
"valid_targets_min": 7114
|
|
},
|
|
{
|
|
"epoch": 4.393673110720562,
|
|
"grad_norm": 0.312265946283216,
|
|
"learning_rate": 1.4665744850502035e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2880713641643524,
|
|
"step": 2500,
|
|
"valid_targets_mean": 8861.7,
|
|
"valid_targets_min": 6847
|
|
},
|
|
{
|
|
"epoch": 4.402460456942004,
|
|
"grad_norm": 0.3634215352563139,
|
|
"learning_rate": 1.4581315555404975e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29981136322021484,
|
|
"step": 2505,
|
|
"valid_targets_mean": 7694.9,
|
|
"valid_targets_min": 6288
|
|
},
|
|
{
|
|
"epoch": 4.411247803163445,
|
|
"grad_norm": 0.33381823311042097,
|
|
"learning_rate": 1.4496990347410056e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3036734461784363,
|
|
"step": 2510,
|
|
"valid_targets_mean": 8840.7,
|
|
"valid_targets_min": 7064
|
|
},
|
|
{
|
|
"epoch": 4.420035149384886,
|
|
"grad_norm": 0.3609301558527897,
|
|
"learning_rate": 1.4412770846313857e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2926355004310608,
|
|
"step": 2515,
|
|
"valid_targets_mean": 7695.9,
|
|
"valid_targets_min": 6847
|
|
},
|
|
{
|
|
"epoch": 4.428822495606327,
|
|
"grad_norm": 0.34936926923864897,
|
|
"learning_rate": 1.432865866988245e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30009007453918457,
|
|
"step": 2520,
|
|
"valid_targets_mean": 8763.0,
|
|
"valid_targets_min": 7017
|
|
},
|
|
{
|
|
"epoch": 4.437609841827768,
|
|
"grad_norm": 0.36541173520341647,
|
|
"learning_rate": 1.4244655433820294e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28389137983322144,
|
|
"step": 2525,
|
|
"valid_targets_mean": 7828.1,
|
|
"valid_targets_min": 7234
|
|
},
|
|
{
|
|
"epoch": 4.446397188049209,
|
|
"grad_norm": 0.3235817236180097,
|
|
"learning_rate": 1.4160762751739245e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28554970026016235,
|
|
"step": 2530,
|
|
"valid_targets_mean": 8569.7,
|
|
"valid_targets_min": 6711
|
|
},
|
|
{
|
|
"epoch": 4.45518453427065,
|
|
"grad_norm": 0.37288004350052983,
|
|
"learning_rate": 1.4076982235127523e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2974032759666443,
|
|
"step": 2535,
|
|
"valid_targets_mean": 7637.2,
|
|
"valid_targets_min": 6968
|
|
},
|
|
{
|
|
"epoch": 4.463971880492092,
|
|
"grad_norm": 0.36992647709739473,
|
|
"learning_rate": 1.399331549331878e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29612836241722107,
|
|
"step": 2540,
|
|
"valid_targets_mean": 7321.6,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 4.472759226713532,
|
|
"grad_norm": 0.36203015252759363,
|
|
"learning_rate": 1.390976413346116e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28595858812332153,
|
|
"step": 2545,
|
|
"valid_targets_mean": 9075.9,
|
|
"valid_targets_min": 7270
|
|
},
|
|
{
|
|
"epoch": 4.481546572934974,
|
|
"grad_norm": 0.3399703611080894,
|
|
"learning_rate": 1.3826329760486462e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931751608848572,
|
|
"step": 2550,
|
|
"valid_targets_mean": 7748.2,
|
|
"valid_targets_min": 7333
|
|
},
|
|
{
|
|
"epoch": 4.490333919156415,
|
|
"grad_norm": 0.3620118102981346,
|
|
"learning_rate": 1.3743013977079289e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2936304807662964,
|
|
"step": 2555,
|
|
"valid_targets_mean": 8341.4,
|
|
"valid_targets_min": 6909
|
|
},
|
|
{
|
|
"epoch": 4.499121265377856,
|
|
"grad_norm": 0.33151273542929843,
|
|
"learning_rate": 1.3659818383646242e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2981259524822235,
|
|
"step": 2560,
|
|
"valid_targets_mean": 7763.2,
|
|
"valid_targets_min": 7005
|
|
},
|
|
{
|
|
"epoch": 4.507908611599297,
|
|
"grad_norm": 0.3408783337428309,
|
|
"learning_rate": 1.3576744578285235e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30304306745529175,
|
|
"step": 2565,
|
|
"valid_targets_mean": 7672.8,
|
|
"valid_targets_min": 7217
|
|
},
|
|
{
|
|
"epoch": 4.516695957820739,
|
|
"grad_norm": 0.34010772862058963,
|
|
"learning_rate": 1.3493794156754744e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977759540081024,
|
|
"step": 2570,
|
|
"valid_targets_mean": 7593.9,
|
|
"valid_targets_min": 6016
|
|
},
|
|
{
|
|
"epoch": 4.525483304042179,
|
|
"grad_norm": 0.34600139704727106,
|
|
"learning_rate": 1.3410968712443185e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835003137588501,
|
|
"step": 2575,
|
|
"valid_targets_mean": 8259.8,
|
|
"valid_targets_min": 7135
|
|
},
|
|
{
|
|
"epoch": 4.53427065026362,
|
|
"grad_norm": 0.38063980661062224,
|
|
"learning_rate": 1.3328269836338285e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2990835905075073,
|
|
"step": 2580,
|
|
"valid_targets_mean": 7738.0,
|
|
"valid_targets_min": 7072
|
|
},
|
|
{
|
|
"epoch": 4.543057996485062,
|
|
"grad_norm": 0.3383400317506876,
|
|
"learning_rate": 1.3245699116996546e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2957487404346466,
|
|
"step": 2585,
|
|
"valid_targets_mean": 7810.8,
|
|
"valid_targets_min": 7374
|
|
},
|
|
{
|
|
"epoch": 4.551845342706502,
|
|
"grad_norm": 0.3411469261818419,
|
|
"learning_rate": 1.316325814051271e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963276505470276,
|
|
"step": 2590,
|
|
"valid_targets_mean": 8551.5,
|
|
"valid_targets_min": 6880
|
|
},
|
|
{
|
|
"epoch": 4.560632688927944,
|
|
"grad_norm": 0.36267221964327634,
|
|
"learning_rate": 1.3080948490489282e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887016534805298,
|
|
"step": 2595,
|
|
"valid_targets_mean": 7742.6,
|
|
"valid_targets_min": 6916
|
|
},
|
|
{
|
|
"epoch": 4.569420035149385,
|
|
"grad_norm": 0.3192342527402402,
|
|
"learning_rate": 1.2998771748006153e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999793589115143,
|
|
"step": 2600,
|
|
"valid_targets_mean": 9242.8,
|
|
"valid_targets_min": 6875
|
|
},
|
|
{
|
|
"epoch": 4.578207381370826,
|
|
"grad_norm": 0.35210138034589905,
|
|
"learning_rate": 1.2916729491590191e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29489219188690186,
|
|
"step": 2605,
|
|
"valid_targets_mean": 7644.7,
|
|
"valid_targets_min": 6891
|
|
},
|
|
{
|
|
"epoch": 4.586994727592267,
|
|
"grad_norm": 0.35250185727295646,
|
|
"learning_rate": 1.2834823297184932e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3076801300048828,
|
|
"step": 2610,
|
|
"valid_targets_mean": 7650.2,
|
|
"valid_targets_min": 6787
|
|
},
|
|
{
|
|
"epoch": 4.595782073813709,
|
|
"grad_norm": 0.3588600288438206,
|
|
"learning_rate": 1.2753054738120303e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28878769278526306,
|
|
"step": 2615,
|
|
"valid_targets_mean": 7817.5,
|
|
"valid_targets_min": 7146
|
|
},
|
|
{
|
|
"epoch": 4.6045694200351495,
|
|
"grad_norm": 0.34081363508124407,
|
|
"learning_rate": 1.267142538508241e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886558771133423,
|
|
"step": 2620,
|
|
"valid_targets_mean": 7755.6,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 4.61335676625659,
|
|
"grad_norm": 0.3562288164925693,
|
|
"learning_rate": 1.258993680608337e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29327595233917236,
|
|
"step": 2625,
|
|
"valid_targets_mean": 7758.2,
|
|
"valid_targets_min": 6882
|
|
},
|
|
{
|
|
"epoch": 4.622144112478032,
|
|
"grad_norm": 0.35026490484322587,
|
|
"learning_rate": 1.2508590566431149e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29490119218826294,
|
|
"step": 2630,
|
|
"valid_targets_mean": 7648.2,
|
|
"valid_targets_min": 7020
|
|
},
|
|
{
|
|
"epoch": 4.6309314586994725,
|
|
"grad_norm": 0.3358053567829388,
|
|
"learning_rate": 1.2427388228699561e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979782223701477,
|
|
"step": 2635,
|
|
"valid_targets_mean": 7701.8,
|
|
"valid_targets_min": 7259
|
|
},
|
|
{
|
|
"epoch": 4.639718804920914,
|
|
"grad_norm": 0.34062942673912844,
|
|
"learning_rate": 1.2346331352698206e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931850850582123,
|
|
"step": 2640,
|
|
"valid_targets_mean": 7778.1,
|
|
"valid_targets_min": 6715
|
|
},
|
|
{
|
|
"epoch": 4.648506151142355,
|
|
"grad_norm": 0.3432994977430127,
|
|
"learning_rate": 1.2265421495442524e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29120543599128723,
|
|
"step": 2645,
|
|
"valid_targets_mean": 7699.0,
|
|
"valid_targets_min": 6975
|
|
},
|
|
{
|
|
"epoch": 4.6572934973637965,
|
|
"grad_norm": 0.3234100940050823,
|
|
"learning_rate": 1.2184660211123878e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2826187014579773,
|
|
"step": 2650,
|
|
"valid_targets_mean": 8331.6,
|
|
"valid_targets_min": 2881
|
|
},
|
|
{
|
|
"epoch": 4.666080843585237,
|
|
"grad_norm": 0.3376680880965843,
|
|
"learning_rate": 1.2104049051079706e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29047930240631104,
|
|
"step": 2655,
|
|
"valid_targets_mean": 7689.2,
|
|
"valid_targets_min": 6146
|
|
},
|
|
{
|
|
"epoch": 4.674868189806679,
|
|
"grad_norm": 0.33738468635114716,
|
|
"learning_rate": 1.2023589563763731e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29389703273773193,
|
|
"step": 2660,
|
|
"valid_targets_mean": 8689.8,
|
|
"valid_targets_min": 7342
|
|
},
|
|
{
|
|
"epoch": 4.68365553602812,
|
|
"grad_norm": 0.34363822100044406,
|
|
"learning_rate": 1.1943283294716181e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29355084896087646,
|
|
"step": 2665,
|
|
"valid_targets_mean": 7720.2,
|
|
"valid_targets_min": 6622
|
|
},
|
|
{
|
|
"epoch": 4.69244288224956,
|
|
"grad_norm": 0.3353093732478869,
|
|
"learning_rate": 1.1863131786534146e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2888862192630768,
|
|
"step": 2670,
|
|
"valid_targets_mean": 7782.6,
|
|
"valid_targets_min": 7169
|
|
},
|
|
{
|
|
"epoch": 4.701230228471002,
|
|
"grad_norm": 0.37334062851684074,
|
|
"learning_rate": 1.1783136578841926e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29228097200393677,
|
|
"step": 2675,
|
|
"valid_targets_mean": 7612.3,
|
|
"valid_targets_min": 6837
|
|
},
|
|
{
|
|
"epoch": 4.710017574692443,
|
|
"grad_norm": 0.3310503918639298,
|
|
"learning_rate": 1.1703299208261447e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2942769229412079,
|
|
"step": 2680,
|
|
"valid_targets_mean": 7763.1,
|
|
"valid_targets_min": 6840
|
|
},
|
|
{
|
|
"epoch": 4.718804920913884,
|
|
"grad_norm": 0.3395372943848242,
|
|
"learning_rate": 1.1623621208382759e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782164514064789,
|
|
"step": 2685,
|
|
"valid_targets_mean": 7669.9,
|
|
"valid_targets_min": 7123
|
|
},
|
|
{
|
|
"epoch": 4.727592267135325,
|
|
"grad_norm": 0.3804812360317412,
|
|
"learning_rate": 1.1544104109734578e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29553931951522827,
|
|
"step": 2690,
|
|
"valid_targets_mean": 7181.2,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 4.736379613356767,
|
|
"grad_norm": 0.3311195492100797,
|
|
"learning_rate": 1.1464749439754872e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889626622200012,
|
|
"step": 2695,
|
|
"valid_targets_mean": 7681.3,
|
|
"valid_targets_min": 7275
|
|
},
|
|
{
|
|
"epoch": 4.745166959578207,
|
|
"grad_norm": 0.36177414094290317,
|
|
"learning_rate": 1.1385558722761525e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815544009208679,
|
|
"step": 2700,
|
|
"valid_targets_mean": 7833.3,
|
|
"valid_targets_min": 6433
|
|
},
|
|
{
|
|
"epoch": 4.753954305799649,
|
|
"grad_norm": 0.36212081133708707,
|
|
"learning_rate": 1.1306533479923065e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29274970293045044,
|
|
"step": 2705,
|
|
"valid_targets_mean": 7638.4,
|
|
"valid_targets_min": 6999
|
|
},
|
|
{
|
|
"epoch": 4.76274165202109,
|
|
"grad_norm": 0.3462449009677967,
|
|
"learning_rate": 1.1227675229229453e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29539358615875244,
|
|
"step": 2710,
|
|
"valid_targets_mean": 7612.1,
|
|
"valid_targets_min": 6986
|
|
},
|
|
{
|
|
"epoch": 4.77152899824253,
|
|
"grad_norm": 0.3535258801165218,
|
|
"learning_rate": 1.1148985485462882e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30066198110580444,
|
|
"step": 2715,
|
|
"valid_targets_mean": 8542.9,
|
|
"valid_targets_min": 6921
|
|
},
|
|
{
|
|
"epoch": 4.780316344463972,
|
|
"grad_norm": 0.3343590007067174,
|
|
"learning_rate": 1.1070465760168746e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772175073623657,
|
|
"step": 2720,
|
|
"valid_targets_mean": 7717.8,
|
|
"valid_targets_min": 6688
|
|
},
|
|
{
|
|
"epoch": 4.789103690685413,
|
|
"grad_norm": 0.3298794655275155,
|
|
"learning_rate": 1.0992117561626521e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3084215521812439,
|
|
"step": 2725,
|
|
"valid_targets_mean": 7942.3,
|
|
"valid_targets_min": 7273
|
|
},
|
|
{
|
|
"epoch": 4.797891036906854,
|
|
"grad_norm": 0.35627041067347104,
|
|
"learning_rate": 1.0913942394820882e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061642646789551,
|
|
"step": 2730,
|
|
"valid_targets_mean": 8432.6,
|
|
"valid_targets_min": 6496
|
|
},
|
|
{
|
|
"epoch": 4.806678383128295,
|
|
"grad_norm": 0.3442328956232023,
|
|
"learning_rate": 1.0835941761412725e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299866646528244,
|
|
"step": 2735,
|
|
"valid_targets_mean": 7752.8,
|
|
"valid_targets_min": 6153
|
|
},
|
|
{
|
|
"epoch": 4.815465729349737,
|
|
"grad_norm": 0.3253332284940007,
|
|
"learning_rate": 1.0758117159710343e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28083181381225586,
|
|
"step": 2740,
|
|
"valid_targets_mean": 8234.2,
|
|
"valid_targets_min": 6249
|
|
},
|
|
{
|
|
"epoch": 4.824253075571177,
|
|
"grad_norm": 0.33511730474947343,
|
|
"learning_rate": 1.0680470084640682e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2995944917201996,
|
|
"step": 2745,
|
|
"valid_targets_mean": 7667.0,
|
|
"valid_targets_min": 6791
|
|
},
|
|
{
|
|
"epoch": 4.833040421792619,
|
|
"grad_norm": 0.35583495623108913,
|
|
"learning_rate": 1.0603002027720544e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29249030351638794,
|
|
"step": 2750,
|
|
"valid_targets_mean": 7892.6,
|
|
"valid_targets_min": 7323
|
|
},
|
|
{
|
|
"epoch": 4.84182776801406,
|
|
"grad_norm": 0.32745370080185743,
|
|
"learning_rate": 1.0525714477028032e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977753281593323,
|
|
"step": 2755,
|
|
"valid_targets_mean": 7558.4,
|
|
"valid_targets_min": 6979
|
|
},
|
|
{
|
|
"epoch": 4.8506151142355005,
|
|
"grad_norm": 0.3777877026804973,
|
|
"learning_rate": 1.044860891717388e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29842567443847656,
|
|
"step": 2760,
|
|
"valid_targets_mean": 7716.7,
|
|
"valid_targets_min": 7014
|
|
},
|
|
{
|
|
"epoch": 4.859402460456942,
|
|
"grad_norm": 0.35088020099221523,
|
|
"learning_rate": 1.0371686829273015e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2985210120677948,
|
|
"step": 2765,
|
|
"valid_targets_mean": 7565.1,
|
|
"valid_targets_min": 6922
|
|
},
|
|
{
|
|
"epoch": 4.868189806678383,
|
|
"grad_norm": 0.35904390189923535,
|
|
"learning_rate": 1.029494969091601e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2912167012691498,
|
|
"step": 2770,
|
|
"valid_targets_mean": 7680.2,
|
|
"valid_targets_min": 6763
|
|
},
|
|
{
|
|
"epoch": 4.8769771528998245,
|
|
"grad_norm": 0.33201225829312675,
|
|
"learning_rate": 1.0218398976140809e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27736738324165344,
|
|
"step": 2775,
|
|
"valid_targets_mean": 7764.4,
|
|
"valid_targets_min": 6648
|
|
},
|
|
{
|
|
"epoch": 4.885764499121265,
|
|
"grad_norm": 0.35091376493216436,
|
|
"learning_rate": 1.0142036155404322e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956676483154297,
|
|
"step": 2780,
|
|
"valid_targets_mean": 7626.8,
|
|
"valid_targets_min": 6490
|
|
},
|
|
{
|
|
"epoch": 4.894551845342707,
|
|
"grad_norm": 0.3452862859027685,
|
|
"learning_rate": 1.0065862695554248e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862079441547394,
|
|
"step": 2785,
|
|
"valid_targets_mean": 7456.8,
|
|
"valid_targets_min": 6528
|
|
},
|
|
{
|
|
"epoch": 4.9033391915641475,
|
|
"grad_norm": 0.33774992294028156,
|
|
"learning_rate": 9.989880059800832e-06,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29728958010673523,
|
|
"step": 2790,
|
|
"valid_targets_mean": 7651.0,
|
|
"valid_targets_min": 7114
|
|
},
|
|
{
|
|
"epoch": 4.912126537785589,
|
|
"grad_norm": 0.317502819584272,
|
|
"learning_rate": 9.914089707688835e-06,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291983962059021,
|
|
"step": 2795,
|
|
"valid_targets_mean": 8589.1,
|
|
"valid_targets_min": 7083
|
|
},
|
|
{
|
|
"epoch": 4.92091388400703,
|
|
"grad_norm": 0.3201813183564017,
|
|
"learning_rate": 9.838493095069418e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711242735385895,
|
|
"step": 2800,
|
|
"valid_targets_mean": 8602.6,
|
|
"valid_targets_min": 6905
|
|
},
|
|
{
|
|
"epoch": 4.929701230228471,
|
|
"grad_norm": 0.3327302214786523,
|
|
"learning_rate": 9.763091674072225e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929172217845917,
|
|
"step": 2805,
|
|
"valid_targets_mean": 7599.5,
|
|
"valid_targets_min": 6065
|
|
},
|
|
{
|
|
"epoch": 4.938488576449912,
|
|
"grad_norm": 0.33452661554480145,
|
|
"learning_rate": 9.687886893077498e-06,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3053194284439087,
|
|
"step": 2810,
|
|
"valid_targets_mean": 7785.0,
|
|
"valid_targets_min": 6518
|
|
},
|
|
{
|
|
"epoch": 4.947275922671353,
|
|
"grad_norm": 0.35014091058387753,
|
|
"learning_rate": 9.612880196688207e-06,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884902358055115,
|
|
"step": 2815,
|
|
"valid_targets_mean": 8222.5,
|
|
"valid_targets_min": 6895
|
|
},
|
|
{
|
|
"epoch": 4.956063268892795,
|
|
"grad_norm": 0.32704238423920323,
|
|
"learning_rate": 9.538073025702361e-06,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2998591363430023,
|
|
"step": 2820,
|
|
"valid_targets_mean": 7775.8,
|
|
"valid_targets_min": 6685
|
|
},
|
|
{
|
|
"epoch": 4.964850615114235,
|
|
"grad_norm": 0.3298336299808867,
|
|
"learning_rate": 9.463466817085268e-06,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903066575527191,
|
|
"step": 2825,
|
|
"valid_targets_mean": 8447.8,
|
|
"valid_targets_min": 6800
|
|
},
|
|
{
|
|
"epoch": 4.973637961335677,
|
|
"grad_norm": 0.3354261754806601,
|
|
"learning_rate": 9.389063003941991e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288758784532547,
|
|
"step": 2830,
|
|
"valid_targets_mean": 7734.7,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 4.982425307557118,
|
|
"grad_norm": 0.3143248716857657,
|
|
"learning_rate": 9.314863015489781e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.292439728975296,
|
|
"step": 2835,
|
|
"valid_targets_mean": 8252.8,
|
|
"valid_targets_min": 6557
|
|
},
|
|
{
|
|
"epoch": 4.991212653778559,
|
|
"grad_norm": 0.350198149823658,
|
|
"learning_rate": 9.240868277030637e-06,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28888189792633057,
|
|
"step": 2840,
|
|
"valid_targets_mean": 7591.3,
|
|
"valid_targets_min": 6767
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.3433654464608369,
|
|
"learning_rate": 9.16708020992392e-06,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28854185342788696,
|
|
"step": 2845,
|
|
"valid_targets_mean": 7708.3,
|
|
"valid_targets_min": 6862
|
|
},
|
|
{
|
|
"epoch": 5.008787346221441,
|
|
"grad_norm": 0.35408106613717644,
|
|
"learning_rate": 9.093500231559076e-06,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2872666120529175,
|
|
"step": 2850,
|
|
"valid_targets_mean": 7559.2,
|
|
"valid_targets_min": 6375
|
|
},
|
|
{
|
|
"epoch": 5.017574692442882,
|
|
"grad_norm": 0.34119774313343076,
|
|
"learning_rate": 9.020129755328369e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2759493291378021,
|
|
"step": 2855,
|
|
"valid_targets_mean": 8318.3,
|
|
"valid_targets_min": 6850
|
|
},
|
|
{
|
|
"epoch": 5.026362038664323,
|
|
"grad_norm": 0.3390569587906135,
|
|
"learning_rate": 8.94697019059977e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29109516739845276,
|
|
"step": 2860,
|
|
"valid_targets_mean": 7784.2,
|
|
"valid_targets_min": 5826
|
|
},
|
|
{
|
|
"epoch": 5.035149384885765,
|
|
"grad_norm": 0.3143917948420686,
|
|
"learning_rate": 8.874022942689844e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29419422149658203,
|
|
"step": 2865,
|
|
"valid_targets_mean": 9353.3,
|
|
"valid_targets_min": 6895
|
|
},
|
|
{
|
|
"epoch": 5.043936731107205,
|
|
"grad_norm": 0.3510435013122092,
|
|
"learning_rate": 8.801289412836811e-06,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27901768684387207,
|
|
"step": 2870,
|
|
"valid_targets_mean": 7662.0,
|
|
"valid_targets_min": 6862
|
|
},
|
|
{
|
|
"epoch": 5.052724077328647,
|
|
"grad_norm": 0.3664757048422861,
|
|
"learning_rate": 8.728770998173567e-06,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3106905221939087,
|
|
"step": 2875,
|
|
"valid_targets_mean": 8406.5,
|
|
"valid_targets_min": 7211
|
|
},
|
|
{
|
|
"epoch": 5.061511423550088,
|
|
"grad_norm": 0.34806791171223833,
|
|
"learning_rate": 8.656469091700882e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856566607952118,
|
|
"step": 2880,
|
|
"valid_targets_mean": 8157.8,
|
|
"valid_targets_min": 6785
|
|
},
|
|
{
|
|
"epoch": 5.070298769771529,
|
|
"grad_norm": 0.3530629680648787,
|
|
"learning_rate": 8.584385082260658e-06,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898819148540497,
|
|
"step": 2885,
|
|
"valid_targets_mean": 7758.8,
|
|
"valid_targets_min": 6637
|
|
},
|
|
{
|
|
"epoch": 5.07908611599297,
|
|
"grad_norm": 0.3289134833546327,
|
|
"learning_rate": 8.512520354509196e-06,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844212055206299,
|
|
"step": 2890,
|
|
"valid_targets_mean": 8317.2,
|
|
"valid_targets_min": 6801
|
|
},
|
|
{
|
|
"epoch": 5.087873462214411,
|
|
"grad_norm": 0.3228881599417552,
|
|
"learning_rate": 8.440876288890663e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117586374282837,
|
|
"step": 2895,
|
|
"valid_targets_mean": 8214.1,
|
|
"valid_targets_min": 6788
|
|
},
|
|
{
|
|
"epoch": 5.0966608084358525,
|
|
"grad_norm": 0.35703463023856147,
|
|
"learning_rate": 8.369454261610516e-06,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001853823661804,
|
|
"step": 2900,
|
|
"valid_targets_mean": 7620.3,
|
|
"valid_targets_min": 6337
|
|
},
|
|
{
|
|
"epoch": 5.105448154657293,
|
|
"grad_norm": 0.33663048411660607,
|
|
"learning_rate": 8.298255644609132e-06,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892005443572998,
|
|
"step": 2905,
|
|
"valid_targets_mean": 8009.3,
|
|
"valid_targets_min": 7512
|
|
},
|
|
{
|
|
"epoch": 5.114235500878735,
|
|
"grad_norm": 0.35478320721312395,
|
|
"learning_rate": 8.227281805535361e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28927919268608093,
|
|
"step": 2910,
|
|
"valid_targets_mean": 7800.7,
|
|
"valid_targets_min": 7136
|
|
},
|
|
{
|
|
"epoch": 5.1230228471001755,
|
|
"grad_norm": 0.31733616916369456,
|
|
"learning_rate": 8.156534107720362e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2826761305332184,
|
|
"step": 2915,
|
|
"valid_targets_mean": 8438.5,
|
|
"valid_targets_min": 6931
|
|
},
|
|
{
|
|
"epoch": 5.131810193321617,
|
|
"grad_norm": 0.3272931666609286,
|
|
"learning_rate": 8.086013910151334e-06,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28670844435691833,
|
|
"step": 2920,
|
|
"valid_targets_mean": 7635.8,
|
|
"valid_targets_min": 4770
|
|
},
|
|
{
|
|
"epoch": 5.140597539543058,
|
|
"grad_norm": 0.32440078909074577,
|
|
"learning_rate": 8.015722567445463e-06,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28524067997932434,
|
|
"step": 2925,
|
|
"valid_targets_mean": 8409.3,
|
|
"valid_targets_min": 6404
|
|
},
|
|
{
|
|
"epoch": 5.1493848857644995,
|
|
"grad_norm": 0.32721388026046155,
|
|
"learning_rate": 7.94566142982385e-06,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29583433270454407,
|
|
"step": 2930,
|
|
"valid_targets_mean": 7790.6,
|
|
"valid_targets_min": 6812
|
|
},
|
|
{
|
|
"epoch": 5.15817223198594,
|
|
"grad_norm": 0.3363730851372013,
|
|
"learning_rate": 7.87583184308564e-06,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30835360288619995,
|
|
"step": 2935,
|
|
"valid_targets_mean": 7662.2,
|
|
"valid_targets_min": 7033
|
|
},
|
|
{
|
|
"epoch": 5.166959578207381,
|
|
"grad_norm": 0.3376536343834321,
|
|
"learning_rate": 7.806235148582102e-06,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28265395760536194,
|
|
"step": 2940,
|
|
"valid_targets_mean": 7519.6,
|
|
"valid_targets_min": 6717
|
|
},
|
|
{
|
|
"epoch": 5.175746924428823,
|
|
"grad_norm": 0.33596721819738473,
|
|
"learning_rate": 7.736872683190906e-06,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899321913719177,
|
|
"step": 2945,
|
|
"valid_targets_mean": 7700.2,
|
|
"valid_targets_min": 6626
|
|
},
|
|
{
|
|
"epoch": 5.184534270650263,
|
|
"grad_norm": 0.33848326395647726,
|
|
"learning_rate": 7.66774577929045e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899055480957031,
|
|
"step": 2950,
|
|
"valid_targets_mean": 7385.9,
|
|
"valid_targets_min": 6481
|
|
},
|
|
{
|
|
"epoch": 5.193321616871705,
|
|
"grad_norm": 0.31778300423572214,
|
|
"learning_rate": 7.598855764734217e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29572445154190063,
|
|
"step": 2955,
|
|
"valid_targets_mean": 8673.2,
|
|
"valid_targets_min": 6638
|
|
},
|
|
{
|
|
"epoch": 5.202108963093146,
|
|
"grad_norm": 0.36268069305068357,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2874404191970825,
|
|
"step": 2960,
|
|
"valid_targets_mean": 7848.9,
|
|
"valid_targets_min": 7071
|
|
},
|
|
{
|
|
"epoch": 5.210896309314587,
|
|
"grad_norm": 0.3466580768009033,
|
|
"learning_rate": 7.461791692291085e-06,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909945845603943,
|
|
"step": 2965,
|
|
"valid_targets_mean": 7791.6,
|
|
"valid_targets_min": 6440
|
|
},
|
|
{
|
|
"epoch": 5.219683655536028,
|
|
"grad_norm": 0.3547485147268179,
|
|
"learning_rate": 7.3936202672576485e-06,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970777750015259,
|
|
"step": 2970,
|
|
"valid_targets_mean": 7487.3,
|
|
"valid_targets_min": 6296
|
|
},
|
|
{
|
|
"epoch": 5.22847100175747,
|
|
"grad_norm": 0.3496834096686977,
|
|
"learning_rate": 7.3256909972247994e-06,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900811433792114,
|
|
"step": 2975,
|
|
"valid_targets_mean": 7673.6,
|
|
"valid_targets_min": 6741
|
|
},
|
|
{
|
|
"epoch": 5.23725834797891,
|
|
"grad_norm": 0.35893075162077176,
|
|
"learning_rate": 7.258005187040773e-06,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27755802869796753,
|
|
"step": 2980,
|
|
"valid_targets_mean": 7717.8,
|
|
"valid_targets_min": 6803
|
|
},
|
|
{
|
|
"epoch": 5.246045694200351,
|
|
"grad_norm": 0.32560513681456177,
|
|
"learning_rate": 7.190564136877207e-06,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886160612106323,
|
|
"step": 2985,
|
|
"valid_targets_mean": 8614.1,
|
|
"valid_targets_min": 7106
|
|
},
|
|
{
|
|
"epoch": 5.254833040421793,
|
|
"grad_norm": 0.33155285929884837,
|
|
"learning_rate": 7.123369142204175e-06,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29632484912872314,
|
|
"step": 2990,
|
|
"valid_targets_mean": 8658.5,
|
|
"valid_targets_min": 6773
|
|
},
|
|
{
|
|
"epoch": 5.263620386643233,
|
|
"grad_norm": 0.32276511243341377,
|
|
"learning_rate": 7.0564214937652645e-06,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822602689266205,
|
|
"step": 2995,
|
|
"valid_targets_mean": 7759.1,
|
|
"valid_targets_min": 6526
|
|
},
|
|
{
|
|
"epoch": 5.272407732864675,
|
|
"grad_norm": 0.3225226923127115,
|
|
"learning_rate": 6.989722477552836e-06,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978208661079407,
|
|
"step": 3000,
|
|
"valid_targets_mean": 8548.8,
|
|
"valid_targets_min": 6610
|
|
},
|
|
{
|
|
"epoch": 5.281195079086116,
|
|
"grad_norm": 0.3205604405307989,
|
|
"learning_rate": 6.923273374783264e-06,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28816479444503784,
|
|
"step": 3005,
|
|
"valid_targets_mean": 7833.6,
|
|
"valid_targets_min": 7200
|
|
},
|
|
{
|
|
"epoch": 5.289982425307557,
|
|
"grad_norm": 0.3340979255742489,
|
|
"learning_rate": 6.857075461872382e-06,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28635507822036743,
|
|
"step": 3010,
|
|
"valid_targets_mean": 7720.8,
|
|
"valid_targets_min": 6787
|
|
},
|
|
{
|
|
"epoch": 5.298769771528998,
|
|
"grad_norm": 0.3298534039786586,
|
|
"learning_rate": 6.7911300104109155e-06,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2974523901939392,
|
|
"step": 3015,
|
|
"valid_targets_mean": 7698.7,
|
|
"valid_targets_min": 6677
|
|
},
|
|
{
|
|
"epoch": 5.30755711775044,
|
|
"grad_norm": 0.31223988004936154,
|
|
"learning_rate": 6.725438287140076e-06,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27365636825561523,
|
|
"step": 3020,
|
|
"valid_targets_mean": 7658.8,
|
|
"valid_targets_min": 7101
|
|
},
|
|
{
|
|
"epoch": 5.31634446397188,
|
|
"grad_norm": 0.36712903682229575,
|
|
"learning_rate": 6.6600015539272535e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002887964248657,
|
|
"step": 3025,
|
|
"valid_targets_mean": 8573.8,
|
|
"valid_targets_min": 6856
|
|
},
|
|
{
|
|
"epoch": 5.325131810193321,
|
|
"grad_norm": 0.3245992026219487,
|
|
"learning_rate": 6.594821067741719e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914707362651825,
|
|
"step": 3030,
|
|
"valid_targets_mean": 7622.3,
|
|
"valid_targets_min": 6711
|
|
},
|
|
{
|
|
"epoch": 5.333919156414763,
|
|
"grad_norm": 0.39660825238437414,
|
|
"learning_rate": 6.529898080630552e-06,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2940986752510071,
|
|
"step": 3035,
|
|
"valid_targets_mean": 7386.1,
|
|
"valid_targets_min": 6117
|
|
},
|
|
{
|
|
"epoch": 5.3427065026362035,
|
|
"grad_norm": 0.3520229343804998,
|
|
"learning_rate": 6.465233839694511e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902158498764038,
|
|
"step": 3040,
|
|
"valid_targets_mean": 7599.2,
|
|
"valid_targets_min": 6327
|
|
},
|
|
{
|
|
"epoch": 5.351493848857645,
|
|
"grad_norm": 0.3448557608828672,
|
|
"learning_rate": 6.400829587064163e-06,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28602784872055054,
|
|
"step": 3045,
|
|
"valid_targets_mean": 7692.1,
|
|
"valid_targets_min": 6804
|
|
},
|
|
{
|
|
"epoch": 5.360281195079086,
|
|
"grad_norm": 0.32636497092417743,
|
|
"learning_rate": 6.3366865598759285e-06,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29605281352996826,
|
|
"step": 3050,
|
|
"valid_targets_mean": 9081.4,
|
|
"valid_targets_min": 6809
|
|
},
|
|
{
|
|
"epoch": 5.3690685413005275,
|
|
"grad_norm": 0.3248539671966316,
|
|
"learning_rate": 6.272805990248412e-06,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288605272769928,
|
|
"step": 3055,
|
|
"valid_targets_mean": 8345.0,
|
|
"valid_targets_min": 6517
|
|
},
|
|
{
|
|
"epoch": 5.377855887521968,
|
|
"grad_norm": 0.3735371901400926,
|
|
"learning_rate": 6.209189105258661e-06,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30450695753097534,
|
|
"step": 3060,
|
|
"valid_targets_mean": 7874.4,
|
|
"valid_targets_min": 6518
|
|
},
|
|
{
|
|
"epoch": 5.38664323374341,
|
|
"grad_norm": 0.3163172048772757,
|
|
"learning_rate": 6.145837126918652e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29120033979415894,
|
|
"step": 3065,
|
|
"valid_targets_mean": 7761.3,
|
|
"valid_targets_min": 6781
|
|
},
|
|
{
|
|
"epoch": 5.3954305799648505,
|
|
"grad_norm": 0.33063399197041154,
|
|
"learning_rate": 6.082751272151759e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947753071784973,
|
|
"step": 3070,
|
|
"valid_targets_mean": 8314.8,
|
|
"valid_targets_min": 5829
|
|
},
|
|
{
|
|
"epoch": 5.404217926186292,
|
|
"grad_norm": 0.31418920675480283,
|
|
"learning_rate": 6.019932752769437e-06,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27883777022361755,
|
|
"step": 3075,
|
|
"valid_targets_mean": 8160.8,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 5.413005272407733,
|
|
"grad_norm": 0.32573627836397306,
|
|
"learning_rate": 5.957382775447897e-06,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857188284397125,
|
|
"step": 3080,
|
|
"valid_targets_mean": 7901.0,
|
|
"valid_targets_min": 7341
|
|
},
|
|
{
|
|
"epoch": 5.421792618629174,
|
|
"grad_norm": 0.3271273381515411,
|
|
"learning_rate": 5.895102541704944e-06,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29406070709228516,
|
|
"step": 3085,
|
|
"valid_targets_mean": 8175.0,
|
|
"valid_targets_min": 6945
|
|
},
|
|
{
|
|
"epoch": 5.430579964850615,
|
|
"grad_norm": 0.3293770941002359,
|
|
"learning_rate": 5.833093247876918e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789419889450073,
|
|
"step": 3090,
|
|
"valid_targets_mean": 7789.3,
|
|
"valid_targets_min": 7058
|
|
},
|
|
{
|
|
"epoch": 5.439367311072056,
|
|
"grad_norm": 0.323465702779747,
|
|
"learning_rate": 5.771356085095669e-06,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898038923740387,
|
|
"step": 3095,
|
|
"valid_targets_mean": 7763.9,
|
|
"valid_targets_min": 6742
|
|
},
|
|
{
|
|
"epoch": 5.448154657293498,
|
|
"grad_norm": 0.32735629336514316,
|
|
"learning_rate": 5.709892239265733e-06,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28642818331718445,
|
|
"step": 3100,
|
|
"valid_targets_mean": 7992.3,
|
|
"valid_targets_min": 6744
|
|
},
|
|
{
|
|
"epoch": 5.456942003514938,
|
|
"grad_norm": 0.3298517561331294,
|
|
"learning_rate": 5.648702891041486e-06,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27885934710502625,
|
|
"step": 3105,
|
|
"valid_targets_mean": 7718.9,
|
|
"valid_targets_min": 6823
|
|
},
|
|
{
|
|
"epoch": 5.46572934973638,
|
|
"grad_norm": 0.30381787832315504,
|
|
"learning_rate": 5.5877892158045335e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.297435998916626,
|
|
"step": 3110,
|
|
"valid_targets_mean": 8274.4,
|
|
"valid_targets_min": 5183
|
|
},
|
|
{
|
|
"epoch": 5.474516695957821,
|
|
"grad_norm": 0.33243040819665437,
|
|
"learning_rate": 5.527152383641074e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871898412704468,
|
|
"step": 3115,
|
|
"valid_targets_mean": 7849.6,
|
|
"valid_targets_min": 7020
|
|
},
|
|
{
|
|
"epoch": 5.483304042179262,
|
|
"grad_norm": 0.31558816610328144,
|
|
"learning_rate": 5.4667935593194635e-06,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.280933678150177,
|
|
"step": 3120,
|
|
"valid_targets_mean": 7688.6,
|
|
"valid_targets_min": 6730
|
|
},
|
|
{
|
|
"epoch": 5.492091388400703,
|
|
"grad_norm": 0.3356246422039742,
|
|
"learning_rate": 5.406713902267812e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29142051935195923,
|
|
"step": 3125,
|
|
"valid_targets_mean": 7132.3,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 5.500878734622145,
|
|
"grad_norm": 0.3252334996601614,
|
|
"learning_rate": 5.346914566551746e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894833981990814,
|
|
"step": 3130,
|
|
"valid_targets_mean": 7597.6,
|
|
"valid_targets_min": 6517
|
|
},
|
|
{
|
|
"epoch": 5.509666080843585,
|
|
"grad_norm": 0.30440359940776446,
|
|
"learning_rate": 5.2873967008522006e-06,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687520980834961,
|
|
"step": 3135,
|
|
"valid_targets_mean": 8643.2,
|
|
"valid_targets_min": 6955
|
|
},
|
|
{
|
|
"epoch": 5.518453427065026,
|
|
"grad_norm": 0.33056583512431303,
|
|
"learning_rate": 5.228161448443394e-06,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3093627691268921,
|
|
"step": 3140,
|
|
"valid_targets_mean": 7529.2,
|
|
"valid_targets_min": 6591
|
|
},
|
|
{
|
|
"epoch": 5.527240773286468,
|
|
"grad_norm": 0.3267302005382248,
|
|
"learning_rate": 5.169209947170824e-06,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30785757303237915,
|
|
"step": 3145,
|
|
"valid_targets_mean": 8656.3,
|
|
"valid_targets_min": 6753
|
|
},
|
|
{
|
|
"epoch": 5.536028119507908,
|
|
"grad_norm": 0.33200499804961403,
|
|
"learning_rate": 5.1105433294294605e-06,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28961071372032166,
|
|
"step": 3150,
|
|
"valid_targets_mean": 7703.8,
|
|
"valid_targets_min": 6934
|
|
},
|
|
{
|
|
"epoch": 5.54481546572935,
|
|
"grad_norm": 0.33658932971818206,
|
|
"learning_rate": 5.052162722141946e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898900806903839,
|
|
"step": 3155,
|
|
"valid_targets_mean": 7618.9,
|
|
"valid_targets_min": 6719
|
|
},
|
|
{
|
|
"epoch": 5.553602811950791,
|
|
"grad_norm": 0.3104483049157492,
|
|
"learning_rate": 4.994069246736973e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733854055404663,
|
|
"step": 3160,
|
|
"valid_targets_mean": 7678.3,
|
|
"valid_targets_min": 6911
|
|
},
|
|
{
|
|
"epoch": 5.562390158172232,
|
|
"grad_norm": 0.335513612906866,
|
|
"learning_rate": 4.9362640191277526e-06,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28684091567993164,
|
|
"step": 3165,
|
|
"valid_targets_mean": 7808.9,
|
|
"valid_targets_min": 6153
|
|
},
|
|
{
|
|
"epoch": 5.571177504393673,
|
|
"grad_norm": 0.3165281390328353,
|
|
"learning_rate": 4.878748149690544e-06,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29210197925567627,
|
|
"step": 3170,
|
|
"valid_targets_mean": 8098.0,
|
|
"valid_targets_min": 7111
|
|
},
|
|
{
|
|
"epoch": 5.579964850615115,
|
|
"grad_norm": 0.33012053945102526,
|
|
"learning_rate": 4.821522743243377e-06,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29123449325561523,
|
|
"step": 3175,
|
|
"valid_targets_mean": 7574.3,
|
|
"valid_targets_min": 6840
|
|
},
|
|
{
|
|
"epoch": 5.588752196836555,
|
|
"grad_norm": 0.31868590805039276,
|
|
"learning_rate": 4.764588899024763e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26774659752845764,
|
|
"step": 3180,
|
|
"valid_targets_mean": 7745.1,
|
|
"valid_targets_min": 6969
|
|
},
|
|
{
|
|
"epoch": 5.597539543057996,
|
|
"grad_norm": 0.3030971493993999,
|
|
"learning_rate": 4.7079477106726576e-06,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700444459915161,
|
|
"step": 3185,
|
|
"valid_targets_mean": 8861.1,
|
|
"valid_targets_min": 6313
|
|
},
|
|
{
|
|
"epoch": 5.606326889279438,
|
|
"grad_norm": 0.31712976658654685,
|
|
"learning_rate": 4.6516002662033645e-06,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27993258833885193,
|
|
"step": 3190,
|
|
"valid_targets_mean": 7806.2,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 5.6151142355008785,
|
|
"grad_norm": 0.3123065680907856,
|
|
"learning_rate": 4.595547647990725e-06,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28896477818489075,
|
|
"step": 3195,
|
|
"valid_targets_mean": 8011.1,
|
|
"valid_targets_min": 6719
|
|
},
|
|
{
|
|
"epoch": 5.62390158172232,
|
|
"grad_norm": 0.32853539593068376,
|
|
"learning_rate": 4.53979093274526e-06,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29997682571411133,
|
|
"step": 3200,
|
|
"valid_targets_mean": 8496.4,
|
|
"valid_targets_min": 6859
|
|
},
|
|
{
|
|
"epoch": 5.632688927943761,
|
|
"grad_norm": 0.341458589626131,
|
|
"learning_rate": 4.484331191493532e-06,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28483325242996216,
|
|
"step": 3205,
|
|
"valid_targets_mean": 8154.6,
|
|
"valid_targets_min": 7068
|
|
},
|
|
{
|
|
"epoch": 5.6414762741652025,
|
|
"grad_norm": 0.31495834405447776,
|
|
"learning_rate": 4.429169489557528e-06,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851713001728058,
|
|
"step": 3210,
|
|
"valid_targets_mean": 7686.2,
|
|
"valid_targets_min": 6387
|
|
},
|
|
{
|
|
"epoch": 5.650263620386643,
|
|
"grad_norm": 0.29999185071692785,
|
|
"learning_rate": 4.374306886534248e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28340470790863037,
|
|
"step": 3215,
|
|
"valid_targets_mean": 8562.3,
|
|
"valid_targets_min": 6326
|
|
},
|
|
{
|
|
"epoch": 5.659050966608085,
|
|
"grad_norm": 0.3231664440148963,
|
|
"learning_rate": 4.3197444362752994e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838216722011566,
|
|
"step": 3220,
|
|
"valid_targets_mean": 7336.6,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 5.6678383128295255,
|
|
"grad_norm": 0.30672227936405144,
|
|
"learning_rate": 4.265483186866688e-06,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630815804004669,
|
|
"step": 3225,
|
|
"valid_targets_mean": 8641.6,
|
|
"valid_targets_min": 6861
|
|
},
|
|
{
|
|
"epoch": 5.676625659050966,
|
|
"grad_norm": 0.3377876014310014,
|
|
"learning_rate": 4.211524180608672e-06,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870168685913086,
|
|
"step": 3230,
|
|
"valid_targets_mean": 7735.8,
|
|
"valid_targets_min": 6915
|
|
},
|
|
{
|
|
"epoch": 5.685413005272408,
|
|
"grad_norm": 0.31001235576301167,
|
|
"learning_rate": 4.157868453995757e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28175193071365356,
|
|
"step": 3235,
|
|
"valid_targets_mean": 8350.0,
|
|
"valid_targets_min": 6625
|
|
},
|
|
{
|
|
"epoch": 5.694200351493849,
|
|
"grad_norm": 0.31083908749481165,
|
|
"learning_rate": 4.104517037696749e-06,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27868616580963135,
|
|
"step": 3240,
|
|
"valid_targets_mean": 8633.8,
|
|
"valid_targets_min": 7194
|
|
},
|
|
{
|
|
"epoch": 5.70298769771529,
|
|
"grad_norm": 0.32191431288817535,
|
|
"learning_rate": 4.051470956535004e-06,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29141765832901,
|
|
"step": 3245,
|
|
"valid_targets_mean": 7639.6,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 5.711775043936731,
|
|
"grad_norm": 0.3396193057167186,
|
|
"learning_rate": 3.998731229468697e-06,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2942695915699005,
|
|
"step": 3250,
|
|
"valid_targets_mean": 7783.9,
|
|
"valid_targets_min": 7334
|
|
},
|
|
{
|
|
"epoch": 5.720562390158173,
|
|
"grad_norm": 0.3092604067668136,
|
|
"learning_rate": 3.94629886957129e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28806155920028687,
|
|
"step": 3255,
|
|
"valid_targets_mean": 7493.6,
|
|
"valid_targets_min": 5869
|
|
},
|
|
{
|
|
"epoch": 5.729349736379613,
|
|
"grad_norm": 0.3292858311037341,
|
|
"learning_rate": 3.8941748840120404e-06,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290735125541687,
|
|
"step": 3260,
|
|
"valid_targets_mean": 7823.6,
|
|
"valid_targets_min": 7275
|
|
},
|
|
{
|
|
"epoch": 5.738137082601055,
|
|
"grad_norm": 0.32174230635297546,
|
|
"learning_rate": 3.842360274036658e-06,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30042213201522827,
|
|
"step": 3265,
|
|
"valid_targets_mean": 7548.5,
|
|
"valid_targets_min": 6556
|
|
},
|
|
{
|
|
"epoch": 5.746924428822496,
|
|
"grad_norm": 0.3150931499885214,
|
|
"learning_rate": 3.7908560349481072e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.296764612197876,
|
|
"step": 3270,
|
|
"valid_targets_mean": 8671.5,
|
|
"valid_targets_min": 6838
|
|
},
|
|
{
|
|
"epoch": 5.755711775043936,
|
|
"grad_norm": 0.3251774095699638,
|
|
"learning_rate": 3.7396631560874296e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913369834423065,
|
|
"step": 3275,
|
|
"valid_targets_mean": 7595.5,
|
|
"valid_targets_min": 4779
|
|
},
|
|
{
|
|
"epoch": 5.764499121265378,
|
|
"grad_norm": 0.29806093139480805,
|
|
"learning_rate": 3.6887826208147968e-06,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796759605407715,
|
|
"step": 3280,
|
|
"valid_targets_mean": 8333.2,
|
|
"valid_targets_min": 6872
|
|
},
|
|
{
|
|
"epoch": 5.773286467486819,
|
|
"grad_norm": 0.32196531471289125,
|
|
"learning_rate": 3.6382154064905794e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2924780547618866,
|
|
"step": 3285,
|
|
"valid_targets_mean": 7810.5,
|
|
"valid_targets_min": 6153
|
|
},
|
|
{
|
|
"epoch": 5.78207381370826,
|
|
"grad_norm": 0.31002178625719085,
|
|
"learning_rate": 3.587962484456611e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780643403530121,
|
|
"step": 3290,
|
|
"valid_targets_mean": 7344.2,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 5.790861159929701,
|
|
"grad_norm": 0.3337355043722275,
|
|
"learning_rate": 3.538024820017467e-06,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294627845287323,
|
|
"step": 3295,
|
|
"valid_targets_mean": 8312.8,
|
|
"valid_targets_min": 7030
|
|
},
|
|
{
|
|
"epoch": 5.799648506151143,
|
|
"grad_norm": 0.312698283528566,
|
|
"learning_rate": 3.488403372422011e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884920835494995,
|
|
"step": 3300,
|
|
"valid_targets_mean": 8300.7,
|
|
"valid_targets_min": 6396
|
|
},
|
|
{
|
|
"epoch": 5.808435852372583,
|
|
"grad_norm": 0.3091039350887255,
|
|
"learning_rate": 3.4390990948448867e-06,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812803387641907,
|
|
"step": 3305,
|
|
"valid_targets_mean": 8493.2,
|
|
"valid_targets_min": 6773
|
|
},
|
|
{
|
|
"epoch": 5.817223198594025,
|
|
"grad_norm": 0.31877137936357625,
|
|
"learning_rate": 3.390112934368266e-06,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27986976504325867,
|
|
"step": 3310,
|
|
"valid_targets_mean": 7588.8,
|
|
"valid_targets_min": 6270
|
|
},
|
|
{
|
|
"epoch": 5.826010544815466,
|
|
"grad_norm": 0.3171231931848898,
|
|
"learning_rate": 3.3414458319636124e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28334343433380127,
|
|
"step": 3315,
|
|
"valid_targets_mean": 7865.8,
|
|
"valid_targets_min": 7421
|
|
},
|
|
{
|
|
"epoch": 5.8347978910369065,
|
|
"grad_norm": 0.351307724929885,
|
|
"learning_rate": 3.2930987224736465e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002662658691406,
|
|
"step": 3320,
|
|
"valid_targets_mean": 6947.4,
|
|
"valid_targets_min": 2881
|
|
},
|
|
{
|
|
"epoch": 5.843585237258348,
|
|
"grad_norm": 0.33358711919009776,
|
|
"learning_rate": 3.2450725345943577e-06,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27923858165740967,
|
|
"step": 3325,
|
|
"valid_targets_mean": 7789.2,
|
|
"valid_targets_min": 6921
|
|
},
|
|
{
|
|
"epoch": 5.852372583479789,
|
|
"grad_norm": 0.3414208492312931,
|
|
"learning_rate": 3.197368190857173e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987247705459595,
|
|
"step": 3330,
|
|
"valid_targets_mean": 7808.4,
|
|
"valid_targets_min": 6834
|
|
},
|
|
{
|
|
"epoch": 5.8611599297012305,
|
|
"grad_norm": 0.3348762946692922,
|
|
"learning_rate": 3.149986607611253e-06,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956831455230713,
|
|
"step": 3335,
|
|
"valid_targets_mean": 7496.1,
|
|
"valid_targets_min": 6456
|
|
},
|
|
{
|
|
"epoch": 5.869947275922671,
|
|
"grad_norm": 0.3448410739256257,
|
|
"learning_rate": 3.102928695005858e-06,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887445390224457,
|
|
"step": 3340,
|
|
"valid_targets_mean": 7771.7,
|
|
"valid_targets_min": 7171
|
|
},
|
|
{
|
|
"epoch": 5.878734622144113,
|
|
"grad_norm": 0.3043444067789503,
|
|
"learning_rate": 3.0561953569729064e-06,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28372079133987427,
|
|
"step": 3345,
|
|
"valid_targets_mean": 7517.8,
|
|
"valid_targets_min": 6720
|
|
},
|
|
{
|
|
"epoch": 5.8875219683655535,
|
|
"grad_norm": 0.3164874212248818,
|
|
"learning_rate": 3.0097874912095636e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29748934507369995,
|
|
"step": 3350,
|
|
"valid_targets_mean": 8516.9,
|
|
"valid_targets_min": 7389
|
|
},
|
|
{
|
|
"epoch": 5.896309314586995,
|
|
"grad_norm": 0.32852926202032856,
|
|
"learning_rate": 2.9637059891610452e-06,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930929660797119,
|
|
"step": 3355,
|
|
"valid_targets_mean": 7919.1,
|
|
"valid_targets_min": 7175
|
|
},
|
|
{
|
|
"epoch": 5.905096660808436,
|
|
"grad_norm": 0.9335255021053942,
|
|
"learning_rate": 2.9179517360034526e-06,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28942394256591797,
|
|
"step": 3360,
|
|
"valid_targets_mean": 7831.2,
|
|
"valid_targets_min": 5375
|
|
},
|
|
{
|
|
"epoch": 5.913884007029877,
|
|
"grad_norm": 0.33951614009792314,
|
|
"learning_rate": 2.872525610626797e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3013302981853485,
|
|
"step": 3365,
|
|
"valid_targets_mean": 8491.2,
|
|
"valid_targets_min": 7200
|
|
},
|
|
{
|
|
"epoch": 5.922671353251318,
|
|
"grad_norm": 0.31883626036243257,
|
|
"learning_rate": 2.827428485618102e-06,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28803277015686035,
|
|
"step": 3370,
|
|
"valid_targets_mean": 8212.4,
|
|
"valid_targets_min": 5811
|
|
},
|
|
{
|
|
"epoch": 5.931458699472759,
|
|
"grad_norm": 0.2878749967770179,
|
|
"learning_rate": 2.7826612272446584e-06,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861100137233734,
|
|
"step": 3375,
|
|
"valid_targets_mean": 8553.2,
|
|
"valid_targets_min": 6438
|
|
},
|
|
{
|
|
"epoch": 5.940246045694201,
|
|
"grad_norm": 0.2985617783397011,
|
|
"learning_rate": 2.738224695437357e-06,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832047641277313,
|
|
"step": 3380,
|
|
"valid_targets_mean": 8549.3,
|
|
"valid_targets_min": 7163
|
|
},
|
|
{
|
|
"epoch": 5.949033391915641,
|
|
"grad_norm": 0.32595062411682413,
|
|
"learning_rate": 2.6941197437742105e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29271990060806274,
|
|
"step": 3385,
|
|
"valid_targets_mean": 7661.8,
|
|
"valid_targets_min": 6983
|
|
},
|
|
{
|
|
"epoch": 5.957820738137083,
|
|
"grad_norm": 0.31020871319992316,
|
|
"learning_rate": 2.650347219463907e-06,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28285855054855347,
|
|
"step": 3390,
|
|
"valid_targets_mean": 8599.2,
|
|
"valid_targets_min": 7033
|
|
},
|
|
{
|
|
"epoch": 5.966608084358524,
|
|
"grad_norm": 0.31645890810904653,
|
|
"learning_rate": 2.6069079633295858e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930651903152466,
|
|
"step": 3395,
|
|
"valid_targets_mean": 7556.0,
|
|
"valid_targets_min": 6435
|
|
},
|
|
{
|
|
"epoch": 5.975395430579965,
|
|
"grad_norm": 0.3164728752307686,
|
|
"learning_rate": 2.563802809792646e-06,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29243242740631104,
|
|
"step": 3400,
|
|
"valid_targets_mean": 7732.3,
|
|
"valid_targets_min": 6723
|
|
},
|
|
{
|
|
"epoch": 5.984182776801406,
|
|
"grad_norm": 0.3400353139390024,
|
|
"learning_rate": 2.52103258685674e-06,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30362796783447266,
|
|
"step": 3405,
|
|
"valid_targets_mean": 7429.9,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 5.992970123022847,
|
|
"grad_norm": 0.3333200137211664,
|
|
"learning_rate": 2.4785981160918703e-06,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824479937553406,
|
|
"step": 3410,
|
|
"valid_targets_mean": 7657.1,
|
|
"valid_targets_min": 7075
|
|
},
|
|
{
|
|
"epoch": 6.001757469244288,
|
|
"grad_norm": 0.3095648198654591,
|
|
"learning_rate": 2.436500212618589e-06,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2807752192020416,
|
|
"step": 3415,
|
|
"valid_targets_mean": 7814.2,
|
|
"valid_targets_min": 6749
|
|
},
|
|
{
|
|
"epoch": 6.010544815465729,
|
|
"grad_norm": 0.3075173199504854,
|
|
"learning_rate": 2.3947396850923664e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928026616573334,
|
|
"step": 3420,
|
|
"valid_targets_mean": 7933.4,
|
|
"valid_targets_min": 7239
|
|
},
|
|
{
|
|
"epoch": 6.019332161687171,
|
|
"grad_norm": 0.299535909340376,
|
|
"learning_rate": 2.35331733568803e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.292161762714386,
|
|
"step": 3425,
|
|
"valid_targets_mean": 8519.8,
|
|
"valid_targets_min": 6248
|
|
},
|
|
{
|
|
"epoch": 6.028119507908611,
|
|
"grad_norm": 0.32022750824787705,
|
|
"learning_rate": 2.312233960084389e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2915361225605011,
|
|
"step": 3430,
|
|
"valid_targets_mean": 7603.8,
|
|
"valid_targets_min": 6706
|
|
},
|
|
{
|
|
"epoch": 6.036906854130053,
|
|
"grad_norm": 0.33443315293974885,
|
|
"learning_rate": 2.271490347448895e-06,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2938150465488434,
|
|
"step": 3435,
|
|
"valid_targets_mean": 7733.7,
|
|
"valid_targets_min": 7175
|
|
},
|
|
{
|
|
"epoch": 6.045694200351494,
|
|
"grad_norm": 0.3258133355986156,
|
|
"learning_rate": 2.231087280422557e-06,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28791695833206177,
|
|
"step": 3440,
|
|
"valid_targets_mean": 7775.0,
|
|
"valid_targets_min": 7127
|
|
},
|
|
{
|
|
"epoch": 6.054481546572935,
|
|
"grad_norm": 0.3422113528790867,
|
|
"learning_rate": 2.1910255351048426e-06,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992529273033142,
|
|
"step": 3445,
|
|
"valid_targets_mean": 7713.1,
|
|
"valid_targets_min": 6958
|
|
},
|
|
{
|
|
"epoch": 6.063268892794376,
|
|
"grad_norm": 0.33857272608122346,
|
|
"learning_rate": 2.151305881038814e-06,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28023239970207214,
|
|
"step": 3450,
|
|
"valid_targets_mean": 7220.9,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 6.072056239015817,
|
|
"grad_norm": 0.3093639323935716,
|
|
"learning_rate": 2.1119290811963134e-06,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2893432378768921,
|
|
"step": 3455,
|
|
"valid_targets_mean": 7880.4,
|
|
"valid_targets_min": 6603
|
|
},
|
|
{
|
|
"epoch": 6.080843585237258,
|
|
"grad_norm": 0.30417854651758086,
|
|
"learning_rate": 2.0728958919633337e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948586642742157,
|
|
"step": 3460,
|
|
"valid_targets_mean": 7946.2,
|
|
"valid_targets_min": 7028
|
|
},
|
|
{
|
|
"epoch": 6.089630931458699,
|
|
"grad_norm": 0.3131048355787601,
|
|
"learning_rate": 2.034207063125473e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28712961077690125,
|
|
"step": 3465,
|
|
"valid_targets_mean": 7673.6,
|
|
"valid_targets_min": 6837
|
|
},
|
|
{
|
|
"epoch": 6.098418277680141,
|
|
"grad_norm": 0.44135555559021356,
|
|
"learning_rate": 1.9958633378535277e-06,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27640682458877563,
|
|
"step": 3470,
|
|
"valid_targets_mean": 8458.7,
|
|
"valid_targets_min": 6296
|
|
},
|
|
{
|
|
"epoch": 6.1072056239015815,
|
|
"grad_norm": 0.3231106733854511,
|
|
"learning_rate": 1.9578654526892425e-06,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28280821442604065,
|
|
"step": 3475,
|
|
"valid_targets_mean": 7895.6,
|
|
"valid_targets_min": 7455
|
|
},
|
|
{
|
|
"epoch": 6.115992970123023,
|
|
"grad_norm": 0.308903440119535,
|
|
"learning_rate": 1.9202141375311335e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916829288005829,
|
|
"step": 3480,
|
|
"valid_targets_mean": 7596.1,
|
|
"valid_targets_min": 6547
|
|
},
|
|
{
|
|
"epoch": 6.124780316344464,
|
|
"grad_norm": 0.32006985936416427,
|
|
"learning_rate": 1.8829101156204844e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005526065826416,
|
|
"step": 3485,
|
|
"valid_targets_mean": 7917.9,
|
|
"valid_targets_min": 6744
|
|
},
|
|
{
|
|
"epoch": 6.1335676625659055,
|
|
"grad_norm": 0.3266735247756828,
|
|
"learning_rate": 1.8459541035274453e-06,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887221574783325,
|
|
"step": 3490,
|
|
"valid_targets_mean": 7609.4,
|
|
"valid_targets_min": 6882
|
|
},
|
|
{
|
|
"epoch": 6.142355008787346,
|
|
"grad_norm": 0.3030295623224466,
|
|
"learning_rate": 1.8093468111372802e-06,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28173595666885376,
|
|
"step": 3495,
|
|
"valid_targets_mean": 7799.6,
|
|
"valid_targets_min": 6645
|
|
},
|
|
{
|
|
"epoch": 6.151142355008787,
|
|
"grad_norm": 0.3211889651008717,
|
|
"learning_rate": 1.7730889416367115e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866337299346924,
|
|
"step": 3500,
|
|
"valid_targets_mean": 7365.6,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.1599297012302285,
|
|
"grad_norm": 0.31769129512356303,
|
|
"learning_rate": 1.73718119150043e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844308614730835,
|
|
"step": 3505,
|
|
"valid_targets_mean": 7739.8,
|
|
"valid_targets_min": 7015
|
|
},
|
|
{
|
|
"epoch": 6.168717047451669,
|
|
"grad_norm": 0.2887437522654917,
|
|
"learning_rate": 1.7016242504777048e-06,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782225012779236,
|
|
"step": 3510,
|
|
"valid_targets_mean": 8382.1,
|
|
"valid_targets_min": 6670
|
|
},
|
|
{
|
|
"epoch": 6.177504393673111,
|
|
"grad_norm": 0.3196770566443562,
|
|
"learning_rate": 1.6664188015791484e-06,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772035002708435,
|
|
"step": 3515,
|
|
"valid_targets_mean": 7498.5,
|
|
"valid_targets_min": 4799
|
|
},
|
|
{
|
|
"epoch": 6.186291739894552,
|
|
"grad_norm": 0.2886453087922719,
|
|
"learning_rate": 1.6315655210635784e-06,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28373849391937256,
|
|
"step": 3520,
|
|
"valid_targets_mean": 9234.0,
|
|
"valid_targets_min": 6924
|
|
},
|
|
{
|
|
"epoch": 6.195079086115993,
|
|
"grad_norm": 0.31452567443023266,
|
|
"learning_rate": 1.5970650784250442e-06,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3046089708805084,
|
|
"step": 3525,
|
|
"valid_targets_mean": 7534.6,
|
|
"valid_targets_min": 6593
|
|
},
|
|
{
|
|
"epoch": 6.203866432337434,
|
|
"grad_norm": 0.33171229659648493,
|
|
"learning_rate": 1.5629181363799517e-06,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.304110050201416,
|
|
"step": 3530,
|
|
"valid_targets_mean": 7414.1,
|
|
"valid_targets_min": 4643
|
|
},
|
|
{
|
|
"epoch": 6.212653778558876,
|
|
"grad_norm": 0.3241057334603984,
|
|
"learning_rate": 1.5291253508543458e-06,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967751622200012,
|
|
"step": 3535,
|
|
"valid_targets_mean": 7636.1,
|
|
"valid_targets_min": 6327
|
|
},
|
|
{
|
|
"epoch": 6.221441124780316,
|
|
"grad_norm": 0.29597922977914076,
|
|
"learning_rate": 1.4956873709713016e-06,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816871106624603,
|
|
"step": 3540,
|
|
"valid_targets_mean": 8233.2,
|
|
"valid_targets_min": 6758
|
|
},
|
|
{
|
|
"epoch": 6.230228471001757,
|
|
"grad_norm": 0.32662913224841694,
|
|
"learning_rate": 1.4626048390384573e-06,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29462772607803345,
|
|
"step": 3545,
|
|
"valid_targets_mean": 7599.8,
|
|
"valid_targets_min": 6835
|
|
},
|
|
{
|
|
"epoch": 6.239015817223199,
|
|
"grad_norm": 0.28643663955838083,
|
|
"learning_rate": 1.4298783905356906e-06,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688906192779541,
|
|
"step": 3550,
|
|
"valid_targets_mean": 8510.6,
|
|
"valid_targets_min": 7169
|
|
},
|
|
{
|
|
"epoch": 6.247803163444639,
|
|
"grad_norm": 0.31827582182584874,
|
|
"learning_rate": 1.3975086541028815e-06,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28534990549087524,
|
|
"step": 3555,
|
|
"valid_targets_mean": 7687.6,
|
|
"valid_targets_min": 6288
|
|
},
|
|
{
|
|
"epoch": 6.256590509666081,
|
|
"grad_norm": 0.2991456687508091,
|
|
"learning_rate": 1.3654962515278692e-06,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955605089664459,
|
|
"step": 3560,
|
|
"valid_targets_mean": 8628.6,
|
|
"valid_targets_min": 7094
|
|
},
|
|
{
|
|
"epoch": 6.265377855887522,
|
|
"grad_norm": 0.3319859213632947,
|
|
"learning_rate": 1.3338417977344853e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28525975346565247,
|
|
"step": 3565,
|
|
"valid_targets_mean": 7747.8,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 6.274165202108963,
|
|
"grad_norm": 0.32759436127422603,
|
|
"learning_rate": 1.302545900770762e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28943735361099243,
|
|
"step": 3570,
|
|
"valid_targets_mean": 8275.8,
|
|
"valid_targets_min": 6800
|
|
},
|
|
{
|
|
"epoch": 6.282952548330404,
|
|
"grad_norm": 0.3306183716934296,
|
|
"learning_rate": 1.2716091617972159e-06,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28983038663864136,
|
|
"step": 3575,
|
|
"valid_targets_mean": 7270.4,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 6.291739894551846,
|
|
"grad_norm": 0.288156104658097,
|
|
"learning_rate": 1.2410321750753518e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841329574584961,
|
|
"step": 3580,
|
|
"valid_targets_mean": 8245.0,
|
|
"valid_targets_min": 7001
|
|
},
|
|
{
|
|
"epoch": 6.300527240773286,
|
|
"grad_norm": 0.3189483618108163,
|
|
"learning_rate": 1.2108155279562017e-06,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2912044823169708,
|
|
"step": 3585,
|
|
"valid_targets_mean": 8160.7,
|
|
"valid_targets_min": 6868
|
|
},
|
|
{
|
|
"epoch": 6.309314586994727,
|
|
"grad_norm": 0.3162820394259654,
|
|
"learning_rate": 1.180959800869077e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28558826446533203,
|
|
"step": 3590,
|
|
"valid_targets_mean": 7097.2,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 6.318101933216169,
|
|
"grad_norm": 0.3227516251151931,
|
|
"learning_rate": 1.1514655673103857e-06,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29469990730285645,
|
|
"step": 3595,
|
|
"valid_targets_mean": 7176.0,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 6.3268892794376095,
|
|
"grad_norm": 0.30824799317875473,
|
|
"learning_rate": 1.1223333938326486e-06,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922055125236511,
|
|
"step": 3600,
|
|
"valid_targets_mean": 8607.1,
|
|
"valid_targets_min": 7140
|
|
},
|
|
{
|
|
"epoch": 6.335676625659051,
|
|
"grad_norm": 0.30642048579731107,
|
|
"learning_rate": 1.0935638400335979e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27427494525909424,
|
|
"step": 3605,
|
|
"valid_targets_mean": 7716.3,
|
|
"valid_targets_min": 6915
|
|
},
|
|
{
|
|
"epoch": 6.344463971880492,
|
|
"grad_norm": 0.312070601993189,
|
|
"learning_rate": 1.0651574585454228e-06,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751944065093994,
|
|
"step": 3610,
|
|
"valid_targets_mean": 7600.4,
|
|
"valid_targets_min": 6989
|
|
},
|
|
{
|
|
"epoch": 6.353251318101933,
|
|
"grad_norm": 0.321136362911428,
|
|
"learning_rate": 1.0371147950241745e-06,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760077714920044,
|
|
"step": 3615,
|
|
"valid_targets_mean": 7750.4,
|
|
"valid_targets_min": 6813
|
|
},
|
|
{
|
|
"epoch": 6.362038664323374,
|
|
"grad_norm": 0.31382693813094326,
|
|
"learning_rate": 1.0094363881392665e-06,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901709973812103,
|
|
"step": 3620,
|
|
"valid_targets_mean": 7908.2,
|
|
"valid_targets_min": 7082
|
|
},
|
|
{
|
|
"epoch": 6.370826010544816,
|
|
"grad_norm": 0.2981295280096082,
|
|
"learning_rate": 9.821227695631386e-07,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29155126214027405,
|
|
"step": 3625,
|
|
"valid_targets_mean": 8371.8,
|
|
"valid_targets_min": 6965
|
|
},
|
|
{
|
|
"epoch": 6.3796133567662565,
|
|
"grad_norm": 0.3109551271191042,
|
|
"learning_rate": 9.551744639610328e-07,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29289332032203674,
|
|
"step": 3630,
|
|
"valid_targets_mean": 7768.1,
|
|
"valid_targets_min": 6719
|
|
},
|
|
{
|
|
"epoch": 6.388400702987697,
|
|
"grad_norm": 0.31434253460561856,
|
|
"learning_rate": 9.285919889809314e-07,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28384581208229065,
|
|
"step": 3635,
|
|
"valid_targets_mean": 7665.1,
|
|
"valid_targets_min": 6037
|
|
},
|
|
{
|
|
"epoch": 6.397188049209139,
|
|
"grad_norm": 0.3092004304420906,
|
|
"learning_rate": 9.023758552435935e-07,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28531426191329956,
|
|
"step": 3640,
|
|
"valid_targets_mean": 7512.6,
|
|
"valid_targets_min": 6270
|
|
},
|
|
{
|
|
"epoch": 6.40597539543058,
|
|
"grad_norm": 0.29452660133273145,
|
|
"learning_rate": 8.765265663327605e-07,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935437560081482,
|
|
"step": 3645,
|
|
"valid_targets_mean": 8569.4,
|
|
"valid_targets_min": 7046
|
|
},
|
|
{
|
|
"epoch": 6.414762741652021,
|
|
"grad_norm": 0.3228870917587924,
|
|
"learning_rate": 8.510446187854793e-07,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30004414916038513,
|
|
"step": 3650,
|
|
"valid_targets_mean": 7375.8,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 6.423550087873462,
|
|
"grad_norm": 0.2939772904058792,
|
|
"learning_rate": 8.259305020825703e-07,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851170003414154,
|
|
"step": 3655,
|
|
"valid_targets_mean": 8229.3,
|
|
"valid_targets_min": 5952
|
|
},
|
|
{
|
|
"epoch": 6.4323374340949035,
|
|
"grad_norm": 0.31427818308611555,
|
|
"learning_rate": 8.011846986392058e-07,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28981322050094604,
|
|
"step": 3660,
|
|
"valid_targets_mean": 7931.1,
|
|
"valid_targets_min": 6975
|
|
},
|
|
{
|
|
"epoch": 6.441124780316344,
|
|
"grad_norm": 0.3041927647921443,
|
|
"learning_rate": 7.768076837956728e-07,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28967010974884033,
|
|
"step": 3665,
|
|
"valid_targets_mean": 8059.9,
|
|
"valid_targets_min": 6901
|
|
},
|
|
{
|
|
"epoch": 6.449912126537786,
|
|
"grad_norm": 0.309905165653539,
|
|
"learning_rate": 7.527999258082052e-07,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293418288230896,
|
|
"step": 3670,
|
|
"valid_targets_mean": 7485.3,
|
|
"valid_targets_min": 6300
|
|
},
|
|
{
|
|
"epoch": 6.458699472759227,
|
|
"grad_norm": 0.3188919596891954,
|
|
"learning_rate": 7.291618858400329e-07,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023364841938019,
|
|
"step": 3675,
|
|
"valid_targets_mean": 8096.2,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.467486818980668,
|
|
"grad_norm": 0.31250436474473514,
|
|
"learning_rate": 7.058940179524842e-07,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28131306171417236,
|
|
"step": 3680,
|
|
"valid_targets_mean": 7808.1,
|
|
"valid_targets_min": 6935
|
|
},
|
|
{
|
|
"epoch": 6.476274165202109,
|
|
"grad_norm": 0.32620003785829454,
|
|
"learning_rate": 6.829967690962802e-07,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2991110682487488,
|
|
"step": 3685,
|
|
"valid_targets_mean": 7430.6,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 6.48506151142355,
|
|
"grad_norm": 0.32019753496373277,
|
|
"learning_rate": 6.604705791029586e-07,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2853609025478363,
|
|
"step": 3690,
|
|
"valid_targets_mean": 7720.6,
|
|
"valid_targets_min": 7246
|
|
},
|
|
{
|
|
"epoch": 6.493848857644991,
|
|
"grad_norm": 0.3328672541361672,
|
|
"learning_rate": 6.383158806764056e-07,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027670383453369,
|
|
"step": 3695,
|
|
"valid_targets_mean": 7264.8,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 6.502636203866432,
|
|
"grad_norm": 0.31555792657873133,
|
|
"learning_rate": 6.165330993845598e-07,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928489148616791,
|
|
"step": 3700,
|
|
"valid_targets_mean": 7662.8,
|
|
"valid_targets_min": 6781
|
|
},
|
|
{
|
|
"epoch": 6.511423550087874,
|
|
"grad_norm": 0.2997259977928076,
|
|
"learning_rate": 5.951226536512278e-07,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773165702819824,
|
|
"step": 3705,
|
|
"valid_targets_mean": 7742.9,
|
|
"valid_targets_min": 6895
|
|
},
|
|
{
|
|
"epoch": 6.520210896309314,
|
|
"grad_norm": 0.32133966328124486,
|
|
"learning_rate": 5.740849547480576e-07,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966843247413635,
|
|
"step": 3710,
|
|
"valid_targets_mean": 7586.9,
|
|
"valid_targets_min": 6442
|
|
},
|
|
{
|
|
"epoch": 6.528998242530756,
|
|
"grad_norm": 0.286972029950041,
|
|
"learning_rate": 5.534204067866178e-07,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870764434337616,
|
|
"step": 3715,
|
|
"valid_targets_mean": 8555.6,
|
|
"valid_targets_min": 6858
|
|
},
|
|
{
|
|
"epoch": 6.537785588752197,
|
|
"grad_norm": 0.3173654710056662,
|
|
"learning_rate": 5.331294067106618e-07,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28059589862823486,
|
|
"step": 3720,
|
|
"valid_targets_mean": 7745.3,
|
|
"valid_targets_min": 6357
|
|
},
|
|
{
|
|
"epoch": 6.546572934973638,
|
|
"grad_norm": 0.30413824614155777,
|
|
"learning_rate": 5.132123442884829e-07,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29954957962036133,
|
|
"step": 3725,
|
|
"valid_targets_mean": 9327.6,
|
|
"valid_targets_min": 6934
|
|
},
|
|
{
|
|
"epoch": 6.555360281195079,
|
|
"grad_norm": 0.3301537920025548,
|
|
"learning_rate": 4.936696021054377e-07,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29152911901474,
|
|
"step": 3730,
|
|
"valid_targets_mean": 7535.8,
|
|
"valid_targets_min": 6846
|
|
},
|
|
{
|
|
"epoch": 6.564147627416521,
|
|
"grad_norm": 0.32086744682083185,
|
|
"learning_rate": 4.7450155555658794e-07,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28596755862236023,
|
|
"step": 3735,
|
|
"valid_targets_mean": 7662.6,
|
|
"valid_targets_min": 6982
|
|
},
|
|
{
|
|
"epoch": 6.572934973637961,
|
|
"grad_norm": 0.3085282182860389,
|
|
"learning_rate": 4.557085728395039e-07,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964562177658081,
|
|
"step": 3740,
|
|
"valid_targets_mean": 7674.9,
|
|
"valid_targets_min": 6779
|
|
},
|
|
{
|
|
"epoch": 6.581722319859402,
|
|
"grad_norm": 0.3048089993533843,
|
|
"learning_rate": 4.372910149471743e-07,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835426926612854,
|
|
"step": 3745,
|
|
"valid_targets_mean": 8123.1,
|
|
"valid_targets_min": 6833
|
|
},
|
|
{
|
|
"epoch": 6.590509666080844,
|
|
"grad_norm": 0.30328995717816737,
|
|
"learning_rate": 4.1924923566108336e-07,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29824134707450867,
|
|
"step": 3750,
|
|
"valid_targets_mean": 7895.9,
|
|
"valid_targets_min": 6334
|
|
},
|
|
{
|
|
"epoch": 6.5992970123022845,
|
|
"grad_norm": 0.3237878832597593,
|
|
"learning_rate": 4.015835815444158e-07,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29479193687438965,
|
|
"step": 3755,
|
|
"valid_targets_mean": 7718.5,
|
|
"valid_targets_min": 6643
|
|
},
|
|
{
|
|
"epoch": 6.608084358523726,
|
|
"grad_norm": 0.3084873991845284,
|
|
"learning_rate": 3.842943919353914e-07,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28741276264190674,
|
|
"step": 3760,
|
|
"valid_targets_mean": 7682.5,
|
|
"valid_targets_min": 7047
|
|
},
|
|
{
|
|
"epoch": 6.616871704745167,
|
|
"grad_norm": 0.31846598698805506,
|
|
"learning_rate": 3.6738199894075454e-07,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965297996997833,
|
|
"step": 3765,
|
|
"valid_targets_mean": 7662.4,
|
|
"valid_targets_min": 6642
|
|
},
|
|
{
|
|
"epoch": 6.6256590509666085,
|
|
"grad_norm": 0.3161130165643001,
|
|
"learning_rate": 3.508467274293903e-07,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2934122383594513,
|
|
"step": 3770,
|
|
"valid_targets_mean": 7776.6,
|
|
"valid_targets_min": 6906
|
|
},
|
|
{
|
|
"epoch": 6.634446397188049,
|
|
"grad_norm": 0.318539965119587,
|
|
"learning_rate": 3.3468889502608957e-07,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931661605834961,
|
|
"step": 3775,
|
|
"valid_targets_mean": 7592.9,
|
|
"valid_targets_min": 7114
|
|
},
|
|
{
|
|
"epoch": 6.643233743409491,
|
|
"grad_norm": 0.3299262714481787,
|
|
"learning_rate": 3.1890881210543625e-07,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29032421112060547,
|
|
"step": 3780,
|
|
"valid_targets_mean": 7454.5,
|
|
"valid_targets_min": 5869
|
|
},
|
|
{
|
|
"epoch": 6.6520210896309315,
|
|
"grad_norm": 0.33860570015403696,
|
|
"learning_rate": 3.0350678178585834e-07,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29499220848083496,
|
|
"step": 3785,
|
|
"valid_targets_mean": 7219.4,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 6.660808435852372,
|
|
"grad_norm": 0.3087571948862897,
|
|
"learning_rate": 2.884830999237953e-07,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27930399775505066,
|
|
"step": 3790,
|
|
"valid_targets_mean": 7931.0,
|
|
"valid_targets_min": 6713
|
|
},
|
|
{
|
|
"epoch": 6.669595782073814,
|
|
"grad_norm": 0.3040127214705321,
|
|
"learning_rate": 2.7383805510802884e-07,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921474575996399,
|
|
"step": 3795,
|
|
"valid_targets_mean": 7779.0,
|
|
"valid_targets_min": 6741
|
|
},
|
|
{
|
|
"epoch": 6.678383128295255,
|
|
"grad_norm": 0.3383204812705453,
|
|
"learning_rate": 2.5957192865411653e-07,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986317574977875,
|
|
"step": 3800,
|
|
"valid_targets_mean": 7771.4,
|
|
"valid_targets_min": 6909
|
|
},
|
|
{
|
|
"epoch": 6.687170474516696,
|
|
"grad_norm": 0.3145720712599281,
|
|
"learning_rate": 2.4568499459901585e-07,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892332673072815,
|
|
"step": 3805,
|
|
"valid_targets_mean": 7687.5,
|
|
"valid_targets_min": 6181
|
|
},
|
|
{
|
|
"epoch": 6.695957820738137,
|
|
"grad_norm": 0.3076130863525781,
|
|
"learning_rate": 2.3217751969579315e-07,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29126110672950745,
|
|
"step": 3810,
|
|
"valid_targets_mean": 7834.6,
|
|
"valid_targets_min": 6803
|
|
},
|
|
{
|
|
"epoch": 6.704745166959579,
|
|
"grad_norm": 0.29341564576186163,
|
|
"learning_rate": 2.1904976340852535e-07,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2846061587333679,
|
|
"step": 3815,
|
|
"valid_targets_mean": 9068.6,
|
|
"valid_targets_min": 7232
|
|
},
|
|
{
|
|
"epoch": 6.713532513181019,
|
|
"grad_norm": 0.32891744732904976,
|
|
"learning_rate": 2.0630197790728834e-07,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955701947212219,
|
|
"step": 3820,
|
|
"valid_targets_mean": 7397.4,
|
|
"valid_targets_min": 3926
|
|
},
|
|
{
|
|
"epoch": 6.722319859402461,
|
|
"grad_norm": 0.30423508236207003,
|
|
"learning_rate": 1.9393440806334096e-07,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28711095452308655,
|
|
"step": 3825,
|
|
"valid_targets_mean": 7855.9,
|
|
"valid_targets_min": 7202
|
|
},
|
|
{
|
|
"epoch": 6.731107205623902,
|
|
"grad_norm": 0.2903631980463757,
|
|
"learning_rate": 1.819472914443998e-07,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27378320693969727,
|
|
"step": 3830,
|
|
"valid_targets_mean": 8380.3,
|
|
"valid_targets_min": 7309
|
|
},
|
|
{
|
|
"epoch": 6.739894551845342,
|
|
"grad_norm": 0.3046077789434804,
|
|
"learning_rate": 1.7034085831008962e-07,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29257017374038696,
|
|
"step": 3835,
|
|
"valid_targets_mean": 8174.4,
|
|
"valid_targets_min": 7042
|
|
},
|
|
{
|
|
"epoch": 6.748681898066784,
|
|
"grad_norm": 0.32891229467930705,
|
|
"learning_rate": 1.5911533160750668e-07,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956711947917938,
|
|
"step": 3840,
|
|
"valid_targets_mean": 7624.0,
|
|
"valid_targets_min": 6117
|
|
},
|
|
{
|
|
"epoch": 6.757469244288225,
|
|
"grad_norm": 0.32114057955257985,
|
|
"learning_rate": 1.4827092696695578e-07,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28788745403289795,
|
|
"step": 3845,
|
|
"valid_targets_mean": 7714.6,
|
|
"valid_targets_min": 7214
|
|
},
|
|
{
|
|
"epoch": 6.766256590509666,
|
|
"grad_norm": 0.309070622824398,
|
|
"learning_rate": 1.3780785269778662e-07,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2973918914794922,
|
|
"step": 3850,
|
|
"valid_targets_mean": 7914.4,
|
|
"valid_targets_min": 7277
|
|
},
|
|
{
|
|
"epoch": 6.775043936731107,
|
|
"grad_norm": 0.30444702757635617,
|
|
"learning_rate": 1.2772630978440836e-07,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28852379322052,
|
|
"step": 3855,
|
|
"valid_targets_mean": 8496.9,
|
|
"valid_targets_min": 6456
|
|
},
|
|
{
|
|
"epoch": 6.783831282952549,
|
|
"grad_norm": 0.31099725831636593,
|
|
"learning_rate": 1.1802649188241921e-07,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.301106333732605,
|
|
"step": 3860,
|
|
"valid_targets_mean": 8705.3,
|
|
"valid_targets_min": 7320
|
|
},
|
|
{
|
|
"epoch": 6.792618629173989,
|
|
"grad_norm": 0.3061079171087977,
|
|
"learning_rate": 1.087085853148917e-07,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869272530078888,
|
|
"step": 3865,
|
|
"valid_targets_mean": 7711.6,
|
|
"valid_targets_min": 6146
|
|
},
|
|
{
|
|
"epoch": 6.801405975395431,
|
|
"grad_norm": 0.31187087252494716,
|
|
"learning_rate": 9.97727690687933e-08,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869059443473816,
|
|
"step": 3870,
|
|
"valid_targets_mean": 7796.6,
|
|
"valid_targets_min": 6426
|
|
},
|
|
{
|
|
"epoch": 6.810193321616872,
|
|
"grad_norm": 0.2948395711008696,
|
|
"learning_rate": 9.121921479154694e-08,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28761497139930725,
|
|
"step": 3875,
|
|
"valid_targets_mean": 8341.2,
|
|
"valid_targets_min": 6642
|
|
},
|
|
{
|
|
"epoch": 6.8189806678383125,
|
|
"grad_norm": 0.3267583846230294,
|
|
"learning_rate": 8.30480867877359e-08,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30055350065231323,
|
|
"step": 3880,
|
|
"valid_targets_mean": 8443.2,
|
|
"valid_targets_min": 7058
|
|
},
|
|
{
|
|
"epoch": 6.827768014059754,
|
|
"grad_norm": 0.33397140582769613,
|
|
"learning_rate": 7.525954201593966e-08,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964109778404236,
|
|
"step": 3885,
|
|
"valid_targets_mean": 7824.0,
|
|
"valid_targets_min": 7005
|
|
},
|
|
{
|
|
"epoch": 6.836555360281195,
|
|
"grad_norm": 0.28970904880913834,
|
|
"learning_rate": 6.785373008573181e-08,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27363112568855286,
|
|
"step": 3890,
|
|
"valid_targets_mean": 7667.0,
|
|
"valid_targets_min": 7054
|
|
},
|
|
{
|
|
"epoch": 6.845342706502636,
|
|
"grad_norm": 0.330650600657506,
|
|
"learning_rate": 6.083079325479357e-08,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953321933746338,
|
|
"step": 3895,
|
|
"valid_targets_mean": 7530.6,
|
|
"valid_targets_min": 6828
|
|
},
|
|
{
|
|
"epoch": 6.854130052724077,
|
|
"grad_norm": 0.31587378767103247,
|
|
"learning_rate": 5.4190866426195866e-08,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28235381841659546,
|
|
"step": 3900,
|
|
"valid_targets_mean": 7967.4,
|
|
"valid_targets_min": 7182
|
|
},
|
|
{
|
|
"epoch": 6.862917398945519,
|
|
"grad_norm": 0.33377421716490563,
|
|
"learning_rate": 4.793407714579035e-08,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27941250801086426,
|
|
"step": 3905,
|
|
"valid_targets_mean": 7071.4,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 6.8717047451669595,
|
|
"grad_norm": 0.31805688656222625,
|
|
"learning_rate": 4.2060545599773574e-08,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866838276386261,
|
|
"step": 3910,
|
|
"valid_targets_mean": 7444.9,
|
|
"valid_targets_min": 6530
|
|
},
|
|
{
|
|
"epoch": 6.880492091388401,
|
|
"grad_norm": 0.30469628346307415,
|
|
"learning_rate": 3.6570384612368834e-08,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791711091995239,
|
|
"step": 3915,
|
|
"valid_targets_mean": 7580.8,
|
|
"valid_targets_min": 6620
|
|
},
|
|
{
|
|
"epoch": 6.889279437609842,
|
|
"grad_norm": 0.2982641769682887,
|
|
"learning_rate": 3.146369964366791e-08,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27555572986602783,
|
|
"step": 3920,
|
|
"valid_targets_mean": 8302.6,
|
|
"valid_targets_min": 7055
|
|
},
|
|
{
|
|
"epoch": 6.898066783831283,
|
|
"grad_norm": 0.27981864432775694,
|
|
"learning_rate": 2.674058878759933e-08,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29031240940093994,
|
|
"step": 3925,
|
|
"valid_targets_mean": 9336.2,
|
|
"valid_targets_min": 6065
|
|
},
|
|
{
|
|
"epoch": 6.906854130052724,
|
|
"grad_norm": 0.31699318487313444,
|
|
"learning_rate": 2.240114277004324e-08,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884048521518707,
|
|
"step": 3930,
|
|
"valid_targets_mean": 7750.0,
|
|
"valid_targets_min": 6825
|
|
},
|
|
{
|
|
"epoch": 6.915641476274165,
|
|
"grad_norm": 0.3150763779616322,
|
|
"learning_rate": 1.844544494709277e-08,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877803444862366,
|
|
"step": 3935,
|
|
"valid_targets_mean": 7871.9,
|
|
"valid_targets_min": 6823
|
|
},
|
|
{
|
|
"epoch": 6.9244288224956065,
|
|
"grad_norm": 0.3012630929462455,
|
|
"learning_rate": 1.4873571303448687e-08,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717800438404083,
|
|
"step": 3940,
|
|
"valid_targets_mean": 7653.2,
|
|
"valid_targets_min": 6664
|
|
},
|
|
{
|
|
"epoch": 6.933216168717047,
|
|
"grad_norm": 0.317214261259486,
|
|
"learning_rate": 1.1685590450962736e-08,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28537219762802124,
|
|
"step": 3945,
|
|
"valid_targets_mean": 7671.8,
|
|
"valid_targets_min": 6867
|
|
},
|
|
{
|
|
"epoch": 6.942003514938489,
|
|
"grad_norm": 0.29991630247313383,
|
|
"learning_rate": 8.881563627320955e-09,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27608373761177063,
|
|
"step": 3950,
|
|
"valid_targets_mean": 7615.9,
|
|
"valid_targets_min": 6974
|
|
},
|
|
{
|
|
"epoch": 6.95079086115993,
|
|
"grad_norm": 0.2959598145640836,
|
|
"learning_rate": 6.461544694864596e-09,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29918140172958374,
|
|
"step": 3955,
|
|
"valid_targets_mean": 8303.6,
|
|
"valid_targets_min": 6911
|
|
},
|
|
{
|
|
"epoch": 6.959578207381371,
|
|
"grad_norm": 0.30081974960794616,
|
|
"learning_rate": 4.4255801395554075e-09,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28856515884399414,
|
|
"step": 3960,
|
|
"valid_targets_mean": 9260.8,
|
|
"valid_targets_min": 7282
|
|
},
|
|
{
|
|
"epoch": 6.968365553602812,
|
|
"grad_norm": 0.32922681699591744,
|
|
"learning_rate": 2.773709070080788e-09,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29428255558013916,
|
|
"step": 3965,
|
|
"valid_targets_mean": 7888.9,
|
|
"valid_targets_min": 6834
|
|
},
|
|
{
|
|
"epoch": 6.977152899824253,
|
|
"grad_norm": 0.31492340742066066,
|
|
"learning_rate": 1.5059632171099402e-09,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29231247305870056,
|
|
"step": 3970,
|
|
"valid_targets_mean": 7748.9,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 6.985940246045694,
|
|
"grad_norm": 0.30231619440695345,
|
|
"learning_rate": 6.22366932676588e-10,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29516685009002686,
|
|
"step": 3975,
|
|
"valid_targets_mean": 8495.4,
|
|
"valid_targets_min": 6964
|
|
},
|
|
{
|
|
"epoch": 6.994727592267135,
|
|
"grad_norm": 0.2997116532045884,
|
|
"learning_rate": 1.229371897149001e-10,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2693384289741516,
|
|
"step": 3980,
|
|
"valid_targets_mean": 7885.3,
|
|
"valid_targets_min": 7269
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778266668319702,
|
|
"step": 3983,
|
|
"total_flos": 1690778105544704.0,
|
|
"train_loss": 0.31329786029258266,
|
|
"train_runtime": 27268.6753,
|
|
"train_samples_per_second": 2.334,
|
|
"train_steps_per_second": 0.146,
|
|
"valid_targets_mean": 7882.6,
|
|
"valid_targets_min": 6749
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3983,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1690778105544704.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|