9607 lines
267 KiB
JSON
9607 lines
267 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 4347,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.008051529790660225,
|
||
|
|
"grad_norm": 18.887031329342683,
|
||
|
|
"learning_rate": 3.6781609195402303e-07,
|
||
|
|
"loss": 0.7617,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.7170521020889282,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 4721.3,
|
||
|
|
"valid_targets_min": 995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01610305958132045,
|
||
|
|
"grad_norm": 17.610592482733104,
|
||
|
|
"learning_rate": 8.275862068965518e-07,
|
||
|
|
"loss": 0.7308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.7500532865524292,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 4560.7,
|
||
|
|
"valid_targets_min": 1282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.024154589371980676,
|
||
|
|
"grad_norm": 17.389455158716025,
|
||
|
|
"learning_rate": 1.2873563218390806e-06,
|
||
|
|
"loss": 0.719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.7645566463470459,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 4224.9,
|
||
|
|
"valid_targets_min": 1424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0322061191626409,
|
||
|
|
"grad_norm": 12.900467657800844,
|
||
|
|
"learning_rate": 1.7471264367816093e-06,
|
||
|
|
"loss": 0.6498,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6394810676574707,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 3705.6,
|
||
|
|
"valid_targets_min": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.040257648953301126,
|
||
|
|
"grad_norm": 8.27577785552847,
|
||
|
|
"learning_rate": 2.206896551724138e-06,
|
||
|
|
"loss": 0.6021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6195220351219177,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 4815.3,
|
||
|
|
"valid_targets_min": 1591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04830917874396135,
|
||
|
|
"grad_norm": 4.570129109709082,
|
||
|
|
"learning_rate": 2.666666666666667e-06,
|
||
|
|
"loss": 0.5337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4826958477497101,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 4646.1,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05636070853462158,
|
||
|
|
"grad_norm": 2.9815097303289773,
|
||
|
|
"learning_rate": 3.1264367816091956e-06,
|
||
|
|
"loss": 0.5145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5005267858505249,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 4164.2,
|
||
|
|
"valid_targets_min": 1256
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0644122383252818,
|
||
|
|
"grad_norm": 1.6024853481776866,
|
||
|
|
"learning_rate": 3.5862068965517243e-06,
|
||
|
|
"loss": 0.4877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.43937644362449646,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 4963.1,
|
||
|
|
"valid_targets_min": 1405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07246376811594203,
|
||
|
|
"grad_norm": 1.1550666348042289,
|
||
|
|
"learning_rate": 4.0459770114942535e-06,
|
||
|
|
"loss": 0.4836,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.45346975326538086,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 5371.4,
|
||
|
|
"valid_targets_min": 1739
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08051529790660225,
|
||
|
|
"grad_norm": 1.1056770926806292,
|
||
|
|
"learning_rate": 4.505747126436782e-06,
|
||
|
|
"loss": 0.4879,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4831688106060028,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 5321.5,
|
||
|
|
"valid_targets_min": 1966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08856682769726248,
|
||
|
|
"grad_norm": 0.9520259825035011,
|
||
|
|
"learning_rate": 4.965517241379311e-06,
|
||
|
|
"loss": 0.4173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.43055927753448486,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 4930.1,
|
||
|
|
"valid_targets_min": 1613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0966183574879227,
|
||
|
|
"grad_norm": 0.7379387322001421,
|
||
|
|
"learning_rate": 5.42528735632184e-06,
|
||
|
|
"loss": 0.4301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.41147735714912415,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 4576.1,
|
||
|
|
"valid_targets_min": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10466988727858294,
|
||
|
|
"grad_norm": 0.6751399532440802,
|
||
|
|
"learning_rate": 5.8850574712643685e-06,
|
||
|
|
"loss": 0.3953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.40098029375076294,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 4767.1,
|
||
|
|
"valid_targets_min": 1984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11272141706924316,
|
||
|
|
"grad_norm": 0.6451168287397513,
|
||
|
|
"learning_rate": 6.344827586206898e-06,
|
||
|
|
"loss": 0.3955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.41312819719314575,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 4322.2,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12077294685990338,
|
||
|
|
"grad_norm": 0.6097646247381094,
|
||
|
|
"learning_rate": 6.804597701149426e-06,
|
||
|
|
"loss": 0.4048,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3773424029350281,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 4922.2,
|
||
|
|
"valid_targets_min": 1736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1288244766505636,
|
||
|
|
"grad_norm": 0.6955263869864199,
|
||
|
|
"learning_rate": 7.264367816091955e-06,
|
||
|
|
"loss": 0.386,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3395404815673828,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 4326.2,
|
||
|
|
"valid_targets_min": 822
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13687600644122383,
|
||
|
|
"grad_norm": 0.6047921080138658,
|
||
|
|
"learning_rate": 7.724137931034483e-06,
|
||
|
|
"loss": 0.373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33859458565711975,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 4358.8,
|
||
|
|
"valid_targets_min": 2465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14492753623188406,
|
||
|
|
"grad_norm": 0.5884903953581806,
|
||
|
|
"learning_rate": 8.183908045977013e-06,
|
||
|
|
"loss": 0.3308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.323479026556015,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 4712.4,
|
||
|
|
"valid_targets_min": 2373
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1529790660225443,
|
||
|
|
"grad_norm": 0.5738600516460006,
|
||
|
|
"learning_rate": 8.643678160919541e-06,
|
||
|
|
"loss": 0.3431,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33414769172668457,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 4288.1,
|
||
|
|
"valid_targets_min": 1850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1610305958132045,
|
||
|
|
"grad_norm": 0.6365501852080249,
|
||
|
|
"learning_rate": 9.10344827586207e-06,
|
||
|
|
"loss": 0.3526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.41237330436706543,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 4589.5,
|
||
|
|
"valid_targets_min": 617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16908212560386474,
|
||
|
|
"grad_norm": 0.5810936825227695,
|
||
|
|
"learning_rate": 9.563218390804598e-06,
|
||
|
|
"loss": 0.3694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3179246783256531,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 4583.3,
|
||
|
|
"valid_targets_min": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17713365539452497,
|
||
|
|
"grad_norm": 0.6085982104059682,
|
||
|
|
"learning_rate": 1.0022988505747126e-05,
|
||
|
|
"loss": 0.3688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3603907823562622,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 3972.6,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18518518518518517,
|
||
|
|
"grad_norm": 0.6219836430762767,
|
||
|
|
"learning_rate": 1.0482758620689658e-05,
|
||
|
|
"loss": 0.3434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36107924580574036,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 3626.8,
|
||
|
|
"valid_targets_min": 784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1932367149758454,
|
||
|
|
"grad_norm": 0.5706613275227181,
|
||
|
|
"learning_rate": 1.0942528735632186e-05,
|
||
|
|
"loss": 0.3638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35068589448928833,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 4590.1,
|
||
|
|
"valid_targets_min": 708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20128824476650564,
|
||
|
|
"grad_norm": 0.5239098489712369,
|
||
|
|
"learning_rate": 1.1402298850574713e-05,
|
||
|
|
"loss": 0.3214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31050339341163635,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 4365.3,
|
||
|
|
"valid_targets_min": 919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20933977455716588,
|
||
|
|
"grad_norm": 0.5408209063854124,
|
||
|
|
"learning_rate": 1.1862068965517241e-05,
|
||
|
|
"loss": 0.3312,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.343374639749527,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 4735.1,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21739130434782608,
|
||
|
|
"grad_norm": 0.5429590967253727,
|
||
|
|
"learning_rate": 1.2321839080459773e-05,
|
||
|
|
"loss": 0.3132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2973455786705017,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 4331.6,
|
||
|
|
"valid_targets_min": 1580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22544283413848631,
|
||
|
|
"grad_norm": 0.5564263430607277,
|
||
|
|
"learning_rate": 1.2781609195402301e-05,
|
||
|
|
"loss": 0.3223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33352577686309814,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 4605.9,
|
||
|
|
"valid_targets_min": 973
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23349436392914655,
|
||
|
|
"grad_norm": 0.59405705309966,
|
||
|
|
"learning_rate": 1.324137931034483e-05,
|
||
|
|
"loss": 0.3197,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31183719635009766,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 3893.1,
|
||
|
|
"valid_targets_min": 1253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24154589371980675,
|
||
|
|
"grad_norm": 0.5158909121745847,
|
||
|
|
"learning_rate": 1.3701149425287356e-05,
|
||
|
|
"loss": 0.3199,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2867949604988098,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 4840.7,
|
||
|
|
"valid_targets_min": 1486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.249597423510467,
|
||
|
|
"grad_norm": 0.6568765520342051,
|
||
|
|
"learning_rate": 1.4160919540229888e-05,
|
||
|
|
"loss": 0.3106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3462482690811157,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 3947.3,
|
||
|
|
"valid_targets_min": 1429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2576489533011272,
|
||
|
|
"grad_norm": 0.6071351268604575,
|
||
|
|
"learning_rate": 1.4620689655172416e-05,
|
||
|
|
"loss": 0.3274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33247584104537964,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 4600.6,
|
||
|
|
"valid_targets_min": 1925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26570048309178745,
|
||
|
|
"grad_norm": 0.538560909580689,
|
||
|
|
"learning_rate": 1.5080459770114944e-05,
|
||
|
|
"loss": 0.3158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31049293279647827,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 5258.5,
|
||
|
|
"valid_targets_min": 1983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27375201288244766,
|
||
|
|
"grad_norm": 0.568539391929678,
|
||
|
|
"learning_rate": 1.5540229885057473e-05,
|
||
|
|
"loss": 0.3157,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30851513147354126,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 4449.2,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28180354267310787,
|
||
|
|
"grad_norm": 0.6078239781315768,
|
||
|
|
"learning_rate": 1.6000000000000003e-05,
|
||
|
|
"loss": 0.3106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.302283376455307,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 4618.0,
|
||
|
|
"valid_targets_min": 2377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2898550724637681,
|
||
|
|
"grad_norm": 0.5885746435904814,
|
||
|
|
"learning_rate": 1.645977011494253e-05,
|
||
|
|
"loss": 0.3296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3404327630996704,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 4848.8,
|
||
|
|
"valid_targets_min": 1075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29790660225442833,
|
||
|
|
"grad_norm": 0.6287600513879442,
|
||
|
|
"learning_rate": 1.691954022988506e-05,
|
||
|
|
"loss": 0.3086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35874509811401367,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 4003.5,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3059581320450886,
|
||
|
|
"grad_norm": 0.5118887113449945,
|
||
|
|
"learning_rate": 1.7379310344827586e-05,
|
||
|
|
"loss": 0.3036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3192656636238098,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 5059.1,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3140096618357488,
|
||
|
|
"grad_norm": 0.5676499502591437,
|
||
|
|
"learning_rate": 1.7839080459770116e-05,
|
||
|
|
"loss": 0.2746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2645234167575836,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 4453.4,
|
||
|
|
"valid_targets_min": 1599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.322061191626409,
|
||
|
|
"grad_norm": 0.5756721398531897,
|
||
|
|
"learning_rate": 1.8298850574712646e-05,
|
||
|
|
"loss": 0.3204,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3237614631652832,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 4916.9,
|
||
|
|
"valid_targets_min": 2747
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33011272141706927,
|
||
|
|
"grad_norm": 0.661205961638061,
|
||
|
|
"learning_rate": 1.8758620689655173e-05,
|
||
|
|
"loss": 0.2988,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30946576595306396,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 4202.1,
|
||
|
|
"valid_targets_min": 1793
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33816425120772947,
|
||
|
|
"grad_norm": 0.5585042067771965,
|
||
|
|
"learning_rate": 1.9218390804597703e-05,
|
||
|
|
"loss": 0.3035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30198991298675537,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 4014.8,
|
||
|
|
"valid_targets_min": 1521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3462157809983897,
|
||
|
|
"grad_norm": 0.5364392722283728,
|
||
|
|
"learning_rate": 1.9678160919540233e-05,
|
||
|
|
"loss": 0.3032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3075558841228485,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 4535.4,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35426731078904994,
|
||
|
|
"grad_norm": 0.5478504018061776,
|
||
|
|
"learning_rate": 2.013793103448276e-05,
|
||
|
|
"loss": 0.293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30238276720046997,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 4278.9,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36231884057971014,
|
||
|
|
"grad_norm": 0.5543366789399233,
|
||
|
|
"learning_rate": 2.059770114942529e-05,
|
||
|
|
"loss": 0.3067,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31759655475616455,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 5445.6,
|
||
|
|
"valid_targets_min": 1006
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37037037037037035,
|
||
|
|
"grad_norm": 0.6286539962949731,
|
||
|
|
"learning_rate": 2.1057471264367816e-05,
|
||
|
|
"loss": 0.2964,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30352896451950073,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 3805.3,
|
||
|
|
"valid_targets_min": 676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3784219001610306,
|
||
|
|
"grad_norm": 0.5644783614075699,
|
||
|
|
"learning_rate": 2.1517241379310346e-05,
|
||
|
|
"loss": 0.2967,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3186013102531433,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 4621.7,
|
||
|
|
"valid_targets_min": 1571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3864734299516908,
|
||
|
|
"grad_norm": 0.5931980260785429,
|
||
|
|
"learning_rate": 2.1977011494252873e-05,
|
||
|
|
"loss": 0.3034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28426745533943176,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 4771.1,
|
||
|
|
"valid_targets_min": 2048
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.394524959742351,
|
||
|
|
"grad_norm": 0.5236909740791719,
|
||
|
|
"learning_rate": 2.2436781609195406e-05,
|
||
|
|
"loss": 0.2915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2973197102546692,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 4744.9,
|
||
|
|
"valid_targets_min": 1209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4025764895330113,
|
||
|
|
"grad_norm": 0.5306243392897267,
|
||
|
|
"learning_rate": 2.2896551724137933e-05,
|
||
|
|
"loss": 0.2952,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3088257312774658,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 5178.1,
|
||
|
|
"valid_targets_min": 1966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4106280193236715,
|
||
|
|
"grad_norm": 0.48677773497495813,
|
||
|
|
"learning_rate": 2.3356321839080463e-05,
|
||
|
|
"loss": 0.285,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26008594036102295,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 5040.2,
|
||
|
|
"valid_targets_min": 1177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41867954911433175,
|
||
|
|
"grad_norm": 1.2774083864267622,
|
||
|
|
"learning_rate": 2.381609195402299e-05,
|
||
|
|
"loss": 0.2967,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2775506377220154,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 4319.1,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42673107890499196,
|
||
|
|
"grad_norm": 0.6457594097564714,
|
||
|
|
"learning_rate": 2.427586206896552e-05,
|
||
|
|
"loss": 0.2886,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2859913408756256,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 3785.8,
|
||
|
|
"valid_targets_min": 284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43478260869565216,
|
||
|
|
"grad_norm": 0.5721794133457904,
|
||
|
|
"learning_rate": 2.4735632183908046e-05,
|
||
|
|
"loss": 0.2675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2737387716770172,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 4904.9,
|
||
|
|
"valid_targets_min": 1197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4428341384863124,
|
||
|
|
"grad_norm": 0.6155727873490212,
|
||
|
|
"learning_rate": 2.5195402298850576e-05,
|
||
|
|
"loss": 0.2987,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.317902147769928,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 3661.2,
|
||
|
|
"valid_targets_min": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45088566827697263,
|
||
|
|
"grad_norm": 0.6349757508130273,
|
||
|
|
"learning_rate": 2.5655172413793103e-05,
|
||
|
|
"loss": 0.282,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2630305290222168,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 3326.7,
|
||
|
|
"valid_targets_min": 941
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45893719806763283,
|
||
|
|
"grad_norm": 0.6148296415866233,
|
||
|
|
"learning_rate": 2.6114942528735636e-05,
|
||
|
|
"loss": 0.2793,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27396440505981445,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 4434.6,
|
||
|
|
"valid_targets_min": 1038
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4669887278582931,
|
||
|
|
"grad_norm": 0.5645172489838126,
|
||
|
|
"learning_rate": 2.6574712643678166e-05,
|
||
|
|
"loss": 0.2686,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25225305557250977,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 4993.3,
|
||
|
|
"valid_targets_min": 1962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4750402576489533,
|
||
|
|
"grad_norm": 0.5739342126572219,
|
||
|
|
"learning_rate": 2.7034482758620693e-05,
|
||
|
|
"loss": 0.2957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.340742826461792,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 4796.8,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4830917874396135,
|
||
|
|
"grad_norm": 0.5587075968581772,
|
||
|
|
"learning_rate": 2.749425287356322e-05,
|
||
|
|
"loss": 0.2832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27218180894851685,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 4788.8,
|
||
|
|
"valid_targets_min": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49114331723027377,
|
||
|
|
"grad_norm": 0.7790422573070022,
|
||
|
|
"learning_rate": 2.795402298850575e-05,
|
||
|
|
"loss": 0.2762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25361332297325134,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 3807.0,
|
||
|
|
"valid_targets_min": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.499194847020934,
|
||
|
|
"grad_norm": 0.5359108312034309,
|
||
|
|
"learning_rate": 2.8413793103448276e-05,
|
||
|
|
"loss": 0.2899,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24744702875614166,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 4844.2,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5072463768115942,
|
||
|
|
"grad_norm": 0.7694244503734501,
|
||
|
|
"learning_rate": 2.8873563218390806e-05,
|
||
|
|
"loss": 0.2574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25962382555007935,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 3957.3,
|
||
|
|
"valid_targets_min": 751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5152979066022544,
|
||
|
|
"grad_norm": 0.6998112569580612,
|
||
|
|
"learning_rate": 2.9333333333333333e-05,
|
||
|
|
"loss": 0.2823,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.283919095993042,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 4178.9,
|
||
|
|
"valid_targets_min": 1504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5233494363929146,
|
||
|
|
"grad_norm": 0.5572107376686236,
|
||
|
|
"learning_rate": 2.9793103448275866e-05,
|
||
|
|
"loss": 0.299,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3307899236679077,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 4934.9,
|
||
|
|
"valid_targets_min": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5314009661835749,
|
||
|
|
"grad_norm": 0.5601650228344617,
|
||
|
|
"learning_rate": 3.0252873563218396e-05,
|
||
|
|
"loss": 0.2824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26864737272262573,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 3976.2,
|
||
|
|
"valid_targets_min": 1313
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5394524959742351,
|
||
|
|
"grad_norm": 0.6400241549564539,
|
||
|
|
"learning_rate": 3.071264367816092e-05,
|
||
|
|
"loss": 0.2817,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2869405746459961,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 4011.7,
|
||
|
|
"valid_targets_min": 952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5475040257648953,
|
||
|
|
"grad_norm": 0.5021644926973708,
|
||
|
|
"learning_rate": 3.117241379310345e-05,
|
||
|
|
"loss": 0.2568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2651926279067993,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 5310.1,
|
||
|
|
"valid_targets_min": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5555555555555556,
|
||
|
|
"grad_norm": 0.819724946601287,
|
||
|
|
"learning_rate": 3.1632183908045976e-05,
|
||
|
|
"loss": 0.2768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.297665536403656,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 3748.9,
|
||
|
|
"valid_targets_min": 249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5636070853462157,
|
||
|
|
"grad_norm": 0.49635945683598137,
|
||
|
|
"learning_rate": 3.2091954022988506e-05,
|
||
|
|
"loss": 0.2736,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27254384756088257,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 4190.4,
|
||
|
|
"valid_targets_min": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.571658615136876,
|
||
|
|
"grad_norm": 1.1643886506532481,
|
||
|
|
"learning_rate": 3.2551724137931036e-05,
|
||
|
|
"loss": 0.2698,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2615281343460083,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 3684.1,
|
||
|
|
"valid_targets_min": 304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5797101449275363,
|
||
|
|
"grad_norm": 0.5045131044975655,
|
||
|
|
"learning_rate": 3.3011494252873566e-05,
|
||
|
|
"loss": 0.2772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28446757793426514,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 5213.9,
|
||
|
|
"valid_targets_min": 2518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5877616747181964,
|
||
|
|
"grad_norm": 0.5901966768401951,
|
||
|
|
"learning_rate": 3.3471264367816096e-05,
|
||
|
|
"loss": 0.2751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2504952549934387,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 4182.6,
|
||
|
|
"valid_targets_min": 691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5958132045088567,
|
||
|
|
"grad_norm": 0.620399649393727,
|
||
|
|
"learning_rate": 3.3931034482758626e-05,
|
||
|
|
"loss": 0.2721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2381853461265564,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 4586.0,
|
||
|
|
"valid_targets_min": 1672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6038647342995169,
|
||
|
|
"grad_norm": 0.5417315383509183,
|
||
|
|
"learning_rate": 3.4390804597701156e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2672516405582428,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 4311.4,
|
||
|
|
"valid_targets_min": 1082
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6119162640901772,
|
||
|
|
"grad_norm": 0.6433514192784039,
|
||
|
|
"learning_rate": 3.485057471264368e-05,
|
||
|
|
"loss": 0.284,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32015344500541687,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 3902.9,
|
||
|
|
"valid_targets_min": 1623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6199677938808373,
|
||
|
|
"grad_norm": 0.4775998328542989,
|
||
|
|
"learning_rate": 3.531034482758621e-05,
|
||
|
|
"loss": 0.2664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24788491427898407,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 4189.4,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6280193236714976,
|
||
|
|
"grad_norm": 0.5122265709609899,
|
||
|
|
"learning_rate": 3.577011494252874e-05,
|
||
|
|
"loss": 0.2738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27622339129447937,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 4446.6,
|
||
|
|
"valid_targets_min": 1670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6360708534621579,
|
||
|
|
"grad_norm": 0.659466553378636,
|
||
|
|
"learning_rate": 3.622988505747126e-05,
|
||
|
|
"loss": 0.2831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3116128742694855,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 4667.6,
|
||
|
|
"valid_targets_min": 1070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.644122383252818,
|
||
|
|
"grad_norm": 0.5849425616180388,
|
||
|
|
"learning_rate": 3.668965517241379e-05,
|
||
|
|
"loss": 0.2548,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27755099534988403,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 4714.6,
|
||
|
|
"valid_targets_min": 995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6521739130434783,
|
||
|
|
"grad_norm": 0.5327108961779472,
|
||
|
|
"learning_rate": 3.714942528735633e-05,
|
||
|
|
"loss": 0.2745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27385851740837097,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 5260.8,
|
||
|
|
"valid_targets_min": 2141
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6602254428341385,
|
||
|
|
"grad_norm": 0.5998643197455865,
|
||
|
|
"learning_rate": 3.760919540229885e-05,
|
||
|
|
"loss": 0.2872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3069554567337036,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 4323.8,
|
||
|
|
"valid_targets_min": 831
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6682769726247987,
|
||
|
|
"grad_norm": 0.5690712799581026,
|
||
|
|
"learning_rate": 3.806896551724138e-05,
|
||
|
|
"loss": 0.2731,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29719623923301697,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 4270.9,
|
||
|
|
"valid_targets_min": 1089
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6763285024154589,
|
||
|
|
"grad_norm": 0.5754513117973928,
|
||
|
|
"learning_rate": 3.852873563218391e-05,
|
||
|
|
"loss": 0.2721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29911819100379944,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 4512.4,
|
||
|
|
"valid_targets_min": 1988
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6843800322061192,
|
||
|
|
"grad_norm": 0.6061703211904361,
|
||
|
|
"learning_rate": 3.898850574712644e-05,
|
||
|
|
"loss": 0.2673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23556432127952576,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 3921.6,
|
||
|
|
"valid_targets_min": 1217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6924315619967794,
|
||
|
|
"grad_norm": 0.5391890622955845,
|
||
|
|
"learning_rate": 3.9448275862068966e-05,
|
||
|
|
"loss": 0.2459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23832057416439056,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 4345.2,
|
||
|
|
"valid_targets_min": 2090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7004830917874396,
|
||
|
|
"grad_norm": 0.6101927641358206,
|
||
|
|
"learning_rate": 3.9908045977011496e-05,
|
||
|
|
"loss": 0.2721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2649371325969696,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 4433.6,
|
||
|
|
"valid_targets_min": 1317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7085346215780999,
|
||
|
|
"grad_norm": 0.5646390271799305,
|
||
|
|
"learning_rate": 3.9999896813789735e-05,
|
||
|
|
"loss": 0.2601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27699440717697144,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 4894.1,
|
||
|
|
"valid_targets_min": 731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.71658615136876,
|
||
|
|
"grad_norm": 0.5718807548846909,
|
||
|
|
"learning_rate": 3.999947762163533e-05,
|
||
|
|
"loss": 0.2623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29374945163726807,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 3907.8,
|
||
|
|
"valid_targets_min": 1022
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7246376811594203,
|
||
|
|
"grad_norm": 0.5614682322405541,
|
||
|
|
"learning_rate": 3.999873598115203e-05,
|
||
|
|
"loss": 0.251,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.257183313369751,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 4987.9,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7326892109500805,
|
||
|
|
"grad_norm": 0.5300074790618707,
|
||
|
|
"learning_rate": 3.999767190429718e-05,
|
||
|
|
"loss": 0.2748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23682445287704468,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 5225.4,
|
||
|
|
"valid_targets_min": 2450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7407407407407407,
|
||
|
|
"grad_norm": 0.6542049865844981,
|
||
|
|
"learning_rate": 3.99962854082267e-05,
|
||
|
|
"loss": 0.2822,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29080742597579956,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 4597.2,
|
||
|
|
"valid_targets_min": 2222
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.748792270531401,
|
||
|
|
"grad_norm": 0.502946880278254,
|
||
|
|
"learning_rate": 3.9994576515294864e-05,
|
||
|
|
"loss": 0.2668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28522011637687683,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 4706.1,
|
||
|
|
"valid_targets_min": 1773
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7568438003220612,
|
||
|
|
"grad_norm": 0.5050040823394027,
|
||
|
|
"learning_rate": 3.999254525305386e-05,
|
||
|
|
"loss": 0.2704,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2506832480430603,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 5194.9,
|
||
|
|
"valid_targets_min": 715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7648953301127214,
|
||
|
|
"grad_norm": 0.8322414020355918,
|
||
|
|
"learning_rate": 3.999019165425341e-05,
|
||
|
|
"loss": 0.2682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29580700397491455,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 3748.2,
|
||
|
|
"valid_targets_min": 1077
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7729468599033816,
|
||
|
|
"grad_norm": 0.5680386432970268,
|
||
|
|
"learning_rate": 3.99875157568402e-05,
|
||
|
|
"loss": 0.2655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2671373188495636,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 4587.8,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7809983896940419,
|
||
|
|
"grad_norm": 0.5112203887556883,
|
||
|
|
"learning_rate": 3.998451760395729e-05,
|
||
|
|
"loss": 0.2623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2868801951408386,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 4774.4,
|
||
|
|
"valid_targets_min": 1792
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.789049919484702,
|
||
|
|
"grad_norm": 0.5606005265672219,
|
||
|
|
"learning_rate": 3.99811972439434e-05,
|
||
|
|
"loss": 0.2643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2751631736755371,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 4579.5,
|
||
|
|
"valid_targets_min": 1396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7971014492753623,
|
||
|
|
"grad_norm": 0.5467905444044749,
|
||
|
|
"learning_rate": 3.997755473033218e-05,
|
||
|
|
"loss": 0.2521,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2562072277069092,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 3827.3,
|
||
|
|
"valid_targets_min": 1348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8051529790660226,
|
||
|
|
"grad_norm": 0.5387721194984708,
|
||
|
|
"learning_rate": 3.997359012185127e-05,
|
||
|
|
"loss": 0.2612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2520545721054077,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 4061.6,
|
||
|
|
"valid_targets_min": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8132045088566827,
|
||
|
|
"grad_norm": 0.5976518636156888,
|
||
|
|
"learning_rate": 3.996930348242141e-05,
|
||
|
|
"loss": 0.2388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23626355826854706,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 4540.9,
|
||
|
|
"valid_targets_min": 1484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.821256038647343,
|
||
|
|
"grad_norm": 0.6166919816090995,
|
||
|
|
"learning_rate": 3.996469488115539e-05,
|
||
|
|
"loss": 0.2583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2615845203399658,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 4508.8,
|
||
|
|
"valid_targets_min": 1246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8293075684380032,
|
||
|
|
"grad_norm": 0.530813397454018,
|
||
|
|
"learning_rate": 3.995976439235694e-05,
|
||
|
|
"loss": 0.2722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2612318694591522,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 4225.1,
|
||
|
|
"valid_targets_min": 1044
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8373590982286635,
|
||
|
|
"grad_norm": 0.6517729260520128,
|
||
|
|
"learning_rate": 3.995451209551953e-05,
|
||
|
|
"loss": 0.2386,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23576730489730835,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 5023.0,
|
||
|
|
"valid_targets_min": 1701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8454106280193237,
|
||
|
|
"grad_norm": 0.4740752251218264,
|
||
|
|
"learning_rate": 3.994893807532509e-05,
|
||
|
|
"loss": 0.2922,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28204214572906494,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 4959.8,
|
||
|
|
"valid_targets_min": 1081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8534621578099839,
|
||
|
|
"grad_norm": 0.5257899294040256,
|
||
|
|
"learning_rate": 3.994304242164265e-05,
|
||
|
|
"loss": 0.2593,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2655045986175537,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 4981.6,
|
||
|
|
"valid_targets_min": 2051
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8615136876006442,
|
||
|
|
"grad_norm": 0.536523092714892,
|
||
|
|
"learning_rate": 3.9936825229526855e-05,
|
||
|
|
"loss": 0.2861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29214853048324585,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 4368.4,
|
||
|
|
"valid_targets_min": 2191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8695652173913043,
|
||
|
|
"grad_norm": 0.5028299546325571,
|
||
|
|
"learning_rate": 3.9930286599216506e-05,
|
||
|
|
"loss": 0.2587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27183425426483154,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 5098.8,
|
||
|
|
"valid_targets_min": 2186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8776167471819646,
|
||
|
|
"grad_norm": 0.5635419055915679,
|
||
|
|
"learning_rate": 3.9923426636132866e-05,
|
||
|
|
"loss": 0.2442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2873607277870178,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 4578.5,
|
||
|
|
"valid_targets_min": 1671
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8856682769726248,
|
||
|
|
"grad_norm": 0.5724413640787459,
|
||
|
|
"learning_rate": 3.991624545087801e-05,
|
||
|
|
"loss": 0.2765,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27895426750183105,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 3958.7,
|
||
|
|
"valid_targets_min": 1181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.893719806763285,
|
||
|
|
"grad_norm": 0.5024007626126078,
|
||
|
|
"learning_rate": 3.9908743159233016e-05,
|
||
|
|
"loss": 0.2569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20082515478134155,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 4550.9,
|
||
|
|
"valid_targets_min": 1029
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9017713365539453,
|
||
|
|
"grad_norm": 0.5485922659002295,
|
||
|
|
"learning_rate": 3.990091988215612e-05,
|
||
|
|
"loss": 0.2468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2621651887893677,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 4163.2,
|
||
|
|
"valid_targets_min": 1507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9098228663446055,
|
||
|
|
"grad_norm": 0.5184933123196136,
|
||
|
|
"learning_rate": 3.989277574578074e-05,
|
||
|
|
"loss": 0.2692,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20918220281600952,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 3785.8,
|
||
|
|
"valid_targets_min": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9178743961352657,
|
||
|
|
"grad_norm": 0.5186343425685319,
|
||
|
|
"learning_rate": 3.9884310881413473e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2877514362335205,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 5365.6,
|
||
|
|
"valid_targets_min": 956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9259259259259259,
|
||
|
|
"grad_norm": 0.5097833267576443,
|
||
|
|
"learning_rate": 3.987552542553194e-05,
|
||
|
|
"loss": 0.2501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23448419570922852,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 4300.2,
|
||
|
|
"valid_targets_min": 1615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9339774557165862,
|
||
|
|
"grad_norm": 0.5183101791069704,
|
||
|
|
"learning_rate": 3.9866419519782636e-05,
|
||
|
|
"loss": 0.2565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2744702696800232,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 4674.6,
|
||
|
|
"valid_targets_min": 267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9420289855072463,
|
||
|
|
"grad_norm": 0.5621277031078485,
|
||
|
|
"learning_rate": 3.985699331097858e-05,
|
||
|
|
"loss": 0.2678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2877187728881836,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 4813.2,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9500805152979066,
|
||
|
|
"grad_norm": 0.6940945000218365,
|
||
|
|
"learning_rate": 3.984724695109702e-05,
|
||
|
|
"loss": 0.2541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29620370268821716,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 5132.4,
|
||
|
|
"valid_targets_min": 1631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9581320450885669,
|
||
|
|
"grad_norm": 0.6065162564844915,
|
||
|
|
"learning_rate": 3.983718059727693e-05,
|
||
|
|
"loss": 0.2557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27759110927581787,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 3982.5,
|
||
|
|
"valid_targets_min": 1549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.966183574879227,
|
||
|
|
"grad_norm": 0.5059275833472612,
|
||
|
|
"learning_rate": 3.9826794411816495e-05,
|
||
|
|
"loss": 0.2645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27101898193359375,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 4788.5,
|
||
|
|
"valid_targets_min": 1713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9742351046698873,
|
||
|
|
"grad_norm": 0.4992312775195265,
|
||
|
|
"learning_rate": 3.981608856217049e-05,
|
||
|
|
"loss": 0.2626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23076659440994263,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 4342.8,
|
||
|
|
"valid_targets_min": 1263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9822866344605475,
|
||
|
|
"grad_norm": 0.5353175524067539,
|
||
|
|
"learning_rate": 3.980506322094761e-05,
|
||
|
|
"loss": 0.2441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2311723828315735,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 4810.8,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9903381642512077,
|
||
|
|
"grad_norm": 0.6330855933495575,
|
||
|
|
"learning_rate": 3.979371856590762e-05,
|
||
|
|
"loss": 0.2715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28571778535842896,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 4376.3,
|
||
|
|
"valid_targets_min": 2240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.998389694041868,
|
||
|
|
"grad_norm": 0.5397840170469355,
|
||
|
|
"learning_rate": 3.978205477995856e-05,
|
||
|
|
"loss": 0.2662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24998024106025696,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 4034.1,
|
||
|
|
"valid_targets_min": 1638
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0064412238325282,
|
||
|
|
"grad_norm": 0.5507361899409122,
|
||
|
|
"learning_rate": 3.9770072051153754e-05,
|
||
|
|
"loss": 0.2544,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.252638041973114,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 4238.3,
|
||
|
|
"valid_targets_min": 1021
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0144927536231885,
|
||
|
|
"grad_norm": 0.5602531648929935,
|
||
|
|
"learning_rate": 3.9757770572688786e-05,
|
||
|
|
"loss": 0.2414,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23810608685016632,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 3836.5,
|
||
|
|
"valid_targets_min": 1569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0225442834138487,
|
||
|
|
"grad_norm": 0.5389828337838378,
|
||
|
|
"learning_rate": 3.9745150542898405e-05,
|
||
|
|
"loss": 0.2349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20239052176475525,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 3506.9,
|
||
|
|
"valid_targets_min": 1175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0305958132045088,
|
||
|
|
"grad_norm": 0.5471627213957879,
|
||
|
|
"learning_rate": 3.97322121652533e-05,
|
||
|
|
"loss": 0.2466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23700180649757385,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 4167.8,
|
||
|
|
"valid_targets_min": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.038647342995169,
|
||
|
|
"grad_norm": 0.5304941534687796,
|
||
|
|
"learning_rate": 3.971895564835683e-05,
|
||
|
|
"loss": 0.2265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21442700922489166,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 4103.1,
|
||
|
|
"valid_targets_min": 1034
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0466988727858293,
|
||
|
|
"grad_norm": 0.5123253191465812,
|
||
|
|
"learning_rate": 3.970538120594166e-05,
|
||
|
|
"loss": 0.2191,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22118300199508667,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 4709.2,
|
||
|
|
"valid_targets_min": 1225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0547504025764896,
|
||
|
|
"grad_norm": 0.5144164225120834,
|
||
|
|
"learning_rate": 3.9691489056866324e-05,
|
||
|
|
"loss": 0.2423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2365712821483612,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 4630.8,
|
||
|
|
"valid_targets_min": 862
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0628019323671498,
|
||
|
|
"grad_norm": 0.6461651582671288,
|
||
|
|
"learning_rate": 3.9677279425111684e-05,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24799947440624237,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 4166.1,
|
||
|
|
"valid_targets_min": 1309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.07085346215781,
|
||
|
|
"grad_norm": 0.5024189972964114,
|
||
|
|
"learning_rate": 3.9662752539777314e-05,
|
||
|
|
"loss": 0.2445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24382641911506653,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 5176.7,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0789049919484701,
|
||
|
|
"grad_norm": 0.9391803347175582,
|
||
|
|
"learning_rate": 3.9647908635077845e-05,
|
||
|
|
"loss": 0.2615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.277189165353775,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 4315.8,
|
||
|
|
"valid_targets_min": 703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0869565217391304,
|
||
|
|
"grad_norm": 0.4882832004268316,
|
||
|
|
"learning_rate": 3.963274795033913e-05,
|
||
|
|
"loss": 0.245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24163475632667542,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 5134.3,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0950080515297906,
|
||
|
|
"grad_norm": 0.6167872996926629,
|
||
|
|
"learning_rate": 3.9617270729994436e-05,
|
||
|
|
"loss": 0.2482,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24507923424243927,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 4063.7,
|
||
|
|
"valid_targets_min": 1058
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.103059581320451,
|
||
|
|
"grad_norm": 0.6232571737295083,
|
||
|
|
"learning_rate": 3.960147722358046e-05,
|
||
|
|
"loss": 0.2504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23517994582653046,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 3350.7,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1111111111111112,
|
||
|
|
"grad_norm": 0.5205417579091651,
|
||
|
|
"learning_rate": 3.958536768573335e-05,
|
||
|
|
"loss": 0.245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25678330659866333,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 4774.0,
|
||
|
|
"valid_targets_min": 1681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1191626409017714,
|
||
|
|
"grad_norm": 0.5540696760333587,
|
||
|
|
"learning_rate": 3.956894237618456e-05,
|
||
|
|
"loss": 0.2455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2578514814376831,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 3721.6,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1272141706924317,
|
||
|
|
"grad_norm": 0.5703134217215095,
|
||
|
|
"learning_rate": 3.955220155975669e-05,
|
||
|
|
"loss": 0.2465,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27087417244911194,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 4647.0,
|
||
|
|
"valid_targets_min": 1837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1352657004830917,
|
||
|
|
"grad_norm": 0.5281006763450222,
|
||
|
|
"learning_rate": 3.9535145506359206e-05,
|
||
|
|
"loss": 0.2431,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2202616184949875,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 4643.4,
|
||
|
|
"valid_targets_min": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.143317230273752,
|
||
|
|
"grad_norm": 0.4639603789841704,
|
||
|
|
"learning_rate": 3.951777449098408e-05,
|
||
|
|
"loss": 0.2377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2633700966835022,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 5459.8,
|
||
|
|
"valid_targets_min": 2264
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1513687600644122,
|
||
|
|
"grad_norm": 0.469137374700247,
|
||
|
|
"learning_rate": 3.9500088793701387e-05,
|
||
|
|
"loss": 0.2415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19833050668239594,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 4964.5,
|
||
|
|
"valid_targets_min": 2160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1594202898550725,
|
||
|
|
"grad_norm": 0.4905202669992282,
|
||
|
|
"learning_rate": 3.948208869965473e-05,
|
||
|
|
"loss": 0.2388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24042034149169922,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 4569.5,
|
||
|
|
"valid_targets_min": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1674718196457328,
|
||
|
|
"grad_norm": 0.5271378070562325,
|
||
|
|
"learning_rate": 3.946377449905672e-05,
|
||
|
|
"loss": 0.2332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24370059370994568,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 4409.8,
|
||
|
|
"valid_targets_min": 1705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1755233494363928,
|
||
|
|
"grad_norm": 0.48816490928722517,
|
||
|
|
"learning_rate": 3.9445146487184226e-05,
|
||
|
|
"loss": 0.2268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23395568132400513,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 4742.6,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.183574879227053,
|
||
|
|
"grad_norm": 0.508703086089454,
|
||
|
|
"learning_rate": 3.942620496437366e-05,
|
||
|
|
"loss": 0.2398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2787438929080963,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 5090.3,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1916264090177133,
|
||
|
|
"grad_norm": 0.5274435230388868,
|
||
|
|
"learning_rate": 3.940695023601612e-05,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23083099722862244,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 4942.7,
|
||
|
|
"valid_targets_min": 1747
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1996779388083736,
|
||
|
|
"grad_norm": 0.48187263788077117,
|
||
|
|
"learning_rate": 3.938738261255247e-05,
|
||
|
|
"loss": 0.2322,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21453389525413513,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 4564.6,
|
||
|
|
"valid_targets_min": 1401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2077294685990339,
|
||
|
|
"grad_norm": 0.7495259641509139,
|
||
|
|
"learning_rate": 3.9367502409468315e-05,
|
||
|
|
"loss": 0.2489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21770915389060974,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 4150.0,
|
||
|
|
"valid_targets_min": 848
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2157809983896941,
|
||
|
|
"grad_norm": 0.513119058591006,
|
||
|
|
"learning_rate": 3.934730994728893e-05,
|
||
|
|
"loss": 0.2449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2321898341178894,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 3742.6,
|
||
|
|
"valid_targets_min": 1247
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2238325281803544,
|
||
|
|
"grad_norm": 0.5088681345844467,
|
||
|
|
"learning_rate": 3.932680555157413e-05,
|
||
|
|
"loss": 0.2362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2543366551399231,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 4571.6,
|
||
|
|
"valid_targets_min": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2318840579710144,
|
||
|
|
"grad_norm": 0.5212074947311489,
|
||
|
|
"learning_rate": 3.9305989552912936e-05,
|
||
|
|
"loss": 0.2311,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24161793291568756,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 4267.8,
|
||
|
|
"valid_targets_min": 2059
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2399355877616747,
|
||
|
|
"grad_norm": 0.6379814909420424,
|
||
|
|
"learning_rate": 3.928486228691831e-05,
|
||
|
|
"loss": 0.2377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25767624378204346,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 4049.4,
|
||
|
|
"valid_targets_min": 1596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.247987117552335,
|
||
|
|
"grad_norm": 0.5355001588932113,
|
||
|
|
"learning_rate": 3.926342409422175e-05,
|
||
|
|
"loss": 0.2504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23288603127002716,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 3844.3,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2560386473429952,
|
||
|
|
"grad_norm": 0.5212100454034752,
|
||
|
|
"learning_rate": 3.924167532046773e-05,
|
||
|
|
"loss": 0.2554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24769359827041626,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 4338.0,
|
||
|
|
"valid_targets_min": 1507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2640901771336555,
|
||
|
|
"grad_norm": 0.5991969877035959,
|
||
|
|
"learning_rate": 3.9219616316308215e-05,
|
||
|
|
"loss": 0.2391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23935574293136597,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 4815.1,
|
||
|
|
"valid_targets_min": 1755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2721417069243155,
|
||
|
|
"grad_norm": 0.5852623692737184,
|
||
|
|
"learning_rate": 3.919724743739694e-05,
|
||
|
|
"loss": 0.2462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2316393107175827,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 4748.1,
|
||
|
|
"valid_targets_min": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2801932367149758,
|
||
|
|
"grad_norm": 0.5400353515222852,
|
||
|
|
"learning_rate": 3.91745690443837e-05,
|
||
|
|
"loss": 0.2372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24360281229019165,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 4905.1,
|
||
|
|
"valid_targets_min": 1697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.288244766505636,
|
||
|
|
"grad_norm": 0.5161454582163929,
|
||
|
|
"learning_rate": 3.915158150290855e-05,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2185833752155304,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 4708.6,
|
||
|
|
"valid_targets_min": 1210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2962962962962963,
|
||
|
|
"grad_norm": 0.5421912889681342,
|
||
|
|
"learning_rate": 3.912828518359588e-05,
|
||
|
|
"loss": 0.2361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24869637191295624,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 5677.7,
|
||
|
|
"valid_targets_min": 2666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3043478260869565,
|
||
|
|
"grad_norm": 0.5587294675034205,
|
||
|
|
"learning_rate": 3.910468046204846e-05,
|
||
|
|
"loss": 0.2255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.240371972322464,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 3585.4,
|
||
|
|
"valid_targets_min": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3123993558776168,
|
||
|
|
"grad_norm": 0.5803843717029648,
|
||
|
|
"learning_rate": 3.908076771884139e-05,
|
||
|
|
"loss": 0.2386,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23448866605758667,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 4076.1,
|
||
|
|
"valid_targets_min": 1353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.320450885668277,
|
||
|
|
"grad_norm": 0.6615696992092541,
|
||
|
|
"learning_rate": 3.905654733951595e-05,
|
||
|
|
"loss": 0.2366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24138154089450836,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 3298.6,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3285024154589373,
|
||
|
|
"grad_norm": 0.48424332287717764,
|
||
|
|
"learning_rate": 3.9032019714573366e-05,
|
||
|
|
"loss": 0.2274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2244749665260315,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 4370.7,
|
||
|
|
"valid_targets_min": 1299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3365539452495974,
|
||
|
|
"grad_norm": 0.5116233310783836,
|
||
|
|
"learning_rate": 3.9007185239468554e-05,
|
||
|
|
"loss": 0.2523,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2363509237766266,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 4251.2,
|
||
|
|
"valid_targets_min": 1057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3446054750402576,
|
||
|
|
"grad_norm": 0.5726251093172268,
|
||
|
|
"learning_rate": 3.8982044314603725e-05,
|
||
|
|
"loss": 0.2382,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25756919384002686,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 4103.4,
|
||
|
|
"valid_targets_min": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3526570048309179,
|
||
|
|
"grad_norm": 0.5399551753973628,
|
||
|
|
"learning_rate": 3.8956597345321927e-05,
|
||
|
|
"loss": 0.2403,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23214933276176453,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 3571.1,
|
||
|
|
"valid_targets_min": 997
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3607085346215781,
|
||
|
|
"grad_norm": 0.5153551957359142,
|
||
|
|
"learning_rate": 3.893084474190051e-05,
|
||
|
|
"loss": 0.2456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24375241994857788,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 4158.2,
|
||
|
|
"valid_targets_min": 952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3687600644122382,
|
||
|
|
"grad_norm": 0.48984930240931984,
|
||
|
|
"learning_rate": 3.890478691954452e-05,
|
||
|
|
"loss": 0.2416,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2558751702308655,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 4738.9,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3768115942028984,
|
||
|
|
"grad_norm": 0.5179307198966028,
|
||
|
|
"learning_rate": 3.8878424298379996e-05,
|
||
|
|
"loss": 0.2445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22707146406173706,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 4074.3,
|
||
|
|
"valid_targets_min": 1101
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3848631239935587,
|
||
|
|
"grad_norm": 0.6293401724856043,
|
||
|
|
"learning_rate": 3.885175730344718e-05,
|
||
|
|
"loss": 0.2487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24598611891269684,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 4331.4,
|
||
|
|
"valid_targets_min": 1429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.392914653784219,
|
||
|
|
"grad_norm": 0.5429776165767362,
|
||
|
|
"learning_rate": 3.882478636469372e-05,
|
||
|
|
"loss": 0.2381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25051477551460266,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 4375.6,
|
||
|
|
"valid_targets_min": 1616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4009661835748792,
|
||
|
|
"grad_norm": 0.5668228228210831,
|
||
|
|
"learning_rate": 3.879751191696766e-05,
|
||
|
|
"loss": 0.2554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2403651624917984,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 3847.9,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4090177133655395,
|
||
|
|
"grad_norm": 0.4916125568229993,
|
||
|
|
"learning_rate": 3.8769934400010506e-05,
|
||
|
|
"loss": 0.2376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23794618248939514,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 4820.1,
|
||
|
|
"valid_targets_min": 284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4170692431561998,
|
||
|
|
"grad_norm": 0.48401044866312015,
|
||
|
|
"learning_rate": 3.8742054258450085e-05,
|
||
|
|
"loss": 0.2435,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25705933570861816,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 4531.7,
|
||
|
|
"valid_targets_min": 1442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.42512077294686,
|
||
|
|
"grad_norm": 0.5305875988771065,
|
||
|
|
"learning_rate": 3.871387194179338e-05,
|
||
|
|
"loss": 0.2428,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20840579271316528,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 4164.4,
|
||
|
|
"valid_targets_min": 2047
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.43317230273752,
|
||
|
|
"grad_norm": 0.5592288973797526,
|
||
|
|
"learning_rate": 3.868538790441931e-05,
|
||
|
|
"loss": 0.2344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24872620403766632,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 4424.3,
|
||
|
|
"valid_targets_min": 1035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4412238325281803,
|
||
|
|
"grad_norm": 0.46555566538841864,
|
||
|
|
"learning_rate": 3.865660260557138e-05,
|
||
|
|
"loss": 0.2324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21978382766246796,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 4317.1,
|
||
|
|
"valid_targets_min": 2278
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4492753623188406,
|
||
|
|
"grad_norm": 0.5185263723338586,
|
||
|
|
"learning_rate": 3.8627516509350286e-05,
|
||
|
|
"loss": 0.2379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2476550042629242,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 3795.6,
|
||
|
|
"valid_targets_min": 1701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4573268921095008,
|
||
|
|
"grad_norm": 0.5514262995566678,
|
||
|
|
"learning_rate": 3.859813008470644e-05,
|
||
|
|
"loss": 0.2488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24102932214736938,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 3926.9,
|
||
|
|
"valid_targets_min": 1209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.465378421900161,
|
||
|
|
"grad_norm": 0.4713138062680591,
|
||
|
|
"learning_rate": 3.856844380543239e-05,
|
||
|
|
"loss": 0.2316,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23857206106185913,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 4829.1,
|
||
|
|
"valid_targets_min": 1822
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4734299516908211,
|
||
|
|
"grad_norm": 0.46745151947518565,
|
||
|
|
"learning_rate": 3.8538458150155186e-05,
|
||
|
|
"loss": 0.2337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21306023001670837,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 5670.3,
|
||
|
|
"valid_targets_min": 1223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4814814814814814,
|
||
|
|
"grad_norm": 0.5399378076969245,
|
||
|
|
"learning_rate": 3.850817360232869e-05,
|
||
|
|
"loss": 0.2228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23806512355804443,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 5083.9,
|
||
|
|
"valid_targets_min": 2183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4895330112721417,
|
||
|
|
"grad_norm": 0.6208023209932018,
|
||
|
|
"learning_rate": 3.8477590650225735e-05,
|
||
|
|
"loss": 0.245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26910722255706787,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 4777.4,
|
||
|
|
"valid_targets_min": 1044
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.497584541062802,
|
||
|
|
"grad_norm": 0.5213298121096795,
|
||
|
|
"learning_rate": 3.8446709786930305e-05,
|
||
|
|
"loss": 0.2341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24538570642471313,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 4161.3,
|
||
|
|
"valid_targets_min": 1399
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5056360708534622,
|
||
|
|
"grad_norm": 0.5236314811464815,
|
||
|
|
"learning_rate": 3.841553151032953e-05,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23516438901424408,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 5095.7,
|
||
|
|
"valid_targets_min": 1027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5136876006441224,
|
||
|
|
"grad_norm": 0.44460197327326756,
|
||
|
|
"learning_rate": 3.8384056323105695e-05,
|
||
|
|
"loss": 0.2441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23193703591823578,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 5266.9,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5217391304347827,
|
||
|
|
"grad_norm": 0.6021732933768308,
|
||
|
|
"learning_rate": 3.835228473272814e-05,
|
||
|
|
"loss": 0.2202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2238101363182068,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 4458.2,
|
||
|
|
"valid_targets_min": 751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.529790660225443,
|
||
|
|
"grad_norm": 0.5548844712586349,
|
||
|
|
"learning_rate": 3.832021725144506e-05,
|
||
|
|
"loss": 0.2345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2418593019247055,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 4469.2,
|
||
|
|
"valid_targets_min": 1717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.537842190016103,
|
||
|
|
"grad_norm": 0.46334027352601115,
|
||
|
|
"learning_rate": 3.828785439627523e-05,
|
||
|
|
"loss": 0.2517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24094292521476746,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 5478.1,
|
||
|
|
"valid_targets_min": 1745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5458937198067633,
|
||
|
|
"grad_norm": 0.5074800565232367,
|
||
|
|
"learning_rate": 3.825519668899972e-05,
|
||
|
|
"loss": 0.2418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25097841024398804,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 5015.4,
|
||
|
|
"valid_targets_min": 1369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5539452495974235,
|
||
|
|
"grad_norm": 0.5212941157766611,
|
||
|
|
"learning_rate": 3.8222244656153444e-05,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2439125031232834,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 4481.0,
|
||
|
|
"valid_targets_min": 973
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5619967793880838,
|
||
|
|
"grad_norm": 0.5111240156665687,
|
||
|
|
"learning_rate": 3.818899882901666e-05,
|
||
|
|
"loss": 0.2359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22901548445224762,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 4092.8,
|
||
|
|
"valid_targets_min": 1591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5700483091787438,
|
||
|
|
"grad_norm": 0.512770693439504,
|
||
|
|
"learning_rate": 3.815545974360644e-05,
|
||
|
|
"loss": 0.2231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23196808993816376,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 4075.3,
|
||
|
|
"valid_targets_min": 1956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.578099838969404,
|
||
|
|
"grad_norm": 0.5042161332697872,
|
||
|
|
"learning_rate": 3.812162794066802e-05,
|
||
|
|
"loss": 0.236,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25058242678642273,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 4614.4,
|
||
|
|
"valid_targets_min": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5861513687600644,
|
||
|
|
"grad_norm": 0.4846950795943882,
|
||
|
|
"learning_rate": 3.8087503965666057e-05,
|
||
|
|
"loss": 0.2359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2251492440700531,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 4310.9,
|
||
|
|
"valid_targets_min": 1978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5942028985507246,
|
||
|
|
"grad_norm": 0.49445825909902524,
|
||
|
|
"learning_rate": 3.805308836877586e-05,
|
||
|
|
"loss": 0.2419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2309640347957611,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 4083.0,
|
||
|
|
"valid_targets_min": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6022544283413849,
|
||
|
|
"grad_norm": 0.522939331982219,
|
||
|
|
"learning_rate": 3.80183817048745e-05,
|
||
|
|
"loss": 0.2301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23157933354377747,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 3599.1,
|
||
|
|
"valid_targets_min": 358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6103059581320451,
|
||
|
|
"grad_norm": 0.5144792659864994,
|
||
|
|
"learning_rate": 3.7983384533531894e-05,
|
||
|
|
"loss": 0.2367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2594456374645233,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 4572.1,
|
||
|
|
"valid_targets_min": 1116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6183574879227054,
|
||
|
|
"grad_norm": 0.514940069429718,
|
||
|
|
"learning_rate": 3.7948097419001736e-05,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23250789940357208,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 4625.0,
|
||
|
|
"valid_targets_min": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6264090177133657,
|
||
|
|
"grad_norm": 0.52221869816674,
|
||
|
|
"learning_rate": 3.7912520930212445e-05,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21590971946716309,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 4035.4,
|
||
|
|
"valid_targets_min": 1483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.634460547504026,
|
||
|
|
"grad_norm": 0.42946813411877144,
|
||
|
|
"learning_rate": 3.7876655640757974e-05,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23158404231071472,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 5336.1,
|
||
|
|
"valid_targets_min": 979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.642512077294686,
|
||
|
|
"grad_norm": 0.4983259472306404,
|
||
|
|
"learning_rate": 3.784050212888857e-05,
|
||
|
|
"loss": 0.2476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2527810335159302,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 5022.8,
|
||
|
|
"valid_targets_min": 2939
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6505636070853462,
|
||
|
|
"grad_norm": 0.46712880691077496,
|
||
|
|
"learning_rate": 3.780406097750141e-05,
|
||
|
|
"loss": 0.2465,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22650480270385742,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 4311.5,
|
||
|
|
"valid_targets_min": 1275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6586151368760065,
|
||
|
|
"grad_norm": 0.5417140597671484,
|
||
|
|
"learning_rate": 3.776733277413127e-05,
|
||
|
|
"loss": 0.2176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.195145845413208,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 4207.8,
|
||
|
|
"valid_targets_min": 1043
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6666666666666665,
|
||
|
|
"grad_norm": 0.4883088038596097,
|
||
|
|
"learning_rate": 3.7730318110941004e-05,
|
||
|
|
"loss": 0.224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21643765270709991,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 4391.3,
|
||
|
|
"valid_targets_min": 2660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6747181964573268,
|
||
|
|
"grad_norm": 0.5289568127223118,
|
||
|
|
"learning_rate": 3.7693017584712013e-05,
|
||
|
|
"loss": 0.2456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23360106348991394,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 4826.2,
|
||
|
|
"valid_targets_min": 2011
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.682769726247987,
|
||
|
|
"grad_norm": 0.5576792989418248,
|
||
|
|
"learning_rate": 3.765543179683462e-05,
|
||
|
|
"loss": 0.2296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24486708641052246,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 3934.4,
|
||
|
|
"valid_targets_min": 1683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6908212560386473,
|
||
|
|
"grad_norm": 0.5017617827412163,
|
||
|
|
"learning_rate": 3.7617561353298395e-05,
|
||
|
|
"loss": 0.2521,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24896810948848724,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 4011.8,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6988727858293076,
|
||
|
|
"grad_norm": 0.4860709875293544,
|
||
|
|
"learning_rate": 3.7579406864682327e-05,
|
||
|
|
"loss": 0.2264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24139925837516785,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 4299.9,
|
||
|
|
"valid_targets_min": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7069243156199678,
|
||
|
|
"grad_norm": 0.5345894675911452,
|
||
|
|
"learning_rate": 3.7540968946145036e-05,
|
||
|
|
"loss": 0.241,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2598978877067566,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 4928.6,
|
||
|
|
"valid_targets_min": 1283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.714975845410628,
|
||
|
|
"grad_norm": 0.5465814570656329,
|
||
|
|
"learning_rate": 3.750224821741486e-05,
|
||
|
|
"loss": 0.2358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2219647765159607,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 5155.9,
|
||
|
|
"valid_targets_min": 1303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7230273752012883,
|
||
|
|
"grad_norm": 0.5300553470904004,
|
||
|
|
"learning_rate": 3.7463245302779795e-05,
|
||
|
|
"loss": 0.2298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28267112374305725,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 3852.3,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7310789049919486,
|
||
|
|
"grad_norm": 0.4624218505703108,
|
||
|
|
"learning_rate": 3.742396083107751e-05,
|
||
|
|
"loss": 0.2128,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21661648154258728,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 4283.7,
|
||
|
|
"valid_targets_min": 1147
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7391304347826086,
|
||
|
|
"grad_norm": 0.4993979746767997,
|
||
|
|
"learning_rate": 3.7384395435685166e-05,
|
||
|
|
"loss": 0.2346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21909837424755096,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 4359.1,
|
||
|
|
"valid_targets_min": 2250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.747181964573269,
|
||
|
|
"grad_norm": 0.5407067040296573,
|
||
|
|
"learning_rate": 3.7344549754509196e-05,
|
||
|
|
"loss": 0.2353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22699233889579773,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 3677.0,
|
||
|
|
"valid_targets_min": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7552334943639292,
|
||
|
|
"grad_norm": 0.4642737834973646,
|
||
|
|
"learning_rate": 3.7304424429975046e-05,
|
||
|
|
"loss": 0.2318,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22292137145996094,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 5690.0,
|
||
|
|
"valid_targets_min": 782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7632850241545892,
|
||
|
|
"grad_norm": 0.5671519558623886,
|
||
|
|
"learning_rate": 3.726402010901681e-05,
|
||
|
|
"loss": 0.2353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21334418654441833,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 4616.9,
|
||
|
|
"valid_targets_min": 1898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7713365539452495,
|
||
|
|
"grad_norm": 0.5083634785423456,
|
||
|
|
"learning_rate": 3.722333744306678e-05,
|
||
|
|
"loss": 0.2249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21738044917583466,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 5448.9,
|
||
|
|
"valid_targets_min": 1726
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7793880837359097,
|
||
|
|
"grad_norm": 0.8896163020318464,
|
||
|
|
"learning_rate": 3.7182377088044984e-05,
|
||
|
|
"loss": 0.2286,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22921855747699738,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 4703.8,
|
||
|
|
"valid_targets_min": 1508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.78743961352657,
|
||
|
|
"grad_norm": 0.5450639925225488,
|
||
|
|
"learning_rate": 3.7141139704348576e-05,
|
||
|
|
"loss": 0.228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21833300590515137,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 3445.8,
|
||
|
|
"valid_targets_min": 1099
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7954911433172303,
|
||
|
|
"grad_norm": 0.6520638172384545,
|
||
|
|
"learning_rate": 3.7099625956841175e-05,
|
||
|
|
"loss": 0.223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25272321701049805,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 3248.3,
|
||
|
|
"valid_targets_min": 617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8035426731078905,
|
||
|
|
"grad_norm": 0.4493969588039246,
|
||
|
|
"learning_rate": 3.70578365148422e-05,
|
||
|
|
"loss": 0.2367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23505555093288422,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 5024.8,
|
||
|
|
"valid_targets_min": 1833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8115942028985508,
|
||
|
|
"grad_norm": 0.4827090106067237,
|
||
|
|
"learning_rate": 3.701577205211604e-05,
|
||
|
|
"loss": 0.2545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2535856366157532,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 4428.5,
|
||
|
|
"valid_targets_min": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.819645732689211,
|
||
|
|
"grad_norm": 0.6263613356926185,
|
||
|
|
"learning_rate": 3.697343324686119e-05,
|
||
|
|
"loss": 0.2272,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23104625940322876,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 4673.7,
|
||
|
|
"valid_targets_min": 1449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8276972624798713,
|
||
|
|
"grad_norm": 0.5298323694554691,
|
||
|
|
"learning_rate": 3.693082078169933e-05,
|
||
|
|
"loss": 0.2427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24033012986183167,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 4732.1,
|
||
|
|
"valid_targets_min": 1605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8357487922705316,
|
||
|
|
"grad_norm": 0.4783853550563579,
|
||
|
|
"learning_rate": 3.68879353436643e-05,
|
||
|
|
"loss": 0.2189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2028733789920807,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 4867.9,
|
||
|
|
"valid_targets_min": 2154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8438003220611916,
|
||
|
|
"grad_norm": 0.4595078299986765,
|
||
|
|
"learning_rate": 3.684477762419108e-05,
|
||
|
|
"loss": 0.2194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.215766042470932,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 4522.2,
|
||
|
|
"valid_targets_min": 2208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8518518518518519,
|
||
|
|
"grad_norm": 0.5149239335560051,
|
||
|
|
"learning_rate": 3.6801348319104546e-05,
|
||
|
|
"loss": 0.2319,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2230435311794281,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 3680.6,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8599033816425121,
|
||
|
|
"grad_norm": 0.42919344493593076,
|
||
|
|
"learning_rate": 3.675764812860833e-05,
|
||
|
|
"loss": 0.2518,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20287317037582397,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 5642.9,
|
||
|
|
"valid_targets_min": 2036
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8679549114331722,
|
||
|
|
"grad_norm": 0.4884349396198345,
|
||
|
|
"learning_rate": 3.671367775727353e-05,
|
||
|
|
"loss": 0.2463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24478939175605774,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 4762.1,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8760064412238324,
|
||
|
|
"grad_norm": 0.49953971630838795,
|
||
|
|
"learning_rate": 3.666943791402726e-05,
|
||
|
|
"loss": 0.2527,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2292686104774475,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 4543.8,
|
||
|
|
"valid_targets_min": 1026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8840579710144927,
|
||
|
|
"grad_norm": 0.4644474561197982,
|
||
|
|
"learning_rate": 3.662492931214137e-05,
|
||
|
|
"loss": 0.2492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2597373127937317,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 5243.1,
|
||
|
|
"valid_targets_min": 1246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.892109500805153,
|
||
|
|
"grad_norm": 0.5180686308684069,
|
||
|
|
"learning_rate": 3.6580152669220784e-05,
|
||
|
|
"loss": 0.2365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2606375813484192,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 4800.7,
|
||
|
|
"valid_targets_min": 2456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9001610305958132,
|
||
|
|
"grad_norm": 0.42092491518373576,
|
||
|
|
"learning_rate": 3.6535108707192053e-05,
|
||
|
|
"loss": 0.2175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22271546721458435,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 5251.3,
|
||
|
|
"valid_targets_min": 1616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9082125603864735,
|
||
|
|
"grad_norm": 0.5826394657065203,
|
||
|
|
"learning_rate": 3.648979815229167e-05,
|
||
|
|
"loss": 0.2372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20627357065677643,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 3834.6,
|
||
|
|
"valid_targets_min": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9162640901771337,
|
||
|
|
"grad_norm": 0.7082935353213954,
|
||
|
|
"learning_rate": 3.644422173505433e-05,
|
||
|
|
"loss": 0.2517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24815203249454498,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 4642.2,
|
||
|
|
"valid_targets_min": 338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.924315619967794,
|
||
|
|
"grad_norm": 0.5088420525828131,
|
||
|
|
"learning_rate": 3.639838019030123e-05,
|
||
|
|
"loss": 0.2303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23133976757526398,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 4064.8,
|
||
|
|
"valid_targets_min": 1849
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9323671497584543,
|
||
|
|
"grad_norm": 0.4807696052913685,
|
||
|
|
"learning_rate": 3.635227425712812e-05,
|
||
|
|
"loss": 0.2441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24485018849372864,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 5358.3,
|
||
|
|
"valid_targets_min": 1956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9404186795491143,
|
||
|
|
"grad_norm": 0.5374569252336697,
|
||
|
|
"learning_rate": 3.6305904678893504e-05,
|
||
|
|
"loss": 0.2417,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23684567213058472,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 4465.9,
|
||
|
|
"valid_targets_min": 2036
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9484702093397746,
|
||
|
|
"grad_norm": 0.4315082896253737,
|
||
|
|
"learning_rate": 3.6259272203206535e-05,
|
||
|
|
"loss": 0.2397,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20846745371818542,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 5167.9,
|
||
|
|
"valid_targets_min": 1434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9565217391304348,
|
||
|
|
"grad_norm": 0.5262891588589963,
|
||
|
|
"learning_rate": 3.621237758191505e-05,
|
||
|
|
"loss": 0.2385,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24596351385116577,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 3761.8,
|
||
|
|
"valid_targets_min": 1066
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9645732689210949,
|
||
|
|
"grad_norm": 0.49628184032222405,
|
||
|
|
"learning_rate": 3.616522157109342e-05,
|
||
|
|
"loss": 0.2198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21257467567920685,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 4065.5,
|
||
|
|
"valid_targets_min": 1897
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9726247987117551,
|
||
|
|
"grad_norm": 0.4787291955056688,
|
||
|
|
"learning_rate": 3.6117804931030324e-05,
|
||
|
|
"loss": 0.2457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2495032548904419,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 4767.3,
|
||
|
|
"valid_targets_min": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9806763285024154,
|
||
|
|
"grad_norm": 0.4952065016883059,
|
||
|
|
"learning_rate": 3.607012842621657e-05,
|
||
|
|
"loss": 0.2273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23980122804641724,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 4069.4,
|
||
|
|
"valid_targets_min": 1260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9887278582930756,
|
||
|
|
"grad_norm": 0.5230866628171786,
|
||
|
|
"learning_rate": 3.602219282533269e-05,
|
||
|
|
"loss": 0.2255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2368500828742981,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 4682.7,
|
||
|
|
"valid_targets_min": 2343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.996779388083736,
|
||
|
|
"grad_norm": 0.5246193826991147,
|
||
|
|
"learning_rate": 3.597399890123659e-05,
|
||
|
|
"loss": 0.2329,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21333156526088715,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 4692.2,
|
||
|
|
"valid_targets_min": 2047
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.004830917874396,
|
||
|
|
"grad_norm": 0.505863313320365,
|
||
|
|
"learning_rate": 3.5925547430951094e-05,
|
||
|
|
"loss": 0.2099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20909734070301056,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 4637.2,
|
||
|
|
"valid_targets_min": 989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0128824476650564,
|
||
|
|
"grad_norm": 0.43133643927265986,
|
||
|
|
"learning_rate": 3.587683919565136e-05,
|
||
|
|
"loss": 0.2156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19329163432121277,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 5283.4,
|
||
|
|
"valid_targets_min": 1835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0209339774557167,
|
||
|
|
"grad_norm": 0.4837745442669591,
|
||
|
|
"learning_rate": 3.582787498065237e-05,
|
||
|
|
"loss": 0.2031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20409853756427765,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 4767.1,
|
||
|
|
"valid_targets_min": 1074
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.028985507246377,
|
||
|
|
"grad_norm": 0.6452848255637806,
|
||
|
|
"learning_rate": 3.577865557539621e-05,
|
||
|
|
"loss": 0.1953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1907821148633957,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 4763.1,
|
||
|
|
"valid_targets_min": 392
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.037037037037037,
|
||
|
|
"grad_norm": 0.5867010578619666,
|
||
|
|
"learning_rate": 3.572918177343935e-05,
|
||
|
|
"loss": 0.2132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1922091245651245,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 4118.4,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0450885668276975,
|
||
|
|
"grad_norm": 0.48474397713094575,
|
||
|
|
"learning_rate": 3.567945437243987e-05,
|
||
|
|
"loss": 0.222,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19150424003601074,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 4600.4,
|
||
|
|
"valid_targets_min": 1066
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0531400966183573,
|
||
|
|
"grad_norm": 0.5113649508317663,
|
||
|
|
"learning_rate": 3.5629474174144564e-05,
|
||
|
|
"loss": 0.2242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.253940224647522,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 5131.0,
|
||
|
|
"valid_targets_min": 2438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0611916264090175,
|
||
|
|
"grad_norm": 0.491663340986429,
|
||
|
|
"learning_rate": 3.5579241984376065e-05,
|
||
|
|
"loss": 0.217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22499999403953552,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 3951.9,
|
||
|
|
"valid_targets_min": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.069243156199678,
|
||
|
|
"grad_norm": 0.520462248826658,
|
||
|
|
"learning_rate": 3.5528758613019804e-05,
|
||
|
|
"loss": 0.2142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19735172390937805,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 3831.2,
|
||
|
|
"valid_targets_min": 1088
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.077294685990338,
|
||
|
|
"grad_norm": 0.5206723700369398,
|
||
|
|
"learning_rate": 3.547802487401097e-05,
|
||
|
|
"loss": 0.2294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23212730884552002,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 5061.6,
|
||
|
|
"valid_targets_min": 2180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0853462157809983,
|
||
|
|
"grad_norm": 0.577226384156968,
|
||
|
|
"learning_rate": 3.54270415853214e-05,
|
||
|
|
"loss": 0.22,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20443934202194214,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 3395.8,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0933977455716586,
|
||
|
|
"grad_norm": 0.4423746810496331,
|
||
|
|
"learning_rate": 3.537580956894638e-05,
|
||
|
|
"loss": 0.225,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21375350654125214,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 5160.8,
|
||
|
|
"valid_targets_min": 1507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.101449275362319,
|
||
|
|
"grad_norm": 0.5548629803187833,
|
||
|
|
"learning_rate": 3.532432965089138e-05,
|
||
|
|
"loss": 0.2134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22949251532554626,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 5229.6,
|
||
|
|
"valid_targets_min": 2557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.109500805152979,
|
||
|
|
"grad_norm": 0.4906711255865245,
|
||
|
|
"learning_rate": 3.527260266115876e-05,
|
||
|
|
"loss": 0.2215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2087794542312622,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 5409.4,
|
||
|
|
"valid_targets_min": 2349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1175523349436394,
|
||
|
|
"grad_norm": 0.6560805934109221,
|
||
|
|
"learning_rate": 3.522062943373438e-05,
|
||
|
|
"loss": 0.195,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17596739530563354,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 4226.9,
|
||
|
|
"valid_targets_min": 1025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1256038647342996,
|
||
|
|
"grad_norm": 0.7107665752653891,
|
||
|
|
"learning_rate": 3.516841080657413e-05,
|
||
|
|
"loss": 0.22,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23091405630111694,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 4053.8,
|
||
|
|
"valid_targets_min": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.13365539452496,
|
||
|
|
"grad_norm": 0.5185713231757747,
|
||
|
|
"learning_rate": 3.511594762159046e-05,
|
||
|
|
"loss": 0.2269,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20578667521476746,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 4018.0,
|
||
|
|
"valid_targets_min": 978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.14170692431562,
|
||
|
|
"grad_norm": 0.463159581350649,
|
||
|
|
"learning_rate": 3.506324072463878e-05,
|
||
|
|
"loss": 0.2049,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1929136961698532,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 4945.8,
|
||
|
|
"valid_targets_min": 2099
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.14975845410628,
|
||
|
|
"grad_norm": 0.5447005808188629,
|
||
|
|
"learning_rate": 3.5010290965503826e-05,
|
||
|
|
"loss": 0.2194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23045824468135834,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 4698.5,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1578099838969402,
|
||
|
|
"grad_norm": 0.4595107321034477,
|
||
|
|
"learning_rate": 3.495709919788597e-05,
|
||
|
|
"loss": 0.2076,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18875578045845032,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 4457.4,
|
||
|
|
"valid_targets_min": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1658615136876005,
|
||
|
|
"grad_norm": 0.6481051628858272,
|
||
|
|
"learning_rate": 3.490366627938742e-05,
|
||
|
|
"loss": 0.216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22552573680877686,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 4907.9,
|
||
|
|
"valid_targets_min": 1381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1739130434782608,
|
||
|
|
"grad_norm": 0.8255339190022734,
|
||
|
|
"learning_rate": 3.484999307149846e-05,
|
||
|
|
"loss": 0.2202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20174431800842285,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 3897.0,
|
||
|
|
"valid_targets_min": 979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.181964573268921,
|
||
|
|
"grad_norm": 0.47550594396325907,
|
||
|
|
"learning_rate": 3.47960804395835e-05,
|
||
|
|
"loss": 0.2158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21997377276420593,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 4916.5,
|
||
|
|
"valid_targets_min": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1900161030595813,
|
||
|
|
"grad_norm": 0.5142914581894688,
|
||
|
|
"learning_rate": 3.474192925286714e-05,
|
||
|
|
"loss": 0.2166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2234359234571457,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 4057.0,
|
||
|
|
"valid_targets_min": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1980676328502415,
|
||
|
|
"grad_norm": 0.5135241141747307,
|
||
|
|
"learning_rate": 3.468754038442017e-05,
|
||
|
|
"loss": 0.2079,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21173028647899628,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 4122.8,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.206119162640902,
|
||
|
|
"grad_norm": 0.5281981416757465,
|
||
|
|
"learning_rate": 3.463291471114548e-05,
|
||
|
|
"loss": 0.2135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2580035328865051,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 4534.7,
|
||
|
|
"valid_targets_min": 1253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.214170692431562,
|
||
|
|
"grad_norm": 0.546080448981762,
|
||
|
|
"learning_rate": 3.4578053113763936e-05,
|
||
|
|
"loss": 0.2185,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2357328236103058,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 4083.2,
|
||
|
|
"valid_targets_min": 1119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2222222222222223,
|
||
|
|
"grad_norm": 0.43510725221723323,
|
||
|
|
"learning_rate": 3.452295647680014e-05,
|
||
|
|
"loss": 0.2108,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.210770383477211,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 5021.1,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2302737520128826,
|
||
|
|
"grad_norm": 0.45576706352160945,
|
||
|
|
"learning_rate": 3.4467625688568245e-05,
|
||
|
|
"loss": 0.21,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20456649363040924,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 4813.6,
|
||
|
|
"valid_targets_min": 1879
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.238325281803543,
|
||
|
|
"grad_norm": 0.4605011820162796,
|
||
|
|
"learning_rate": 3.4412061641157546e-05,
|
||
|
|
"loss": 0.2026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1862925887107849,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 5375.2,
|
||
|
|
"valid_targets_min": 1405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.246376811594203,
|
||
|
|
"grad_norm": 0.5067717085593049,
|
||
|
|
"learning_rate": 3.435626523041815e-05,
|
||
|
|
"loss": 0.2132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20806747674942017,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 4125.7,
|
||
|
|
"valid_targets_min": 2104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2544283413848634,
|
||
|
|
"grad_norm": 0.503211076543084,
|
||
|
|
"learning_rate": 3.430023735594653e-05,
|
||
|
|
"loss": 0.2163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23167727887630463,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 4897.6,
|
||
|
|
"valid_targets_min": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.262479871175523,
|
||
|
|
"grad_norm": 0.5252844399823857,
|
||
|
|
"learning_rate": 3.4243978921071005e-05,
|
||
|
|
"loss": 0.2117,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18407735228538513,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 4546.8,
|
||
|
|
"valid_targets_min": 1908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2705314009661834,
|
||
|
|
"grad_norm": 0.5703046217142366,
|
||
|
|
"learning_rate": 3.418749083283719e-05,
|
||
|
|
"loss": 0.223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23428402841091156,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 4894.1,
|
||
|
|
"valid_targets_min": 1720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2785829307568437,
|
||
|
|
"grad_norm": 0.45360806117795466,
|
||
|
|
"learning_rate": 3.413077400199334e-05,
|
||
|
|
"loss": 0.1978,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16744893789291382,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 4128.2,
|
||
|
|
"valid_targets_min": 1177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.286634460547504,
|
||
|
|
"grad_norm": 0.48066382062291113,
|
||
|
|
"learning_rate": 3.407382934297571e-05,
|
||
|
|
"loss": 0.2096,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21223318576812744,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 4697.8,
|
||
|
|
"valid_targets_min": 1982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2946859903381642,
|
||
|
|
"grad_norm": 0.4623146781727566,
|
||
|
|
"learning_rate": 3.4016657773893785e-05,
|
||
|
|
"loss": 0.1946,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.186685249209404,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 5018.8,
|
||
|
|
"valid_targets_min": 1843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3027375201288245,
|
||
|
|
"grad_norm": 0.5051350867881649,
|
||
|
|
"learning_rate": 3.3959260216515495e-05,
|
||
|
|
"loss": 0.2309,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1981533169746399,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 4461.0,
|
||
|
|
"valid_targets_min": 1264
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3107890499194848,
|
||
|
|
"grad_norm": 0.547928503478585,
|
||
|
|
"learning_rate": 3.3901637596252325e-05,
|
||
|
|
"loss": 0.2059,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20269712805747986,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 4606.9,
|
||
|
|
"valid_targets_min": 216
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.318840579710145,
|
||
|
|
"grad_norm": 0.5050671980985264,
|
||
|
|
"learning_rate": 3.384379084214443e-05,
|
||
|
|
"loss": 0.2157,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21335461735725403,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 3892.7,
|
||
|
|
"valid_targets_min": 1209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3268921095008053,
|
||
|
|
"grad_norm": 0.48909304735454484,
|
||
|
|
"learning_rate": 3.378572088684562e-05,
|
||
|
|
"loss": 0.2037,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19523759186267853,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 4354.8,
|
||
|
|
"valid_targets_min": 1308
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3349436392914655,
|
||
|
|
"grad_norm": 0.6388146830801954,
|
||
|
|
"learning_rate": 3.372742866660836e-05,
|
||
|
|
"loss": 0.2178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.198106050491333,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 4365.9,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.342995169082126,
|
||
|
|
"grad_norm": 0.5410396103360308,
|
||
|
|
"learning_rate": 3.3668915121268636e-05,
|
||
|
|
"loss": 0.212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23412463068962097,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 4374.0,
|
||
|
|
"valid_targets_min": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3510466988727856,
|
||
|
|
"grad_norm": 0.5291219901130383,
|
||
|
|
"learning_rate": 3.361018119423085e-05,
|
||
|
|
"loss": 0.2084,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2332005500793457,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 3958.9,
|
||
|
|
"valid_targets_min": 1381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.359098228663446,
|
||
|
|
"grad_norm": 0.5100995870174685,
|
||
|
|
"learning_rate": 3.3551227832452555e-05,
|
||
|
|
"loss": 0.2113,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2195730209350586,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 4228.3,
|
||
|
|
"valid_targets_min": 2352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.367149758454106,
|
||
|
|
"grad_norm": 0.5322099118621038,
|
||
|
|
"learning_rate": 3.3492055986429235e-05,
|
||
|
|
"loss": 0.2227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22615352272987366,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 4539.8,
|
||
|
|
"valid_targets_min": 296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3752012882447664,
|
||
|
|
"grad_norm": 0.4656836442072764,
|
||
|
|
"learning_rate": 3.3432666610178936e-05,
|
||
|
|
"loss": 0.2039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17955255508422852,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 4149.8,
|
||
|
|
"valid_targets_min": 1631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3832528180354267,
|
||
|
|
"grad_norm": 0.5493172940293227,
|
||
|
|
"learning_rate": 3.3373060661226944e-05,
|
||
|
|
"loss": 0.2078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23940381407737732,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 4181.2,
|
||
|
|
"valid_targets_min": 2011
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.391304347826087,
|
||
|
|
"grad_norm": 0.49597904545260957,
|
||
|
|
"learning_rate": 3.331323910059027e-05,
|
||
|
|
"loss": 0.222,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24300578236579895,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 3967.4,
|
||
|
|
"valid_targets_min": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.399355877616747,
|
||
|
|
"grad_norm": 0.5003561031279338,
|
||
|
|
"learning_rate": 3.3253202892762244e-05,
|
||
|
|
"loss": 0.2088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1889512538909912,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 4254.9,
|
||
|
|
"valid_targets_min": 1570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4074074074074074,
|
||
|
|
"grad_norm": 0.4683723297524737,
|
||
|
|
"learning_rate": 3.319295300569686e-05,
|
||
|
|
"loss": 0.2176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22197048366069794,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 5005.4,
|
||
|
|
"valid_targets_min": 857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4154589371980677,
|
||
|
|
"grad_norm": 0.5071114785034765,
|
||
|
|
"learning_rate": 3.3132490410793294e-05,
|
||
|
|
"loss": 0.2086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20913785696029663,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 4472.4,
|
||
|
|
"valid_targets_min": 1073
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.423510466988728,
|
||
|
|
"grad_norm": 0.4572009276147089,
|
||
|
|
"learning_rate": 3.3071816082880115e-05,
|
||
|
|
"loss": 0.2038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19436806440353394,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 4905.2,
|
||
|
|
"valid_targets_min": 1404
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4315619967793882,
|
||
|
|
"grad_norm": 0.5338897691126591,
|
||
|
|
"learning_rate": 3.3010931000199674e-05,
|
||
|
|
"loss": 0.2173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22107122838497162,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 4076.6,
|
||
|
|
"valid_targets_min": 1904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4396135265700485,
|
||
|
|
"grad_norm": 0.49724905678619213,
|
||
|
|
"learning_rate": 3.2949836144392256e-05,
|
||
|
|
"loss": 0.2266,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22516636550426483,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 5009.3,
|
||
|
|
"valid_targets_min": 1353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4476650563607087,
|
||
|
|
"grad_norm": 0.5108239733431804,
|
||
|
|
"learning_rate": 3.28885325004803e-05,
|
||
|
|
"loss": 0.2163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23558905720710754,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 4774.4,
|
||
|
|
"valid_targets_min": 1731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.455716586151369,
|
||
|
|
"grad_norm": 0.4908585219445986,
|
||
|
|
"learning_rate": 3.282702105685251e-05,
|
||
|
|
"loss": 0.2224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22913925349712372,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 5611.8,
|
||
|
|
"valid_targets_min": 2104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.463768115942029,
|
||
|
|
"grad_norm": 0.4523927665088012,
|
||
|
|
"learning_rate": 3.2765302805247885e-05,
|
||
|
|
"loss": 0.2063,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20795124769210815,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 4889.2,
|
||
|
|
"valid_targets_min": 2184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.471819645732689,
|
||
|
|
"grad_norm": 0.5485354909981668,
|
||
|
|
"learning_rate": 3.270337874073977e-05,
|
||
|
|
"loss": 0.217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2317976951599121,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 5319.1,
|
||
|
|
"valid_targets_min": 1932
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4798711755233493,
|
||
|
|
"grad_norm": 0.5061836838324374,
|
||
|
|
"learning_rate": 3.264124986171981e-05,
|
||
|
|
"loss": 0.2273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25826138257980347,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 4537.9,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4879227053140096,
|
||
|
|
"grad_norm": 0.47633311927326305,
|
||
|
|
"learning_rate": 3.2578917169881816e-05,
|
||
|
|
"loss": 0.2185,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22371219098567963,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 4426.5,
|
||
|
|
"valid_targets_min": 1435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.49597423510467,
|
||
|
|
"grad_norm": 0.528382297311333,
|
||
|
|
"learning_rate": 3.2516381670205665e-05,
|
||
|
|
"loss": 0.2309,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22219571471214294,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 4377.4,
|
||
|
|
"valid_targets_min": 812
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.50402576489533,
|
||
|
|
"grad_norm": 0.496647756887993,
|
||
|
|
"learning_rate": 3.245364437094105e-05,
|
||
|
|
"loss": 0.2134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22214139997959137,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 4800.2,
|
||
|
|
"valid_targets_min": 1516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5120772946859904,
|
||
|
|
"grad_norm": 0.5176776733548973,
|
||
|
|
"learning_rate": 3.239070628359126e-05,
|
||
|
|
"loss": 0.2243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2536792755126953,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 4532.4,
|
||
|
|
"valid_targets_min": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5201288244766507,
|
||
|
|
"grad_norm": 0.4521203535869814,
|
||
|
|
"learning_rate": 3.232756842289685e-05,
|
||
|
|
"loss": 0.209,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1837652325630188,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 4955.4,
|
||
|
|
"valid_targets_min": 1611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.528180354267311,
|
||
|
|
"grad_norm": 0.48332269443651105,
|
||
|
|
"learning_rate": 3.2264231806819286e-05,
|
||
|
|
"loss": 0.1944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18321290612220764,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 4541.4,
|
||
|
|
"valid_targets_min": 1371
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.536231884057971,
|
||
|
|
"grad_norm": 0.5203474071856915,
|
||
|
|
"learning_rate": 3.220069745652456e-05,
|
||
|
|
"loss": 0.2189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2053850144147873,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 4024.7,
|
||
|
|
"valid_targets_min": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.544283413848631,
|
||
|
|
"grad_norm": 0.46470085704874836,
|
||
|
|
"learning_rate": 3.213696639636666e-05,
|
||
|
|
"loss": 0.2233,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19794431328773499,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 4241.2,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5523349436392913,
|
||
|
|
"grad_norm": 0.4921456777251355,
|
||
|
|
"learning_rate": 3.207303965387114e-05,
|
||
|
|
"loss": 0.2195,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2276471108198166,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 4376.2,
|
||
|
|
"valid_targets_min": 327
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5603864734299515,
|
||
|
|
"grad_norm": 0.5461925808616269,
|
||
|
|
"learning_rate": 3.200891825971846e-05,
|
||
|
|
"loss": 0.2148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2170354425907135,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 4376.3,
|
||
|
|
"valid_targets_min": 1624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5684380032206118,
|
||
|
|
"grad_norm": 0.45331868338223147,
|
||
|
|
"learning_rate": 3.194460324772746e-05,
|
||
|
|
"loss": 0.1971,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17769229412078857,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 4559.6,
|
||
|
|
"valid_targets_min": 1717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.576489533011272,
|
||
|
|
"grad_norm": 0.5481556666487605,
|
||
|
|
"learning_rate": 3.188009565483861e-05,
|
||
|
|
"loss": 0.2192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21930727362632751,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 3786.7,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5845410628019323,
|
||
|
|
"grad_norm": 0.5306078402232887,
|
||
|
|
"learning_rate": 3.1815396521097376e-05,
|
||
|
|
"loss": 0.2091,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22777387499809265,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 4098.1,
|
||
|
|
"valid_targets_min": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5925925925925926,
|
||
|
|
"grad_norm": 0.6396015312802733,
|
||
|
|
"learning_rate": 3.1750506889637366e-05,
|
||
|
|
"loss": 0.219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2606090009212494,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 4573.8,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.600644122383253,
|
||
|
|
"grad_norm": 0.5139629491463821,
|
||
|
|
"learning_rate": 3.1685427806663574e-05,
|
||
|
|
"loss": 0.213,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22717058658599854,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 4616.6,
|
||
|
|
"valid_targets_min": 2109
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.608695652173913,
|
||
|
|
"grad_norm": 0.4896772812229126,
|
||
|
|
"learning_rate": 3.1620160321435475e-05,
|
||
|
|
"loss": 0.2204,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2393844723701477,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 4748.1,
|
||
|
|
"valid_targets_min": 1671
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6167471819645733,
|
||
|
|
"grad_norm": 0.5123496934824213,
|
||
|
|
"learning_rate": 3.155470548625014e-05,
|
||
|
|
"loss": 0.2257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2570544481277466,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 4746.9,
|
||
|
|
"valid_targets_min": 1642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6247987117552336,
|
||
|
|
"grad_norm": 0.5011886008293388,
|
||
|
|
"learning_rate": 3.1489064356425235e-05,
|
||
|
|
"loss": 0.2223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21146038174629211,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 4106.5,
|
||
|
|
"valid_targets_min": 1263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.632850241545894,
|
||
|
|
"grad_norm": 0.4918629952935143,
|
||
|
|
"learning_rate": 3.142323799028204e-05,
|
||
|
|
"loss": 0.2206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.205857515335083,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 4128.4,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.640901771336554,
|
||
|
|
"grad_norm": 0.4975884336977351,
|
||
|
|
"learning_rate": 3.135722744912836e-05,
|
||
|
|
"loss": 0.2067,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19787558913230896,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 5678.5,
|
||
|
|
"valid_targets_min": 886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6489533011272144,
|
||
|
|
"grad_norm": 0.48481732390442694,
|
||
|
|
"learning_rate": 3.129103379724143e-05,
|
||
|
|
"loss": 0.1994,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1850062608718872,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 3477.6,
|
||
|
|
"valid_targets_min": 752
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6570048309178746,
|
||
|
|
"grad_norm": 0.4665528515319807,
|
||
|
|
"learning_rate": 3.122465810185075e-05,
|
||
|
|
"loss": 0.2172,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21181383728981018,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 4864.5,
|
||
|
|
"valid_targets_min": 1598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6650563607085345,
|
||
|
|
"grad_norm": 0.5077396920276867,
|
||
|
|
"learning_rate": 3.1158101433120863e-05,
|
||
|
|
"loss": 0.2186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19636741280555725,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 3697.4,
|
||
|
|
"valid_targets_min": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6731078904991947,
|
||
|
|
"grad_norm": 0.5055566499587948,
|
||
|
|
"learning_rate": 3.1091364864134136e-05,
|
||
|
|
"loss": 0.197,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20249465107917786,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 4576.9,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.681159420289855,
|
||
|
|
"grad_norm": 0.49702037708190555,
|
||
|
|
"learning_rate": 3.102444947087342e-05,
|
||
|
|
"loss": 0.2007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17359226942062378,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 3802.2,
|
||
|
|
"valid_targets_min": 1850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6892109500805152,
|
||
|
|
"grad_norm": 0.543548482081646,
|
||
|
|
"learning_rate": 3.0957356332204745e-05,
|
||
|
|
"loss": 0.2364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21617215871810913,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 5324.2,
|
||
|
|
"valid_targets_min": 1401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6972624798711755,
|
||
|
|
"grad_norm": 0.473211339207569,
|
||
|
|
"learning_rate": 3.089008652985989e-05,
|
||
|
|
"loss": 0.2,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20709389448165894,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 4730.4,
|
||
|
|
"valid_targets_min": 1283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7053140096618358,
|
||
|
|
"grad_norm": 0.48495909325226566,
|
||
|
|
"learning_rate": 3.082264114841892e-05,
|
||
|
|
"loss": 0.2152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19697055220603943,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 4703.1,
|
||
|
|
"valid_targets_min": 1393
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.713365539452496,
|
||
|
|
"grad_norm": 0.525234042627054,
|
||
|
|
"learning_rate": 3.07550212752928e-05,
|
||
|
|
"loss": 0.2152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21602725982666016,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 4527.6,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7214170692431563,
|
||
|
|
"grad_norm": 0.49507933380428965,
|
||
|
|
"learning_rate": 3.068722800070574e-05,
|
||
|
|
"loss": 0.2489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23652076721191406,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 3893.8,
|
||
|
|
"valid_targets_min": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7294685990338166,
|
||
|
|
"grad_norm": 0.478229441737468,
|
||
|
|
"learning_rate": 3.0619262417677695e-05,
|
||
|
|
"loss": 0.2065,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20760485529899597,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 4375.8,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7375201288244764,
|
||
|
|
"grad_norm": 0.4760382784479069,
|
||
|
|
"learning_rate": 3.055112562200673e-05,
|
||
|
|
"loss": 0.2077,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20306912064552307,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 4093.1,
|
||
|
|
"valid_targets_min": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7455716586151366,
|
||
|
|
"grad_norm": 0.567194545288659,
|
||
|
|
"learning_rate": 3.0482818712251318e-05,
|
||
|
|
"loss": 0.2031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22697149217128754,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 5224.9,
|
||
|
|
"valid_targets_min": 679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.753623188405797,
|
||
|
|
"grad_norm": 0.7966562099200152,
|
||
|
|
"learning_rate": 3.0414342789712675e-05,
|
||
|
|
"loss": 0.2246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2644144296646118,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 3641.8,
|
||
|
|
"valid_targets_min": 267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.761674718196457,
|
||
|
|
"grad_norm": 0.5194781986137333,
|
||
|
|
"learning_rate": 3.034569895841699e-05,
|
||
|
|
"loss": 0.2012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20732516050338745,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 4665.7,
|
||
|
|
"valid_targets_min": 1796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7697262479871174,
|
||
|
|
"grad_norm": 0.5139542297559575,
|
||
|
|
"learning_rate": 3.0276888325097583e-05,
|
||
|
|
"loss": 0.2108,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2747165858745575,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 5593.7,
|
||
|
|
"valid_targets_min": 863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7777777777777777,
|
||
|
|
"grad_norm": 0.5017349754200195,
|
||
|
|
"learning_rate": 3.020791199917713e-05,
|
||
|
|
"loss": 0.2097,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19357708096504211,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 4411.8,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.785829307568438,
|
||
|
|
"grad_norm": 0.47883623225450245,
|
||
|
|
"learning_rate": 3.0138771092749722e-05,
|
||
|
|
"loss": 0.2158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21340464055538177,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 4259.2,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.793880837359098,
|
||
|
|
"grad_norm": 0.5042451347947459,
|
||
|
|
"learning_rate": 3.006946672056297e-05,
|
||
|
|
"loss": 0.2163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20300012826919556,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 4185.3,
|
||
|
|
"valid_targets_min": 1004
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8019323671497585,
|
||
|
|
"grad_norm": 0.5599312568052899,
|
||
|
|
"learning_rate": 3.0000000000000004e-05,
|
||
|
|
"loss": 0.2111,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22356563806533813,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 4172.1,
|
||
|
|
"valid_targets_min": 1019
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8099838969404187,
|
||
|
|
"grad_norm": 0.5188094581424917,
|
||
|
|
"learning_rate": 2.993037205106147e-05,
|
||
|
|
"loss": 0.2181,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21803626418113708,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 4445.8,
|
||
|
|
"valid_targets_min": 1772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.818035426731079,
|
||
|
|
"grad_norm": 0.48903765537814037,
|
||
|
|
"learning_rate": 2.9860583996347495e-05,
|
||
|
|
"loss": 0.2093,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21702814102172852,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 4806.7,
|
||
|
|
"valid_targets_min": 1671
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8260869565217392,
|
||
|
|
"grad_norm": 0.48803716906398215,
|
||
|
|
"learning_rate": 2.9790636961039524e-05,
|
||
|
|
"loss": 0.2193,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22425541281700134,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 4643.0,
|
||
|
|
"valid_targets_min": 1506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8341384863123995,
|
||
|
|
"grad_norm": 0.48749480729575717,
|
||
|
|
"learning_rate": 2.9720532072882268e-05,
|
||
|
|
"loss": 0.2248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19417119026184082,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 4002.8,
|
||
|
|
"valid_targets_min": 1522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8421900161030598,
|
||
|
|
"grad_norm": 0.45848716202240636,
|
||
|
|
"learning_rate": 2.965027046216544e-05,
|
||
|
|
"loss": 0.2081,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1857365071773529,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 4783.7,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.85024154589372,
|
||
|
|
"grad_norm": 0.6718800164601025,
|
||
|
|
"learning_rate": 2.9579853261705573e-05,
|
||
|
|
"loss": 0.2024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23533181846141815,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 4673.9,
|
||
|
|
"valid_targets_min": 1311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8582930756843803,
|
||
|
|
"grad_norm": 0.4660110391421861,
|
||
|
|
"learning_rate": 2.950928160682775e-05,
|
||
|
|
"loss": 0.2089,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19263693690299988,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 4255.1,
|
||
|
|
"valid_targets_min": 1076
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.86634460547504,
|
||
|
|
"grad_norm": 0.5145254890484248,
|
||
|
|
"learning_rate": 2.943855663534731e-05,
|
||
|
|
"loss": 0.2183,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23376551270484924,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 4397.2,
|
||
|
|
"valid_targets_min": 1787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8743961352657004,
|
||
|
|
"grad_norm": 0.4906091551370421,
|
||
|
|
"learning_rate": 2.9367679487551473e-05,
|
||
|
|
"loss": 0.2056,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20546765625476837,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 3990.4,
|
||
|
|
"valid_targets_min": 1477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8824476650563606,
|
||
|
|
"grad_norm": 0.5093898731159036,
|
||
|
|
"learning_rate": 2.929665130618098e-05,
|
||
|
|
"loss": 0.2037,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22851824760437012,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 5522.1,
|
||
|
|
"valid_targets_min": 1762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.890499194847021,
|
||
|
|
"grad_norm": 0.4518802511006292,
|
||
|
|
"learning_rate": 2.9225473236411655e-05,
|
||
|
|
"loss": 0.2217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18847951292991638,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 4818.7,
|
||
|
|
"valid_targets_min": 1111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.898550724637681,
|
||
|
|
"grad_norm": 0.6937426790422542,
|
||
|
|
"learning_rate": 2.915414642583596e-05,
|
||
|
|
"loss": 0.2148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20088736712932587,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 3600.9,
|
||
|
|
"valid_targets_min": 848
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9066022544283414,
|
||
|
|
"grad_norm": 0.46521668715049397,
|
||
|
|
"learning_rate": 2.9082672024444485e-05,
|
||
|
|
"loss": 0.2229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22432249784469604,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 4322.0,
|
||
|
|
"valid_targets_min": 1358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9146537842190017,
|
||
|
|
"grad_norm": 0.436860595041999,
|
||
|
|
"learning_rate": 2.901105118460737e-05,
|
||
|
|
"loss": 0.2072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1966436803340912,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 4940.1,
|
||
|
|
"valid_targets_min": 1944
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.922705314009662,
|
||
|
|
"grad_norm": 0.5361044008606226,
|
||
|
|
"learning_rate": 2.8939285061055807e-05,
|
||
|
|
"loss": 0.2146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2078220695257187,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 3302.5,
|
||
|
|
"valid_targets_min": 609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.930756843800322,
|
||
|
|
"grad_norm": 0.49990872670719105,
|
||
|
|
"learning_rate": 2.8867374810863325e-05,
|
||
|
|
"loss": 0.2136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2396693229675293,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 4933.6,
|
||
|
|
"valid_targets_min": 1197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.938808373590982,
|
||
|
|
"grad_norm": 0.4676050983632418,
|
||
|
|
"learning_rate": 2.8795321593427227e-05,
|
||
|
|
"loss": 0.2105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21761463582515717,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 4473.9,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9468599033816423,
|
||
|
|
"grad_norm": 0.4579800892905144,
|
||
|
|
"learning_rate": 2.8723126570449813e-05,
|
||
|
|
"loss": 0.2137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22557538747787476,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 4990.2,
|
||
|
|
"valid_targets_min": 1104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9549114331723025,
|
||
|
|
"grad_norm": 0.521039627014324,
|
||
|
|
"learning_rate": 2.8650790905919724e-05,
|
||
|
|
"loss": 0.2127,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21462702751159668,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 4125.9,
|
||
|
|
"valid_targets_min": 1187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.962962962962963,
|
||
|
|
"grad_norm": 0.5446614093095541,
|
||
|
|
"learning_rate": 2.8578315766093133e-05,
|
||
|
|
"loss": 0.2275,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22720134258270264,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 4199.9,
|
||
|
|
"valid_targets_min": 379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.971014492753623,
|
||
|
|
"grad_norm": 0.5085501802829419,
|
||
|
|
"learning_rate": 2.850570231947493e-05,
|
||
|
|
"loss": 0.2086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20165984332561493,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 4610.2,
|
||
|
|
"valid_targets_min": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9790660225442833,
|
||
|
|
"grad_norm": 0.4995531673717949,
|
||
|
|
"learning_rate": 2.8432951736799933e-05,
|
||
|
|
"loss": 0.2149,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21701423823833466,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 4243.4,
|
||
|
|
"valid_targets_min": 1952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9871175523349436,
|
||
|
|
"grad_norm": 0.5654118721016965,
|
||
|
|
"learning_rate": 2.8360065191013967e-05,
|
||
|
|
"loss": 0.2189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22924692928791046,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 3948.6,
|
||
|
|
"valid_targets_min": 1818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.995169082125604,
|
||
|
|
"grad_norm": 0.5368747366497263,
|
||
|
|
"learning_rate": 2.8287043857254957e-05,
|
||
|
|
"loss": 0.2049,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18569841980934143,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 4243.3,
|
||
|
|
"valid_targets_min": 944
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.003220611916264,
|
||
|
|
"grad_norm": 0.5308612324214572,
|
||
|
|
"learning_rate": 2.8213888912834026e-05,
|
||
|
|
"loss": 0.2026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20392943918704987,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 5240.9,
|
||
|
|
"valid_targets_min": 3176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0112721417069244,
|
||
|
|
"grad_norm": 0.5850366317547473,
|
||
|
|
"learning_rate": 2.814060153721644e-05,
|
||
|
|
"loss": 0.1835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22741422057151794,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 4518.1,
|
||
|
|
"valid_targets_min": 1313
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0193236714975846,
|
||
|
|
"grad_norm": 0.5456534322958014,
|
||
|
|
"learning_rate": 2.8067182912002663e-05,
|
||
|
|
"loss": 0.1929,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19902676343917847,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 4697.9,
|
||
|
|
"valid_targets_min": 1401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.027375201288245,
|
||
|
|
"grad_norm": 0.5866691020588805,
|
||
|
|
"learning_rate": 2.7993634220909254e-05,
|
||
|
|
"loss": 0.1873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17653107643127441,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 5312.7,
|
||
|
|
"valid_targets_min": 1668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.035426731078905,
|
||
|
|
"grad_norm": 0.4839014869325985,
|
||
|
|
"learning_rate": 2.7919956649749826e-05,
|
||
|
|
"loss": 0.2012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.207495778799057,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 4792.9,
|
||
|
|
"valid_targets_min": 1475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0434782608695654,
|
||
|
|
"grad_norm": 0.5792725572749162,
|
||
|
|
"learning_rate": 2.784615138641588e-05,
|
||
|
|
"loss": 0.2028,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2462862730026245,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 3582.7,
|
||
|
|
"valid_targets_min": 1264
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0515297906602252,
|
||
|
|
"grad_norm": 0.5471292037490801,
|
||
|
|
"learning_rate": 2.7772219620857685e-05,
|
||
|
|
"loss": 0.2036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2000941038131714,
|
||
|
|
"step": 1895,
|
||
|
|
"valid_targets_mean": 4260.8,
|
||
|
|
"valid_targets_min": 1662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0595813204508855,
|
||
|
|
"grad_norm": 0.5418789392536902,
|
||
|
|
"learning_rate": 2.769816254506509e-05,
|
||
|
|
"loss": 0.1972,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17109820246696472,
|
||
|
|
"step": 1900,
|
||
|
|
"valid_targets_mean": 4285.0,
|
||
|
|
"valid_targets_min": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0676328502415457,
|
||
|
|
"grad_norm": 0.5696873068816691,
|
||
|
|
"learning_rate": 2.76239813530483e-05,
|
||
|
|
"loss": 0.1917,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17864085733890533,
|
||
|
|
"step": 1905,
|
||
|
|
"valid_targets_mean": 3839.0,
|
||
|
|
"valid_targets_min": 356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.075684380032206,
|
||
|
|
"grad_norm": 0.5257872930167539,
|
||
|
|
"learning_rate": 2.7549677240818628e-05,
|
||
|
|
"loss": 0.1918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20962268114089966,
|
||
|
|
"step": 1910,
|
||
|
|
"valid_targets_mean": 4587.1,
|
||
|
|
"valid_targets_min": 854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0837359098228663,
|
||
|
|
"grad_norm": 0.5648446285032245,
|
||
|
|
"learning_rate": 2.7475251406369197e-05,
|
||
|
|
"loss": 0.1969,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20219004154205322,
|
||
|
|
"step": 1915,
|
||
|
|
"valid_targets_mean": 4792.9,
|
||
|
|
"valid_targets_min": 1897
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0917874396135265,
|
||
|
|
"grad_norm": 0.454963234903237,
|
||
|
|
"learning_rate": 2.740070504965565e-05,
|
||
|
|
"loss": 0.1732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1661757528781891,
|
||
|
|
"step": 1920,
|
||
|
|
"valid_targets_mean": 5107.2,
|
||
|
|
"valid_targets_min": 1842
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.099838969404187,
|
||
|
|
"grad_norm": 0.5802646506856157,
|
||
|
|
"learning_rate": 2.7326039372576782e-05,
|
||
|
|
"loss": 0.2025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20974227786064148,
|
||
|
|
"step": 1925,
|
||
|
|
"valid_targets_mean": 4211.8,
|
||
|
|
"valid_targets_min": 1850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.107890499194847,
|
||
|
|
"grad_norm": 0.5115859872263251,
|
||
|
|
"learning_rate": 2.7251255578955186e-05,
|
||
|
|
"loss": 0.2016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.191048726439476,
|
||
|
|
"step": 1930,
|
||
|
|
"valid_targets_mean": 4847.3,
|
||
|
|
"valid_targets_min": 2149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1159420289855073,
|
||
|
|
"grad_norm": 0.4838092911137751,
|
||
|
|
"learning_rate": 2.7176354874517805e-05,
|
||
|
|
"loss": 0.1915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18548092246055603,
|
||
|
|
"step": 1935,
|
||
|
|
"valid_targets_mean": 4928.9,
|
||
|
|
"valid_targets_min": 1746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1239935587761676,
|
||
|
|
"grad_norm": 0.6716396355587732,
|
||
|
|
"learning_rate": 2.7101338466876542e-05,
|
||
|
|
"loss": 0.1884,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17967697978019714,
|
||
|
|
"step": 1940,
|
||
|
|
"valid_targets_mean": 3838.4,
|
||
|
|
"valid_targets_min": 933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.132045088566828,
|
||
|
|
"grad_norm": 0.5175526656495621,
|
||
|
|
"learning_rate": 2.702620756550874e-05,
|
||
|
|
"loss": 0.2077,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18851426243782043,
|
||
|
|
"step": 1945,
|
||
|
|
"valid_targets_mean": 4249.6,
|
||
|
|
"valid_targets_min": 1164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.140096618357488,
|
||
|
|
"grad_norm": 0.5292721987453661,
|
||
|
|
"learning_rate": 2.6950963381737728e-05,
|
||
|
|
"loss": 0.1948,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20516784489154816,
|
||
|
|
"step": 1950,
|
||
|
|
"valid_targets_mean": 4134.1,
|
||
|
|
"valid_targets_min": 1487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.148148148148148,
|
||
|
|
"grad_norm": 0.5429459342739271,
|
||
|
|
"learning_rate": 2.687560712871325e-05,
|
||
|
|
"loss": 0.1959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19477809965610504,
|
||
|
|
"step": 1955,
|
||
|
|
"valid_targets_mean": 4617.6,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.156199677938808,
|
||
|
|
"grad_norm": 0.5204990053162142,
|
||
|
|
"learning_rate": 2.6800140021391933e-05,
|
||
|
|
"loss": 0.2088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2121405303478241,
|
||
|
|
"step": 1960,
|
||
|
|
"valid_targets_mean": 5504.8,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1642512077294684,
|
||
|
|
"grad_norm": 0.5107603352588185,
|
||
|
|
"learning_rate": 2.6724563276517697e-05,
|
||
|
|
"loss": 0.1903,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.217079758644104,
|
||
|
|
"step": 1965,
|
||
|
|
"valid_targets_mean": 4874.7,
|
||
|
|
"valid_targets_min": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1723027375201287,
|
||
|
|
"grad_norm": 0.5709422965320798,
|
||
|
|
"learning_rate": 2.6648878112602115e-05,
|
||
|
|
"loss": 0.1865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20290029048919678,
|
||
|
|
"step": 1970,
|
||
|
|
"valid_targets_mean": 4168.8,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.180354267310789,
|
||
|
|
"grad_norm": 0.5236384072204857,
|
||
|
|
"learning_rate": 2.6573085749904784e-05,
|
||
|
|
"loss": 0.2089,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18646635115146637,
|
||
|
|
"step": 1975,
|
||
|
|
"valid_targets_mean": 5792.3,
|
||
|
|
"valid_targets_min": 3148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1884057971014492,
|
||
|
|
"grad_norm": 0.4931013592583483,
|
||
|
|
"learning_rate": 2.6497187410413676e-05,
|
||
|
|
"loss": 0.191,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16621598601341248,
|
||
|
|
"step": 1980,
|
||
|
|
"valid_targets_mean": 4182.5,
|
||
|
|
"valid_targets_min": 626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1964573268921095,
|
||
|
|
"grad_norm": 0.513477571797092,
|
||
|
|
"learning_rate": 2.642118431782537e-05,
|
||
|
|
"loss": 0.2045,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19758376479148865,
|
||
|
|
"step": 1985,
|
||
|
|
"valid_targets_mean": 4814.4,
|
||
|
|
"valid_targets_min": 841
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2045088566827697,
|
||
|
|
"grad_norm": 0.5208664241436397,
|
||
|
|
"learning_rate": 2.6345077697525394e-05,
|
||
|
|
"loss": 0.2018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2036036252975464,
|
||
|
|
"step": 1990,
|
||
|
|
"valid_targets_mean": 4699.8,
|
||
|
|
"valid_targets_min": 1314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.21256038647343,
|
||
|
|
"grad_norm": 0.48590086014586165,
|
||
|
|
"learning_rate": 2.6268868776568416e-05,
|
||
|
|
"loss": 0.1918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1840904951095581,
|
||
|
|
"step": 1995,
|
||
|
|
"valid_targets_mean": 4029.8,
|
||
|
|
"valid_targets_min": 2119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2206119162640903,
|
||
|
|
"grad_norm": 0.48975245964069325,
|
||
|
|
"learning_rate": 2.619255878365849e-05,
|
||
|
|
"loss": 0.2021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19650408625602722,
|
||
|
|
"step": 2000,
|
||
|
|
"valid_targets_mean": 4923.2,
|
||
|
|
"valid_targets_min": 2035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2286634460547505,
|
||
|
|
"grad_norm": 0.4730736998762011,
|
||
|
|
"learning_rate": 2.6116148949129237e-05,
|
||
|
|
"loss": 0.2022,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19848835468292236,
|
||
|
|
"step": 2005,
|
||
|
|
"valid_targets_mean": 5154.9,
|
||
|
|
"valid_targets_min": 862
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.236714975845411,
|
||
|
|
"grad_norm": 0.49293775625702296,
|
||
|
|
"learning_rate": 2.603964050492401e-05,
|
||
|
|
"loss": 0.1845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1910780966281891,
|
||
|
|
"step": 2010,
|
||
|
|
"valid_targets_mean": 5177.5,
|
||
|
|
"valid_targets_min": 1035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.244766505636071,
|
||
|
|
"grad_norm": 0.4352279629169526,
|
||
|
|
"learning_rate": 2.5963034684576024e-05,
|
||
|
|
"loss": 0.1833,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1654304563999176,
|
||
|
|
"step": 2015,
|
||
|
|
"valid_targets_mean": 4889.2,
|
||
|
|
"valid_targets_min": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2528180354267313,
|
||
|
|
"grad_norm": 0.48928557887722884,
|
||
|
|
"learning_rate": 2.5886332723188484e-05,
|
||
|
|
"loss": 0.1874,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17599686980247498,
|
||
|
|
"step": 2020,
|
||
|
|
"valid_targets_mean": 3929.6,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.260869565217391,
|
||
|
|
"grad_norm": 0.4602375034247986,
|
||
|
|
"learning_rate": 2.5809535857414637e-05,
|
||
|
|
"loss": 0.1916,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19927427172660828,
|
||
|
|
"step": 2025,
|
||
|
|
"valid_targets_mean": 4907.2,
|
||
|
|
"valid_targets_min": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2689210950080514,
|
||
|
|
"grad_norm": 0.45533522839686313,
|
||
|
|
"learning_rate": 2.573264532543788e-05,
|
||
|
|
"loss": 0.1902,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1822260171175003,
|
||
|
|
"step": 2030,
|
||
|
|
"valid_targets_mean": 4557.0,
|
||
|
|
"valid_targets_min": 2401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2769726247987117,
|
||
|
|
"grad_norm": 0.5432317512688624,
|
||
|
|
"learning_rate": 2.5655662366951778e-05,
|
||
|
|
"loss": 0.1966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1943272203207016,
|
||
|
|
"step": 2035,
|
||
|
|
"valid_targets_mean": 3990.8,
|
||
|
|
"valid_targets_min": 1026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.285024154589372,
|
||
|
|
"grad_norm": 0.5340485828335133,
|
||
|
|
"learning_rate": 2.557858822314007e-05,
|
||
|
|
"loss": 0.2004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2411888986825943,
|
||
|
|
"step": 2040,
|
||
|
|
"valid_targets_mean": 4090.6,
|
||
|
|
"valid_targets_min": 1032
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.293075684380032,
|
||
|
|
"grad_norm": 0.5155182145466647,
|
||
|
|
"learning_rate": 2.5501424136656635e-05,
|
||
|
|
"loss": 0.1928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2276889681816101,
|
||
|
|
"step": 2045,
|
||
|
|
"valid_targets_mean": 4593.6,
|
||
|
|
"valid_targets_min": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3011272141706924,
|
||
|
|
"grad_norm": 0.5465406153378882,
|
||
|
|
"learning_rate": 2.5424171351605518e-05,
|
||
|
|
"loss": 0.1874,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17450806498527527,
|
||
|
|
"step": 2050,
|
||
|
|
"valid_targets_mean": 4325.6,
|
||
|
|
"valid_targets_min": 1283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3091787439613527,
|
||
|
|
"grad_norm": 0.5652814304177497,
|
||
|
|
"learning_rate": 2.5346831113520827e-05,
|
||
|
|
"loss": 0.1961,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21284984052181244,
|
||
|
|
"step": 2055,
|
||
|
|
"valid_targets_mean": 4811.6,
|
||
|
|
"valid_targets_min": 1066
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.317230273752013,
|
||
|
|
"grad_norm": 0.5717869882020237,
|
||
|
|
"learning_rate": 2.526940466934664e-05,
|
||
|
|
"loss": 0.1937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19881507754325867,
|
||
|
|
"step": 2060,
|
||
|
|
"valid_targets_mean": 3001.5,
|
||
|
|
"valid_targets_min": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.325281803542673,
|
||
|
|
"grad_norm": 0.5193991609655757,
|
||
|
|
"learning_rate": 2.5191893267416964e-05,
|
||
|
|
"loss": 0.1913,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19282767176628113,
|
||
|
|
"step": 2065,
|
||
|
|
"valid_targets_mean": 4379.1,
|
||
|
|
"valid_targets_min": 943
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3333333333333335,
|
||
|
|
"grad_norm": 0.5087654352874541,
|
||
|
|
"learning_rate": 2.5114298157435526e-05,
|
||
|
|
"loss": 0.2077,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22454917430877686,
|
||
|
|
"step": 2070,
|
||
|
|
"valid_targets_mean": 5994.5,
|
||
|
|
"valid_targets_min": 1253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3413848631239937,
|
||
|
|
"grad_norm": 0.4773374743223842,
|
||
|
|
"learning_rate": 2.503662059045568e-05,
|
||
|
|
"loss": 0.1961,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19372783601284027,
|
||
|
|
"step": 2075,
|
||
|
|
"valid_targets_mean": 4415.6,
|
||
|
|
"valid_targets_min": 1046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3494363929146536,
|
||
|
|
"grad_norm": 0.5211750691881805,
|
||
|
|
"learning_rate": 2.4958861818860217e-05,
|
||
|
|
"loss": 0.1936,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20317870378494263,
|
||
|
|
"step": 2080,
|
||
|
|
"valid_targets_mean": 4938.8,
|
||
|
|
"valid_targets_min": 1053
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.357487922705314,
|
||
|
|
"grad_norm": 0.580769513908195,
|
||
|
|
"learning_rate": 2.488102309634119e-05,
|
||
|
|
"loss": 0.1918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17012670636177063,
|
||
|
|
"step": 2085,
|
||
|
|
"valid_targets_mean": 3300.9,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.365539452495974,
|
||
|
|
"grad_norm": 0.4677262023270228,
|
||
|
|
"learning_rate": 2.480310567787967e-05,
|
||
|
|
"loss": 0.1943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21143823862075806,
|
||
|
|
"step": 2090,
|
||
|
|
"valid_targets_mean": 4960.4,
|
||
|
|
"valid_targets_min": 1486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3735909822866343,
|
||
|
|
"grad_norm": 0.498695274074395,
|
||
|
|
"learning_rate": 2.4725110819725542e-05,
|
||
|
|
"loss": 0.1877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19077152013778687,
|
||
|
|
"step": 2095,
|
||
|
|
"valid_targets_mean": 4409.8,
|
||
|
|
"valid_targets_min": 1603
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3816425120772946,
|
||
|
|
"grad_norm": 0.6606944270313985,
|
||
|
|
"learning_rate": 2.464703977937723e-05,
|
||
|
|
"loss": 0.2122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2209235429763794,
|
||
|
|
"step": 2100,
|
||
|
|
"valid_targets_mean": 5160.9,
|
||
|
|
"valid_targets_min": 2260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.389694041867955,
|
||
|
|
"grad_norm": 0.48392843927081547,
|
||
|
|
"learning_rate": 2.456889381556144e-05,
|
||
|
|
"loss": 0.1863,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1707090586423874,
|
||
|
|
"step": 2105,
|
||
|
|
"valid_targets_mean": 4587.8,
|
||
|
|
"valid_targets_min": 1717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.397745571658615,
|
||
|
|
"grad_norm": 0.4875147573168212,
|
||
|
|
"learning_rate": 2.449067418821285e-05,
|
||
|
|
"loss": 0.1944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18568046391010284,
|
||
|
|
"step": 2110,
|
||
|
|
"valid_targets_mean": 4881.9,
|
||
|
|
"valid_targets_min": 2069
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4057971014492754,
|
||
|
|
"grad_norm": 0.5240738697697526,
|
||
|
|
"learning_rate": 2.4412382158453807e-05,
|
||
|
|
"loss": 0.2027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17656230926513672,
|
||
|
|
"step": 2115,
|
||
|
|
"valid_targets_mean": 4386.1,
|
||
|
|
"valid_targets_min": 1483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4138486312399356,
|
||
|
|
"grad_norm": 0.5359424822294189,
|
||
|
|
"learning_rate": 2.4334018988573983e-05,
|
||
|
|
"loss": 0.2071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22453176975250244,
|
||
|
|
"step": 2120,
|
||
|
|
"valid_targets_mean": 4363.1,
|
||
|
|
"valid_targets_min": 2099
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.421900161030596,
|
||
|
|
"grad_norm": 0.7044397785288949,
|
||
|
|
"learning_rate": 2.425558594201004e-05,
|
||
|
|
"loss": 0.1963,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17415092885494232,
|
||
|
|
"step": 2125,
|
||
|
|
"valid_targets_mean": 4853.5,
|
||
|
|
"valid_targets_min": 1658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.429951690821256,
|
||
|
|
"grad_norm": 0.7313542515268883,
|
||
|
|
"learning_rate": 2.417708428332525e-05,
|
||
|
|
"loss": 0.2088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18305009603500366,
|
||
|
|
"step": 2130,
|
||
|
|
"valid_targets_mean": 4156.8,
|
||
|
|
"valid_targets_min": 317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4380032206119164,
|
||
|
|
"grad_norm": 0.4738591164631795,
|
||
|
|
"learning_rate": 2.4098515278189097e-05,
|
||
|
|
"loss": 0.192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1723197102546692,
|
||
|
|
"step": 2135,
|
||
|
|
"valid_targets_mean": 4533.4,
|
||
|
|
"valid_targets_min": 1353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4460547504025767,
|
||
|
|
"grad_norm": 0.5276265701938105,
|
||
|
|
"learning_rate": 2.4019880193356902e-05,
|
||
|
|
"loss": 0.182,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17899435758590698,
|
||
|
|
"step": 2140,
|
||
|
|
"valid_targets_mean": 4434.9,
|
||
|
|
"valid_targets_min": 675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.454106280193237,
|
||
|
|
"grad_norm": 0.5632489582281142,
|
||
|
|
"learning_rate": 2.3941180296649348e-05,
|
||
|
|
"loss": 0.2035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22366738319396973,
|
||
|
|
"step": 2145,
|
||
|
|
"valid_targets_mean": 3995.8,
|
||
|
|
"valid_targets_min": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4621578099838968,
|
||
|
|
"grad_norm": 0.5526994009194625,
|
||
|
|
"learning_rate": 2.3862416856932087e-05,
|
||
|
|
"loss": 0.19,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19457975029945374,
|
||
|
|
"step": 2150,
|
||
|
|
"valid_targets_mean": 3929.9,
|
||
|
|
"valid_targets_min": 1593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.470209339774557,
|
||
|
|
"grad_norm": 0.5417281294909433,
|
||
|
|
"learning_rate": 2.378359114409527e-05,
|
||
|
|
"loss": 0.2146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20209567248821259,
|
||
|
|
"step": 2155,
|
||
|
|
"valid_targets_mean": 4491.2,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4782608695652173,
|
||
|
|
"grad_norm": 0.5283860783665116,
|
||
|
|
"learning_rate": 2.370470442903306e-05,
|
||
|
|
"loss": 0.2024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2074143886566162,
|
||
|
|
"step": 2160,
|
||
|
|
"valid_targets_mean": 3453.6,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4863123993558776,
|
||
|
|
"grad_norm": 0.5596014550678241,
|
||
|
|
"learning_rate": 2.362575798362315e-05,
|
||
|
|
"loss": 0.2018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20553532242774963,
|
||
|
|
"step": 2165,
|
||
|
|
"valid_targets_mean": 3728.8,
|
||
|
|
"valid_targets_min": 1291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.494363929146538,
|
||
|
|
"grad_norm": 0.47388886907429295,
|
||
|
|
"learning_rate": 2.3546753080706242e-05,
|
||
|
|
"loss": 0.1962,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2011997401714325,
|
||
|
|
"step": 2170,
|
||
|
|
"valid_targets_mean": 4747.0,
|
||
|
|
"valid_targets_min": 1499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.502415458937198,
|
||
|
|
"grad_norm": 0.5433473628742698,
|
||
|
|
"learning_rate": 2.346769099406557e-05,
|
||
|
|
"loss": 0.2048,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22939231991767883,
|
||
|
|
"step": 2175,
|
||
|
|
"valid_targets_mean": 3726.4,
|
||
|
|
"valid_targets_min": 824
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5104669887278583,
|
||
|
|
"grad_norm": 0.4457288216134543,
|
||
|
|
"learning_rate": 2.33885729984063e-05,
|
||
|
|
"loss": 0.2015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1754472553730011,
|
||
|
|
"step": 2180,
|
||
|
|
"valid_targets_mean": 5101.0,
|
||
|
|
"valid_targets_min": 1832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5185185185185186,
|
||
|
|
"grad_norm": 0.511189888500102,
|
||
|
|
"learning_rate": 2.3309400369335033e-05,
|
||
|
|
"loss": 0.1909,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18584825098514557,
|
||
|
|
"step": 2185,
|
||
|
|
"valid_targets_mean": 4445.9,
|
||
|
|
"valid_targets_min": 1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.526570048309179,
|
||
|
|
"grad_norm": 0.5632502133321703,
|
||
|
|
"learning_rate": 2.3230174383339196e-05,
|
||
|
|
"loss": 0.2072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2198297381401062,
|
||
|
|
"step": 2190,
|
||
|
|
"valid_targets_mean": 4171.1,
|
||
|
|
"valid_targets_min": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.534621578099839,
|
||
|
|
"grad_norm": 0.4874975024015211,
|
||
|
|
"learning_rate": 2.3150896317766505e-05,
|
||
|
|
"loss": 0.1896,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1995978057384491,
|
||
|
|
"step": 2195,
|
||
|
|
"valid_targets_mean": 4628.1,
|
||
|
|
"valid_targets_min": 1282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.542673107890499,
|
||
|
|
"grad_norm": 0.47843928271386893,
|
||
|
|
"learning_rate": 2.3071567450804325e-05,
|
||
|
|
"loss": 0.2053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1690368950366974,
|
||
|
|
"step": 2200,
|
||
|
|
"valid_targets_mean": 4314.7,
|
||
|
|
"valid_targets_min": 1529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.550724637681159,
|
||
|
|
"grad_norm": 0.5030690747235215,
|
||
|
|
"learning_rate": 2.299218906145909e-05,
|
||
|
|
"loss": 0.1883,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17591848969459534,
|
||
|
|
"step": 2205,
|
||
|
|
"valid_targets_mean": 4831.5,
|
||
|
|
"valid_targets_min": 1692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5587761674718195,
|
||
|
|
"grad_norm": 0.4445594459012944,
|
||
|
|
"learning_rate": 2.2912762429535684e-05,
|
||
|
|
"loss": 0.1974,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2045142650604248,
|
||
|
|
"step": 2210,
|
||
|
|
"valid_targets_mean": 5075.3,
|
||
|
|
"valid_targets_min": 1787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5668276972624797,
|
||
|
|
"grad_norm": 0.5066371584429301,
|
||
|
|
"learning_rate": 2.2833288835616784e-05,
|
||
|
|
"loss": 0.1948,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20660072565078735,
|
||
|
|
"step": 2215,
|
||
|
|
"valid_targets_mean": 4255.6,
|
||
|
|
"valid_targets_min": 978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.57487922705314,
|
||
|
|
"grad_norm": 0.5314312269324657,
|
||
|
|
"learning_rate": 2.2753769561042235e-05,
|
||
|
|
"loss": 0.2013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1856955885887146,
|
||
|
|
"step": 2220,
|
||
|
|
"valid_targets_mean": 4382.4,
|
||
|
|
"valid_targets_min": 306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5829307568438002,
|
||
|
|
"grad_norm": 0.594664193968932,
|
||
|
|
"learning_rate": 2.2674205887888386e-05,
|
||
|
|
"loss": 0.2093,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22779573500156403,
|
||
|
|
"step": 2225,
|
||
|
|
"valid_targets_mean": 4177.1,
|
||
|
|
"valid_targets_min": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5909822866344605,
|
||
|
|
"grad_norm": 0.4922219770303433,
|
||
|
|
"learning_rate": 2.259459909894742e-05,
|
||
|
|
"loss": 0.2071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22328010201454163,
|
||
|
|
"step": 2230,
|
||
|
|
"valid_targets_mean": 5152.0,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5990338164251208,
|
||
|
|
"grad_norm": 0.48869544172575563,
|
||
|
|
"learning_rate": 2.2514950477706657e-05,
|
||
|
|
"loss": 0.1956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20914164185523987,
|
||
|
|
"step": 2235,
|
||
|
|
"valid_targets_mean": 5138.6,
|
||
|
|
"valid_targets_min": 1484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.607085346215781,
|
||
|
|
"grad_norm": 0.6963749789958893,
|
||
|
|
"learning_rate": 2.2435261308327875e-05,
|
||
|
|
"loss": 0.1945,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1746867597103119,
|
||
|
|
"step": 2240,
|
||
|
|
"valid_targets_mean": 4765.4,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6151368760064413,
|
||
|
|
"grad_norm": 0.5367886666270889,
|
||
|
|
"learning_rate": 2.2355532875626612e-05,
|
||
|
|
"loss": 0.1794,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18968860805034637,
|
||
|
|
"step": 2245,
|
||
|
|
"valid_targets_mean": 3609.9,
|
||
|
|
"valid_targets_min": 1136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6231884057971016,
|
||
|
|
"grad_norm": 0.5074205448750082,
|
||
|
|
"learning_rate": 2.2275766465051444e-05,
|
||
|
|
"loss": 0.1989,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20984122157096863,
|
||
|
|
"step": 2250,
|
||
|
|
"valid_targets_mean": 4557.3,
|
||
|
|
"valid_targets_min": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.631239935587762,
|
||
|
|
"grad_norm": 0.5328336917497768,
|
||
|
|
"learning_rate": 2.2195963362663236e-05,
|
||
|
|
"loss": 0.196,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1873650997877121,
|
||
|
|
"step": 2255,
|
||
|
|
"valid_targets_mean": 4251.6,
|
||
|
|
"valid_targets_min": 1417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.639291465378422,
|
||
|
|
"grad_norm": 0.5279031470689559,
|
||
|
|
"learning_rate": 2.211612485511446e-05,
|
||
|
|
"loss": 0.1953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19223317503929138,
|
||
|
|
"step": 2260,
|
||
|
|
"valid_targets_mean": 4547.8,
|
||
|
|
"valid_targets_min": 989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6473429951690823,
|
||
|
|
"grad_norm": 0.4807142652014487,
|
||
|
|
"learning_rate": 2.2036252229628392e-05,
|
||
|
|
"loss": 0.1855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17448017001152039,
|
||
|
|
"step": 2265,
|
||
|
|
"valid_targets_mean": 4428.8,
|
||
|
|
"valid_targets_min": 1791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6553945249597426,
|
||
|
|
"grad_norm": 0.46162214621632697,
|
||
|
|
"learning_rate": 2.19563467739784e-05,
|
||
|
|
"loss": 0.1923,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15724977850914001,
|
||
|
|
"step": 2270,
|
||
|
|
"valid_targets_mean": 4100.6,
|
||
|
|
"valid_targets_min": 1856
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6634460547504024,
|
||
|
|
"grad_norm": 0.437259128811763,
|
||
|
|
"learning_rate": 2.1876409776467165e-05,
|
||
|
|
"loss": 0.2014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19419334828853607,
|
||
|
|
"step": 2275,
|
||
|
|
"valid_targets_mean": 5604.3,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6714975845410627,
|
||
|
|
"grad_norm": 0.509046081849507,
|
||
|
|
"learning_rate": 2.1796442525905923e-05,
|
||
|
|
"loss": 0.19,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1819322556257248,
|
||
|
|
"step": 2280,
|
||
|
|
"valid_targets_mean": 3572.2,
|
||
|
|
"valid_targets_min": 1228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.679549114331723,
|
||
|
|
"grad_norm": 0.512456182600672,
|
||
|
|
"learning_rate": 2.171644631159366e-05,
|
||
|
|
"loss": 0.2032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21677514910697937,
|
||
|
|
"step": 2285,
|
||
|
|
"valid_targets_mean": 4920.6,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.687600644122383,
|
||
|
|
"grad_norm": 3.5749072057598865,
|
||
|
|
"learning_rate": 2.163642242329633e-05,
|
||
|
|
"loss": 0.2007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1948278695344925,
|
||
|
|
"step": 2290,
|
||
|
|
"valid_targets_mean": 4014.2,
|
||
|
|
"valid_targets_min": 1316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6956521739130435,
|
||
|
|
"grad_norm": 0.48865940439627575,
|
||
|
|
"learning_rate": 2.1556372151226097e-05,
|
||
|
|
"loss": 0.1904,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21440473198890686,
|
||
|
|
"step": 2295,
|
||
|
|
"valid_targets_mean": 4626.4,
|
||
|
|
"valid_targets_min": 1967
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7037037037037037,
|
||
|
|
"grad_norm": 0.5877525411956264,
|
||
|
|
"learning_rate": 2.1476296786020502e-05,
|
||
|
|
"loss": 0.1881,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.173828125,
|
||
|
|
"step": 2300,
|
||
|
|
"valid_targets_mean": 4059.1,
|
||
|
|
"valid_targets_min": 345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.711755233494364,
|
||
|
|
"grad_norm": 0.532071939990665,
|
||
|
|
"learning_rate": 2.139619761872163e-05,
|
||
|
|
"loss": 0.2034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20449981093406677,
|
||
|
|
"step": 2305,
|
||
|
|
"valid_targets_mean": 3978.1,
|
||
|
|
"valid_targets_min": 987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7198067632850242,
|
||
|
|
"grad_norm": 0.5500257667017805,
|
||
|
|
"learning_rate": 2.1316075940755363e-05,
|
||
|
|
"loss": 0.1994,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19600236415863037,
|
||
|
|
"step": 2310,
|
||
|
|
"valid_targets_mean": 4358.1,
|
||
|
|
"valid_targets_min": 2076
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7278582930756845,
|
||
|
|
"grad_norm": 0.4910824420542707,
|
||
|
|
"learning_rate": 2.1235933043910488e-05,
|
||
|
|
"loss": 0.2025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1749979555606842,
|
||
|
|
"step": 2315,
|
||
|
|
"valid_targets_mean": 3997.7,
|
||
|
|
"valid_targets_min": 1403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7359098228663448,
|
||
|
|
"grad_norm": 0.49001315022408615,
|
||
|
|
"learning_rate": 2.1155770220317918e-05,
|
||
|
|
"loss": 0.2012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1976926326751709,
|
||
|
|
"step": 2320,
|
||
|
|
"valid_targets_mean": 4852.8,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7439613526570046,
|
||
|
|
"grad_norm": 0.6702681948241392,
|
||
|
|
"learning_rate": 2.107558876242983e-05,
|
||
|
|
"loss": 0.1964,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19468314945697784,
|
||
|
|
"step": 2325,
|
||
|
|
"valid_targets_mean": 3499.4,
|
||
|
|
"valid_targets_min": 1520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.752012882447665,
|
||
|
|
"grad_norm": 0.4609480240941858,
|
||
|
|
"learning_rate": 2.0995389962998845e-05,
|
||
|
|
"loss": 0.1928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19903123378753662,
|
||
|
|
"step": 2330,
|
||
|
|
"valid_targets_mean": 4891.3,
|
||
|
|
"valid_targets_min": 1678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.760064412238325,
|
||
|
|
"grad_norm": 0.48913542025927365,
|
||
|
|
"learning_rate": 2.091517511505719e-05,
|
||
|
|
"loss": 0.2002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21223650872707367,
|
||
|
|
"step": 2335,
|
||
|
|
"valid_targets_mean": 4559.2,
|
||
|
|
"valid_targets_min": 2650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7681159420289854,
|
||
|
|
"grad_norm": 0.46883840985224706,
|
||
|
|
"learning_rate": 2.0834945511895816e-05,
|
||
|
|
"loss": 0.1885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1941649466753006,
|
||
|
|
"step": 2340,
|
||
|
|
"valid_targets_mean": 5603.5,
|
||
|
|
"valid_targets_min": 2498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7761674718196456,
|
||
|
|
"grad_norm": 0.4681238690714185,
|
||
|
|
"learning_rate": 2.0754702447043585e-05,
|
||
|
|
"loss": 0.1954,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1943756639957428,
|
||
|
|
"step": 2345,
|
||
|
|
"valid_targets_mean": 4566.8,
|
||
|
|
"valid_targets_min": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.784219001610306,
|
||
|
|
"grad_norm": 0.4482811469832385,
|
||
|
|
"learning_rate": 2.0674447214246394e-05,
|
||
|
|
"loss": 0.2,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1913878321647644,
|
||
|
|
"step": 2350,
|
||
|
|
"valid_targets_mean": 5163.6,
|
||
|
|
"valid_targets_min": 2337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.792270531400966,
|
||
|
|
"grad_norm": 0.47556405809315727,
|
||
|
|
"learning_rate": 2.059418110744633e-05,
|
||
|
|
"loss": 0.1922,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18677163124084473,
|
||
|
|
"step": 2355,
|
||
|
|
"valid_targets_mean": 4713.3,
|
||
|
|
"valid_targets_min": 933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8003220611916264,
|
||
|
|
"grad_norm": 0.4690759790686193,
|
||
|
|
"learning_rate": 2.0513905420760798e-05,
|
||
|
|
"loss": 0.1988,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19385367631912231,
|
||
|
|
"step": 2360,
|
||
|
|
"valid_targets_mean": 4902.2,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8083735909822867,
|
||
|
|
"grad_norm": 0.5121675478037654,
|
||
|
|
"learning_rate": 2.043362144846164e-05,
|
||
|
|
"loss": 0.1957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19691093266010284,
|
||
|
|
"step": 2365,
|
||
|
|
"valid_targets_mean": 3866.6,
|
||
|
|
"valid_targets_min": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.816425120772947,
|
||
|
|
"grad_norm": 0.52059333383564,
|
||
|
|
"learning_rate": 2.035333048495431e-05,
|
||
|
|
"loss": 0.1864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16973920166492462,
|
||
|
|
"step": 2370,
|
||
|
|
"valid_targets_mean": 3510.0,
|
||
|
|
"valid_targets_min": 909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.824476650563607,
|
||
|
|
"grad_norm": 0.5076146647866968,
|
||
|
|
"learning_rate": 2.0273033824756964e-05,
|
||
|
|
"loss": 0.2007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17860174179077148,
|
||
|
|
"step": 2375,
|
||
|
|
"valid_targets_mean": 3703.1,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8325281803542675,
|
||
|
|
"grad_norm": 0.6011850519404554,
|
||
|
|
"learning_rate": 2.0192732762479616e-05,
|
||
|
|
"loss": 0.1908,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20434610545635223,
|
||
|
|
"step": 2380,
|
||
|
|
"valid_targets_mean": 4579.6,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8405797101449277,
|
||
|
|
"grad_norm": 0.5345131849791486,
|
||
|
|
"learning_rate": 2.011242859280325e-05,
|
||
|
|
"loss": 0.197,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21205343306064606,
|
||
|
|
"step": 2385,
|
||
|
|
"valid_targets_mean": 4124.2,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.848631239935588,
|
||
|
|
"grad_norm": 0.5114321926687082,
|
||
|
|
"learning_rate": 2.0032122610458947e-05,
|
||
|
|
"loss": 0.1921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17156703770160675,
|
||
|
|
"step": 2390,
|
||
|
|
"valid_targets_mean": 4274.9,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8566827697262482,
|
||
|
|
"grad_norm": 0.6336174608958026,
|
||
|
|
"learning_rate": 1.9951816110207004e-05,
|
||
|
|
"loss": 0.1885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1964995563030243,
|
||
|
|
"step": 2395,
|
||
|
|
"valid_targets_mean": 4251.1,
|
||
|
|
"valid_targets_min": 1610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.864734299516908,
|
||
|
|
"grad_norm": 0.49492043165747124,
|
||
|
|
"learning_rate": 1.9871510386816103e-05,
|
||
|
|
"loss": 0.1869,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17603623867034912,
|
||
|
|
"step": 2400,
|
||
|
|
"valid_targets_mean": 4051.9,
|
||
|
|
"valid_targets_min": 1925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8727858293075683,
|
||
|
|
"grad_norm": 0.5681035881235681,
|
||
|
|
"learning_rate": 1.979120673504235e-05,
|
||
|
|
"loss": 0.2058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20705150067806244,
|
||
|
|
"step": 2405,
|
||
|
|
"valid_targets_mean": 5603.1,
|
||
|
|
"valid_targets_min": 3701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8808373590982286,
|
||
|
|
"grad_norm": 1.2929630707396456,
|
||
|
|
"learning_rate": 1.9710906449608498e-05,
|
||
|
|
"loss": 0.1998,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19122429192066193,
|
||
|
|
"step": 2410,
|
||
|
|
"valid_targets_mean": 4886.8,
|
||
|
|
"valid_targets_min": 1569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.888888888888889,
|
||
|
|
"grad_norm": 0.47492876907794285,
|
||
|
|
"learning_rate": 1.9630610825182992e-05,
|
||
|
|
"loss": 0.1871,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18603213131427765,
|
||
|
|
"step": 2415,
|
||
|
|
"valid_targets_mean": 4944.1,
|
||
|
|
"valid_targets_min": 1929
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.896940418679549,
|
||
|
|
"grad_norm": 0.5030538323026981,
|
||
|
|
"learning_rate": 1.955032115635915e-05,
|
||
|
|
"loss": 0.2138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22285720705986023,
|
||
|
|
"step": 2420,
|
||
|
|
"valid_targets_mean": 4422.1,
|
||
|
|
"valid_targets_min": 1352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9049919484702094,
|
||
|
|
"grad_norm": 0.5031818050359901,
|
||
|
|
"learning_rate": 1.9470038737634257e-05,
|
||
|
|
"loss": 0.1991,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19892553985118866,
|
||
|
|
"step": 2425,
|
||
|
|
"valid_targets_mean": 4099.8,
|
||
|
|
"valid_targets_min": 1262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9130434782608696,
|
||
|
|
"grad_norm": 0.5757670497420159,
|
||
|
|
"learning_rate": 1.9389764863388706e-05,
|
||
|
|
"loss": 0.1797,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19620567560195923,
|
||
|
|
"step": 2430,
|
||
|
|
"valid_targets_mean": 3868.5,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.92109500805153,
|
||
|
|
"grad_norm": 0.48359405966706115,
|
||
|
|
"learning_rate": 1.9309500827865136e-05,
|
||
|
|
"loss": 0.191,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1803748607635498,
|
||
|
|
"step": 2435,
|
||
|
|
"valid_targets_mean": 4338.0,
|
||
|
|
"valid_targets_min": 1348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.92914653784219,
|
||
|
|
"grad_norm": 0.4640923476833218,
|
||
|
|
"learning_rate": 1.9229247925147553e-05,
|
||
|
|
"loss": 0.1968,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1907849907875061,
|
||
|
|
"step": 2440,
|
||
|
|
"valid_targets_mean": 4583.9,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9371980676328504,
|
||
|
|
"grad_norm": 0.5117773057032822,
|
||
|
|
"learning_rate": 1.9149007449140462e-05,
|
||
|
|
"loss": 0.1877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17059685289859772,
|
||
|
|
"step": 2445,
|
||
|
|
"valid_targets_mean": 4338.1,
|
||
|
|
"valid_targets_min": 1784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9452495974235102,
|
||
|
|
"grad_norm": 0.5915049699700269,
|
||
|
|
"learning_rate": 1.906878069354804e-05,
|
||
|
|
"loss": 0.1955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21201738715171814,
|
||
|
|
"step": 2450,
|
||
|
|
"valid_targets_mean": 3018.6,
|
||
|
|
"valid_targets_min": 1027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9533011272141705,
|
||
|
|
"grad_norm": 0.47160593652811594,
|
||
|
|
"learning_rate": 1.898856895185322e-05,
|
||
|
|
"loss": 0.1862,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16955235600471497,
|
||
|
|
"step": 2455,
|
||
|
|
"valid_targets_mean": 4181.1,
|
||
|
|
"valid_targets_min": 1075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9613526570048307,
|
||
|
|
"grad_norm": 0.49702142957897827,
|
||
|
|
"learning_rate": 1.8908373517296888e-05,
|
||
|
|
"loss": 0.1943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19719739258289337,
|
||
|
|
"step": 2460,
|
||
|
|
"valid_targets_mean": 4295.4,
|
||
|
|
"valid_targets_min": 249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.969404186795491,
|
||
|
|
"grad_norm": 0.5129921266484054,
|
||
|
|
"learning_rate": 1.882819568285701e-05,
|
||
|
|
"loss": 0.191,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19295062124729156,
|
||
|
|
"step": 2465,
|
||
|
|
"valid_targets_mean": 3921.4,
|
||
|
|
"valid_targets_min": 966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9774557165861513,
|
||
|
|
"grad_norm": 0.45382648008126336,
|
||
|
|
"learning_rate": 1.874803674122778e-05,
|
||
|
|
"loss": 0.1929,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1945837438106537,
|
||
|
|
"step": 2470,
|
||
|
|
"valid_targets_mean": 5054.1,
|
||
|
|
"valid_targets_min": 1828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9855072463768115,
|
||
|
|
"grad_norm": 0.47820396478837407,
|
||
|
|
"learning_rate": 1.8667897984798804e-05,
|
||
|
|
"loss": 0.2066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21562841534614563,
|
||
|
|
"step": 2475,
|
||
|
|
"valid_targets_mean": 4692.4,
|
||
|
|
"valid_targets_min": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.993558776167472,
|
||
|
|
"grad_norm": 0.539195337968872,
|
||
|
|
"learning_rate": 1.858778070563422e-05,
|
||
|
|
"loss": 0.1889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1887316107749939,
|
||
|
|
"step": 2480,
|
||
|
|
"valid_targets_mean": 4306.7,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.001610305958132,
|
||
|
|
"grad_norm": 0.5374780928948082,
|
||
|
|
"learning_rate": 1.8507686195451918e-05,
|
||
|
|
"loss": 0.194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25074928998947144,
|
||
|
|
"step": 2485,
|
||
|
|
"valid_targets_mean": 4598.8,
|
||
|
|
"valid_targets_min": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.009661835748792,
|
||
|
|
"grad_norm": 0.4944011317477905,
|
||
|
|
"learning_rate": 1.8427615745602667e-05,
|
||
|
|
"loss": 0.1624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15997666120529175,
|
||
|
|
"step": 2490,
|
||
|
|
"valid_targets_mean": 4936.9,
|
||
|
|
"valid_targets_min": 1591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.017713365539453,
|
||
|
|
"grad_norm": 0.4765635224384645,
|
||
|
|
"learning_rate": 1.834757064704933e-05,
|
||
|
|
"loss": 0.1769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1592136025428772,
|
||
|
|
"step": 2495,
|
||
|
|
"valid_targets_mean": 4652.1,
|
||
|
|
"valid_targets_min": 1149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.025764895330113,
|
||
|
|
"grad_norm": 0.4956167235108182,
|
||
|
|
"learning_rate": 1.826755219034603e-05,
|
||
|
|
"loss": 0.1939,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19604167342185974,
|
||
|
|
"step": 2500,
|
||
|
|
"valid_targets_mean": 4386.4,
|
||
|
|
"valid_targets_min": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.033816425120773,
|
||
|
|
"grad_norm": 0.45476926402320006,
|
||
|
|
"learning_rate": 1.818756166561733e-05,
|
||
|
|
"loss": 0.1865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1637008637189865,
|
||
|
|
"step": 2505,
|
||
|
|
"valid_targets_mean": 4699.8,
|
||
|
|
"valid_targets_min": 1197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.041867954911433,
|
||
|
|
"grad_norm": 0.43733602489225165,
|
||
|
|
"learning_rate": 1.8107600362537473e-05,
|
||
|
|
"loss": 0.1778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16251038014888763,
|
||
|
|
"step": 2510,
|
||
|
|
"valid_targets_mean": 4847.2,
|
||
|
|
"valid_targets_min": 1813
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.049919484702094,
|
||
|
|
"grad_norm": 0.49193894405362015,
|
||
|
|
"learning_rate": 1.8027669570309572e-05,
|
||
|
|
"loss": 0.1885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16793620586395264,
|
||
|
|
"step": 2515,
|
||
|
|
"valid_targets_mean": 4591.9,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.057971014492754,
|
||
|
|
"grad_norm": 0.5075017593056104,
|
||
|
|
"learning_rate": 1.7947770577644787e-05,
|
||
|
|
"loss": 0.1803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1729605346918106,
|
||
|
|
"step": 2520,
|
||
|
|
"valid_targets_mean": 4395.8,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.066022544283414,
|
||
|
|
"grad_norm": 0.499304090315969,
|
||
|
|
"learning_rate": 1.786790467274161e-05,
|
||
|
|
"loss": 0.1664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17558912932872772,
|
||
|
|
"step": 2525,
|
||
|
|
"valid_targets_mean": 4637.6,
|
||
|
|
"valid_targets_min": 2011
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.074074074074074,
|
||
|
|
"grad_norm": 0.5559504833423238,
|
||
|
|
"learning_rate": 1.778807314326505e-05,
|
||
|
|
"loss": 0.1784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18689918518066406,
|
||
|
|
"step": 2530,
|
||
|
|
"valid_targets_mean": 3930.3,
|
||
|
|
"valid_targets_min": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.082125603864735,
|
||
|
|
"grad_norm": 0.47641769475056645,
|
||
|
|
"learning_rate": 1.7708277276325886e-05,
|
||
|
|
"loss": 0.1714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1637909710407257,
|
||
|
|
"step": 2535,
|
||
|
|
"valid_targets_mean": 5154.6,
|
||
|
|
"valid_targets_min": 1826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.090177133655395,
|
||
|
|
"grad_norm": 0.5182362997358064,
|
||
|
|
"learning_rate": 1.762851835845992e-05,
|
||
|
|
"loss": 0.1803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18916486203670502,
|
||
|
|
"step": 2540,
|
||
|
|
"valid_targets_mean": 4544.1,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.098228663446055,
|
||
|
|
"grad_norm": 0.4862343355871757,
|
||
|
|
"learning_rate": 1.754879767560723e-05,
|
||
|
|
"loss": 0.1981,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18593139946460724,
|
||
|
|
"step": 2545,
|
||
|
|
"valid_targets_mean": 4532.4,
|
||
|
|
"valid_targets_min": 1521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.106280193236715,
|
||
|
|
"grad_norm": 0.527960071470203,
|
||
|
|
"learning_rate": 1.746911651309144e-05,
|
||
|
|
"loss": 0.1788,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19690777361392975,
|
||
|
|
"step": 2550,
|
||
|
|
"valid_targets_mean": 3843.6,
|
||
|
|
"valid_targets_min": 881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.114331723027375,
|
||
|
|
"grad_norm": 0.5103425212083436,
|
||
|
|
"learning_rate": 1.7389476155598974e-05,
|
||
|
|
"loss": 0.187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21908551454544067,
|
||
|
|
"step": 2555,
|
||
|
|
"valid_targets_mean": 5082.4,
|
||
|
|
"valid_targets_min": 784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.122383252818035,
|
||
|
|
"grad_norm": 0.7756087805548777,
|
||
|
|
"learning_rate": 1.7309877887158388e-05,
|
||
|
|
"loss": 0.1832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1794763058423996,
|
||
|
|
"step": 2560,
|
||
|
|
"valid_targets_mean": 2992.2,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.130434782608695,
|
||
|
|
"grad_norm": 0.4567684656881922,
|
||
|
|
"learning_rate": 1.723032299111964e-05,
|
||
|
|
"loss": 0.1688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15088678896427155,
|
||
|
|
"step": 2565,
|
||
|
|
"valid_targets_mean": 4922.8,
|
||
|
|
"valid_targets_min": 1327
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.138486312399356,
|
||
|
|
"grad_norm": 0.5425918961875108,
|
||
|
|
"learning_rate": 1.7150812750133382e-05,
|
||
|
|
"loss": 0.1775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1673865020275116,
|
||
|
|
"step": 2570,
|
||
|
|
"valid_targets_mean": 3965.9,
|
||
|
|
"valid_targets_min": 1407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.146537842190016,
|
||
|
|
"grad_norm": 0.6838407873867918,
|
||
|
|
"learning_rate": 1.707134844613032e-05,
|
||
|
|
"loss": 0.1818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15312550961971283,
|
||
|
|
"step": 2575,
|
||
|
|
"valid_targets_mean": 4199.9,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.154589371980676,
|
||
|
|
"grad_norm": 0.5385560587074719,
|
||
|
|
"learning_rate": 1.699193136030052e-05,
|
||
|
|
"loss": 0.1984,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17279349267482758,
|
||
|
|
"step": 2580,
|
||
|
|
"valid_targets_mean": 3938.8,
|
||
|
|
"valid_targets_min": 880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.162640901771336,
|
||
|
|
"grad_norm": 0.5502968325727028,
|
||
|
|
"learning_rate": 1.6912562773072765e-05,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18601390719413757,
|
||
|
|
"step": 2585,
|
||
|
|
"valid_targets_mean": 4653.2,
|
||
|
|
"valid_targets_min": 2040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.170692431561997,
|
||
|
|
"grad_norm": 0.5862578517681709,
|
||
|
|
"learning_rate": 1.6833243964093877e-05,
|
||
|
|
"loss": 0.1952,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2094813883304596,
|
||
|
|
"step": 2590,
|
||
|
|
"valid_targets_mean": 4215.5,
|
||
|
|
"valid_targets_min": 1508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.178743961352657,
|
||
|
|
"grad_norm": 0.525233464465349,
|
||
|
|
"learning_rate": 1.6753976212208137e-05,
|
||
|
|
"loss": 0.1795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17922204732894897,
|
||
|
|
"step": 2595,
|
||
|
|
"valid_targets_mean": 3851.4,
|
||
|
|
"valid_targets_min": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.186795491143317,
|
||
|
|
"grad_norm": 0.6610861468502892,
|
||
|
|
"learning_rate": 1.667476079543664e-05,
|
||
|
|
"loss": 0.1907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17614664137363434,
|
||
|
|
"step": 2600,
|
||
|
|
"valid_targets_mean": 3052.1,
|
||
|
|
"valid_targets_min": 691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.194847020933977,
|
||
|
|
"grad_norm": 0.5630100784699484,
|
||
|
|
"learning_rate": 1.659559899095667e-05,
|
||
|
|
"loss": 0.1895,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20966169238090515,
|
||
|
|
"step": 2605,
|
||
|
|
"valid_targets_mean": 5309.3,
|
||
|
|
"valid_targets_min": 1187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.202898550724638,
|
||
|
|
"grad_norm": 0.6807488645292253,
|
||
|
|
"learning_rate": 1.651649207508114e-05,
|
||
|
|
"loss": 0.1771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19908779859542847,
|
||
|
|
"step": 2610,
|
||
|
|
"valid_targets_mean": 4216.5,
|
||
|
|
"valid_targets_min": 1027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.210950080515298,
|
||
|
|
"grad_norm": 0.49692416499926767,
|
||
|
|
"learning_rate": 1.643744132323801e-05,
|
||
|
|
"loss": 0.1844,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19740644097328186,
|
||
|
|
"step": 2615,
|
||
|
|
"valid_targets_mean": 4830.8,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.219001610305958,
|
||
|
|
"grad_norm": 0.6036326467813666,
|
||
|
|
"learning_rate": 1.6358448009949714e-05,
|
||
|
|
"loss": 0.1855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22196456789970398,
|
||
|
|
"step": 2620,
|
||
|
|
"valid_targets_mean": 5333.3,
|
||
|
|
"valid_targets_min": 2355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2270531400966185,
|
||
|
|
"grad_norm": 0.4829391874239536,
|
||
|
|
"learning_rate": 1.6279513408812603e-05,
|
||
|
|
"loss": 0.17,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1743869185447693,
|
||
|
|
"step": 2625,
|
||
|
|
"valid_targets_mean": 5413.8,
|
||
|
|
"valid_targets_min": 1251
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.235104669887279,
|
||
|
|
"grad_norm": 0.5048080599206834,
|
||
|
|
"learning_rate": 1.620063879247643e-05,
|
||
|
|
"loss": 0.1731,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16595719754695892,
|
||
|
|
"step": 2630,
|
||
|
|
"valid_targets_mean": 3924.2,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.243156199677939,
|
||
|
|
"grad_norm": 0.46752510731782826,
|
||
|
|
"learning_rate": 1.6121825432623827e-05,
|
||
|
|
"loss": 0.1814,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1762651652097702,
|
||
|
|
"step": 2635,
|
||
|
|
"valid_targets_mean": 5055.4,
|
||
|
|
"valid_targets_min": 1403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.251207729468599,
|
||
|
|
"grad_norm": 0.4797644865040805,
|
||
|
|
"learning_rate": 1.6043074599949785e-05,
|
||
|
|
"loss": 0.1808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17031241953372955,
|
||
|
|
"step": 2640,
|
||
|
|
"valid_targets_mean": 4893.6,
|
||
|
|
"valid_targets_min": 966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2592592592592595,
|
||
|
|
"grad_norm": 1.0165849437996184,
|
||
|
|
"learning_rate": 1.5964387564141192e-05,
|
||
|
|
"loss": 0.1712,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19583508372306824,
|
||
|
|
"step": 2645,
|
||
|
|
"valid_targets_mean": 4988.9,
|
||
|
|
"valid_targets_min": 1611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.26731078904992,
|
||
|
|
"grad_norm": 0.6039054133342181,
|
||
|
|
"learning_rate": 1.588576559385635e-05,
|
||
|
|
"loss": 0.1827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17843933403491974,
|
||
|
|
"step": 2650,
|
||
|
|
"valid_targets_mean": 4234.7,
|
||
|
|
"valid_targets_min": 1211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.27536231884058,
|
||
|
|
"grad_norm": 0.512856167089867,
|
||
|
|
"learning_rate": 1.5807209956704505e-05,
|
||
|
|
"loss": 0.1945,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1996535062789917,
|
||
|
|
"step": 2655,
|
||
|
|
"valid_targets_mean": 4500.6,
|
||
|
|
"valid_targets_min": 1506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.28341384863124,
|
||
|
|
"grad_norm": 0.4991515466828228,
|
||
|
|
"learning_rate": 1.5728721919225428e-05,
|
||
|
|
"loss": 0.1629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14569713175296783,
|
||
|
|
"step": 2660,
|
||
|
|
"valid_targets_mean": 4720.1,
|
||
|
|
"valid_targets_min": 1211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.291465378421901,
|
||
|
|
"grad_norm": 0.9601831661650259,
|
||
|
|
"learning_rate": 1.5650302746869004e-05,
|
||
|
|
"loss": 0.1857,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1880815625190735,
|
||
|
|
"step": 2665,
|
||
|
|
"valid_targets_mean": 4171.6,
|
||
|
|
"valid_targets_min": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.29951690821256,
|
||
|
|
"grad_norm": 0.4412736536027149,
|
||
|
|
"learning_rate": 1.5571953703974813e-05,
|
||
|
|
"loss": 0.1752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16794951260089874,
|
||
|
|
"step": 2670,
|
||
|
|
"valid_targets_mean": 5724.5,
|
||
|
|
"valid_targets_min": 2565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.30756843800322,
|
||
|
|
"grad_norm": 0.49660094639156666,
|
||
|
|
"learning_rate": 1.5493676053751747e-05,
|
||
|
|
"loss": 0.1701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17535799741744995,
|
||
|
|
"step": 2675,
|
||
|
|
"valid_targets_mean": 4800.3,
|
||
|
|
"valid_targets_min": 1829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3156199677938805,
|
||
|
|
"grad_norm": 0.5456531223844872,
|
||
|
|
"learning_rate": 1.5415471058257638e-05,
|
||
|
|
"loss": 0.1783,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17335663735866547,
|
||
|
|
"step": 2680,
|
||
|
|
"valid_targets_mean": 4570.4,
|
||
|
|
"valid_targets_min": 1658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.323671497584541,
|
||
|
|
"grad_norm": 0.45678206025420953,
|
||
|
|
"learning_rate": 1.533733997837893e-05,
|
||
|
|
"loss": 0.187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.169780433177948,
|
||
|
|
"step": 2685,
|
||
|
|
"valid_targets_mean": 4216.1,
|
||
|
|
"valid_targets_min": 1449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.331723027375201,
|
||
|
|
"grad_norm": 0.5415606665772222,
|
||
|
|
"learning_rate": 1.5259284073810333e-05,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16021227836608887,
|
||
|
|
"step": 2690,
|
||
|
|
"valid_targets_mean": 3275.1,
|
||
|
|
"valid_targets_min": 984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.339774557165861,
|
||
|
|
"grad_norm": 0.5136691048212793,
|
||
|
|
"learning_rate": 1.5181304603034513e-05,
|
||
|
|
"loss": 0.1914,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1827298104763031,
|
||
|
|
"step": 2695,
|
||
|
|
"valid_targets_mean": 4889.6,
|
||
|
|
"valid_targets_min": 1238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3478260869565215,
|
||
|
|
"grad_norm": 1.4914173173538663,
|
||
|
|
"learning_rate": 1.5103402823301819e-05,
|
||
|
|
"loss": 0.1838,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18776141107082367,
|
||
|
|
"step": 2700,
|
||
|
|
"valid_targets_mean": 4532.6,
|
||
|
|
"valid_targets_min": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.355877616747182,
|
||
|
|
"grad_norm": 0.5083803805655037,
|
||
|
|
"learning_rate": 1.5025579990609973e-05,
|
||
|
|
"loss": 0.1821,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16755394637584686,
|
||
|
|
"step": 2705,
|
||
|
|
"valid_targets_mean": 4128.1,
|
||
|
|
"valid_targets_min": 379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.363929146537842,
|
||
|
|
"grad_norm": 0.500974762279468,
|
||
|
|
"learning_rate": 1.4947837359683882e-05,
|
||
|
|
"loss": 0.1829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17978541553020477,
|
||
|
|
"step": 2710,
|
||
|
|
"valid_targets_mean": 5342.4,
|
||
|
|
"valid_targets_min": 2152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.371980676328502,
|
||
|
|
"grad_norm": 0.5603545589277986,
|
||
|
|
"learning_rate": 1.487017618395534e-05,
|
||
|
|
"loss": 0.1781,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.198044091463089,
|
||
|
|
"step": 2715,
|
||
|
|
"valid_targets_mean": 4740.4,
|
||
|
|
"valid_targets_min": 1746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3800322061191626,
|
||
|
|
"grad_norm": 0.4771112752411543,
|
||
|
|
"learning_rate": 1.479259771554288e-05,
|
||
|
|
"loss": 0.1931,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1699572205543518,
|
||
|
|
"step": 2720,
|
||
|
|
"valid_targets_mean": 4872.8,
|
||
|
|
"valid_targets_min": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.388083735909823,
|
||
|
|
"grad_norm": 0.521661536652827,
|
||
|
|
"learning_rate": 1.4715103205231545e-05,
|
||
|
|
"loss": 0.1898,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18613924086093903,
|
||
|
|
"step": 2725,
|
||
|
|
"valid_targets_mean": 4061.7,
|
||
|
|
"valid_targets_min": 995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.396135265700483,
|
||
|
|
"grad_norm": 0.537250383283014,
|
||
|
|
"learning_rate": 1.463769390245273e-05,
|
||
|
|
"loss": 0.1794,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.178946852684021,
|
||
|
|
"step": 2730,
|
||
|
|
"valid_targets_mean": 4061.8,
|
||
|
|
"valid_targets_min": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.404186795491143,
|
||
|
|
"grad_norm": 0.5244437127559632,
|
||
|
|
"learning_rate": 1.4560371055264052e-05,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18043842911720276,
|
||
|
|
"step": 2735,
|
||
|
|
"valid_targets_mean": 4802.7,
|
||
|
|
"valid_targets_min": 1843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.412238325281804,
|
||
|
|
"grad_norm": 0.5146870682156327,
|
||
|
|
"learning_rate": 1.448313591032922e-05,
|
||
|
|
"loss": 0.1887,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17479149997234344,
|
||
|
|
"step": 2740,
|
||
|
|
"valid_targets_mean": 5214.2,
|
||
|
|
"valid_targets_min": 1866
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.420289855072464,
|
||
|
|
"grad_norm": 0.4691547439815975,
|
||
|
|
"learning_rate": 1.4405989712897923e-05,
|
||
|
|
"loss": 0.1853,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18224862217903137,
|
||
|
|
"step": 2745,
|
||
|
|
"valid_targets_mean": 5050.6,
|
||
|
|
"valid_targets_min": 1427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.428341384863124,
|
||
|
|
"grad_norm": 0.5424155500541844,
|
||
|
|
"learning_rate": 1.4328933706785782e-05,
|
||
|
|
"loss": 0.1695,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1883089393377304,
|
||
|
|
"step": 2750,
|
||
|
|
"valid_targets_mean": 4057.4,
|
||
|
|
"valid_targets_min": 854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.436392914653784,
|
||
|
|
"grad_norm": 0.5100283152717094,
|
||
|
|
"learning_rate": 1.4251969134354247e-05,
|
||
|
|
"loss": 0.1703,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16344183683395386,
|
||
|
|
"step": 2755,
|
||
|
|
"valid_targets_mean": 4199.4,
|
||
|
|
"valid_targets_min": 1557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.444444444444445,
|
||
|
|
"grad_norm": 0.5071596893081406,
|
||
|
|
"learning_rate": 1.4175097236490627e-05,
|
||
|
|
"loss": 0.1772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1783141791820526,
|
||
|
|
"step": 2760,
|
||
|
|
"valid_targets_mean": 4905.3,
|
||
|
|
"valid_targets_min": 383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.452495974235105,
|
||
|
|
"grad_norm": 0.4808405938396642,
|
||
|
|
"learning_rate": 1.409831925258805e-05,
|
||
|
|
"loss": 0.1864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19857333600521088,
|
||
|
|
"step": 2765,
|
||
|
|
"valid_targets_mean": 4961.1,
|
||
|
|
"valid_targets_min": 847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.460547504025765,
|
||
|
|
"grad_norm": 0.48057741594526054,
|
||
|
|
"learning_rate": 1.4021636420525466e-05,
|
||
|
|
"loss": 0.1824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1644965559244156,
|
||
|
|
"step": 2770,
|
||
|
|
"valid_targets_mean": 4509.1,
|
||
|
|
"valid_targets_min": 1227
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.468599033816425,
|
||
|
|
"grad_norm": 0.5128321881887902,
|
||
|
|
"learning_rate": 1.3945049976647726e-05,
|
||
|
|
"loss": 0.1766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1749866008758545,
|
||
|
|
"step": 2775,
|
||
|
|
"valid_targets_mean": 4825.0,
|
||
|
|
"valid_targets_min": 1201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.476650563607086,
|
||
|
|
"grad_norm": 0.6327024505387485,
|
||
|
|
"learning_rate": 1.3868561155745628e-05,
|
||
|
|
"loss": 0.1917,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19712504744529724,
|
||
|
|
"step": 2780,
|
||
|
|
"valid_targets_mean": 4430.3,
|
||
|
|
"valid_targets_min": 703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.484702093397746,
|
||
|
|
"grad_norm": 0.4913167577071679,
|
||
|
|
"learning_rate": 1.3792171191036001e-05,
|
||
|
|
"loss": 0.1769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15714874863624573,
|
||
|
|
"step": 2785,
|
||
|
|
"valid_targets_mean": 4104.7,
|
||
|
|
"valid_targets_min": 1663
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.492753623188406,
|
||
|
|
"grad_norm": 0.5394186960786538,
|
||
|
|
"learning_rate": 1.3715881314141835e-05,
|
||
|
|
"loss": 0.1902,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19951429963111877,
|
||
|
|
"step": 2790,
|
||
|
|
"valid_targets_mean": 4620.1,
|
||
|
|
"valid_targets_min": 1843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.500805152979066,
|
||
|
|
"grad_norm": 0.5953737763754138,
|
||
|
|
"learning_rate": 1.3639692755072429e-05,
|
||
|
|
"loss": 0.1882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2141798585653305,
|
||
|
|
"step": 2795,
|
||
|
|
"valid_targets_mean": 4660.8,
|
||
|
|
"valid_targets_min": 1401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.508856682769727,
|
||
|
|
"grad_norm": 0.5453366269316137,
|
||
|
|
"learning_rate": 1.3563606742203548e-05,
|
||
|
|
"loss": 0.1969,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20897048711776733,
|
||
|
|
"step": 2800,
|
||
|
|
"valid_targets_mean": 4447.4,
|
||
|
|
"valid_targets_min": 1113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.516908212560386,
|
||
|
|
"grad_norm": 0.531617180983723,
|
||
|
|
"learning_rate": 1.3487624502257598e-05,
|
||
|
|
"loss": 0.18,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20468959212303162,
|
||
|
|
"step": 2805,
|
||
|
|
"valid_targets_mean": 4891.9,
|
||
|
|
"valid_targets_min": 2624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.524959742351046,
|
||
|
|
"grad_norm": 1.0175978905323302,
|
||
|
|
"learning_rate": 1.3411747260283905e-05,
|
||
|
|
"loss": 0.1882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1867702454328537,
|
||
|
|
"step": 2810,
|
||
|
|
"valid_targets_mean": 4380.1,
|
||
|
|
"valid_targets_min": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.533011272141707,
|
||
|
|
"grad_norm": 0.5172120222306495,
|
||
|
|
"learning_rate": 1.333597623963892e-05,
|
||
|
|
"loss": 0.178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17019790410995483,
|
||
|
|
"step": 2815,
|
||
|
|
"valid_targets_mean": 4795.6,
|
||
|
|
"valid_targets_min": 626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.541062801932367,
|
||
|
|
"grad_norm": 0.6059009807629652,
|
||
|
|
"learning_rate": 1.3260312661966487e-05,
|
||
|
|
"loss": 0.18,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17550814151763916,
|
||
|
|
"step": 2820,
|
||
|
|
"valid_targets_mean": 5061.1,
|
||
|
|
"valid_targets_min": 1263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.549114331723027,
|
||
|
|
"grad_norm": 0.613180864431363,
|
||
|
|
"learning_rate": 1.3184757747178187e-05,
|
||
|
|
"loss": 0.1758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19658298790454865,
|
||
|
|
"step": 2825,
|
||
|
|
"valid_targets_mean": 5041.3,
|
||
|
|
"valid_targets_min": 1795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.557165861513687,
|
||
|
|
"grad_norm": 0.5633900947396653,
|
||
|
|
"learning_rate": 1.3109312713433642e-05,
|
||
|
|
"loss": 0.1734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18244323134422302,
|
||
|
|
"step": 2830,
|
||
|
|
"valid_targets_mean": 4282.2,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.565217391304348,
|
||
|
|
"grad_norm": 0.9588780998775635,
|
||
|
|
"learning_rate": 1.3033978777120861e-05,
|
||
|
|
"loss": 0.1718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1690833568572998,
|
||
|
|
"step": 2835,
|
||
|
|
"valid_targets_mean": 3991.0,
|
||
|
|
"valid_targets_min": 1088
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.573268921095008,
|
||
|
|
"grad_norm": 0.5205077973662069,
|
||
|
|
"learning_rate": 1.2958757152836671e-05,
|
||
|
|
"loss": 0.1842,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22376713156700134,
|
||
|
|
"step": 2840,
|
||
|
|
"valid_targets_mean": 4458.4,
|
||
|
|
"valid_targets_min": 2143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.581320450885668,
|
||
|
|
"grad_norm": 0.5253874104264332,
|
||
|
|
"learning_rate": 1.2883649053367106e-05,
|
||
|
|
"loss": 0.1808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1675848662853241,
|
||
|
|
"step": 2845,
|
||
|
|
"valid_targets_mean": 4003.0,
|
||
|
|
"valid_targets_min": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5893719806763285,
|
||
|
|
"grad_norm": 0.5452379881212879,
|
||
|
|
"learning_rate": 1.2808655689667846e-05,
|
||
|
|
"loss": 0.1818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1796257197856903,
|
||
|
|
"step": 2850,
|
||
|
|
"valid_targets_mean": 4152.3,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.597423510466989,
|
||
|
|
"grad_norm": 0.5239905621350787,
|
||
|
|
"learning_rate": 1.2733778270844712e-05,
|
||
|
|
"loss": 0.1889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18232783675193787,
|
||
|
|
"step": 2855,
|
||
|
|
"valid_targets_mean": 4419.6,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.605475040257649,
|
||
|
|
"grad_norm": 0.5607032815508621,
|
||
|
|
"learning_rate": 1.265901800413416e-05,
|
||
|
|
"loss": 0.1853,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17835384607315063,
|
||
|
|
"step": 2860,
|
||
|
|
"valid_targets_mean": 3395.8,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.613526570048309,
|
||
|
|
"grad_norm": 0.9496967272226038,
|
||
|
|
"learning_rate": 1.2584376094883832e-05,
|
||
|
|
"loss": 0.1845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17716357111930847,
|
||
|
|
"step": 2865,
|
||
|
|
"valid_targets_mean": 4483.6,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6215780998389695,
|
||
|
|
"grad_norm": 0.7842117859067653,
|
||
|
|
"learning_rate": 1.250985374653311e-05,
|
||
|
|
"loss": 0.1784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17786236107349396,
|
||
|
|
"step": 2870,
|
||
|
|
"valid_targets_mean": 4693.0,
|
||
|
|
"valid_targets_min": 1134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.62962962962963,
|
||
|
|
"grad_norm": 0.5033684151280889,
|
||
|
|
"learning_rate": 1.2435452160593698e-05,
|
||
|
|
"loss": 0.1874,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17148058116436005,
|
||
|
|
"step": 2875,
|
||
|
|
"valid_targets_mean": 4303.3,
|
||
|
|
"valid_targets_min": 1429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.63768115942029,
|
||
|
|
"grad_norm": 0.5032006756891246,
|
||
|
|
"learning_rate": 1.2361172536630288e-05,
|
||
|
|
"loss": 0.182,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16900071501731873,
|
||
|
|
"step": 2880,
|
||
|
|
"valid_targets_mean": 4471.4,
|
||
|
|
"valid_targets_min": 1507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.64573268921095,
|
||
|
|
"grad_norm": 0.5449401674057853,
|
||
|
|
"learning_rate": 1.2287016072241195e-05,
|
||
|
|
"loss": 0.2019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19873306155204773,
|
||
|
|
"step": 2885,
|
||
|
|
"valid_targets_mean": 4050.9,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6537842190016105,
|
||
|
|
"grad_norm": 0.5002283792241559,
|
||
|
|
"learning_rate": 1.221298396303904e-05,
|
||
|
|
"loss": 0.1746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17124618589878082,
|
||
|
|
"step": 2890,
|
||
|
|
"valid_targets_mean": 5187.2,
|
||
|
|
"valid_targets_min": 1399
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.661835748792271,
|
||
|
|
"grad_norm": 0.4706451280493202,
|
||
|
|
"learning_rate": 1.2139077402631495e-05,
|
||
|
|
"loss": 0.1721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14620235562324524,
|
||
|
|
"step": 2895,
|
||
|
|
"valid_targets_mean": 4477.4,
|
||
|
|
"valid_targets_min": 1454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.669887278582931,
|
||
|
|
"grad_norm": 0.5330785987823722,
|
||
|
|
"learning_rate": 1.2065297582602037e-05,
|
||
|
|
"loss": 0.1724,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17816966772079468,
|
||
|
|
"step": 2900,
|
||
|
|
"valid_targets_mean": 4355.4,
|
||
|
|
"valid_targets_min": 2065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.677938808373591,
|
||
|
|
"grad_norm": 0.4942305241126504,
|
||
|
|
"learning_rate": 1.199164569249071e-05,
|
||
|
|
"loss": 0.1832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18558406829833984,
|
||
|
|
"step": 2905,
|
||
|
|
"valid_targets_mean": 4951.6,
|
||
|
|
"valid_targets_min": 1299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.685990338164252,
|
||
|
|
"grad_norm": 0.537628904978011,
|
||
|
|
"learning_rate": 1.191812291977497e-05,
|
||
|
|
"loss": 0.1826,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19403138756752014,
|
||
|
|
"step": 2910,
|
||
|
|
"valid_targets_mean": 4024.0,
|
||
|
|
"valid_targets_min": 1450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.694041867954912,
|
||
|
|
"grad_norm": 0.5439040484202802,
|
||
|
|
"learning_rate": 1.1844730449850546e-05,
|
||
|
|
"loss": 0.1782,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19234639406204224,
|
||
|
|
"step": 2915,
|
||
|
|
"valid_targets_mean": 5036.8,
|
||
|
|
"valid_targets_min": 1058
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.702093397745571,
|
||
|
|
"grad_norm": 0.583242909808935,
|
||
|
|
"learning_rate": 1.1771469466012309e-05,
|
||
|
|
"loss": 0.1768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20131342113018036,
|
||
|
|
"step": 2920,
|
||
|
|
"valid_targets_mean": 4106.1,
|
||
|
|
"valid_targets_min": 983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.710144927536232,
|
||
|
|
"grad_norm": 0.5038233636197879,
|
||
|
|
"learning_rate": 1.1698341149435196e-05,
|
||
|
|
"loss": 0.1849,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1684148907661438,
|
||
|
|
"step": 2925,
|
||
|
|
"valid_targets_mean": 4802.6,
|
||
|
|
"valid_targets_min": 1952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.718196457326892,
|
||
|
|
"grad_norm": 0.4600414999991662,
|
||
|
|
"learning_rate": 1.1625346679155179e-05,
|
||
|
|
"loss": 0.1878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15985998511314392,
|
||
|
|
"step": 2930,
|
||
|
|
"valid_targets_mean": 4799.9,
|
||
|
|
"valid_targets_min": 1859
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.726247987117552,
|
||
|
|
"grad_norm": 0.4936755991303249,
|
||
|
|
"learning_rate": 1.1552487232050242e-05,
|
||
|
|
"loss": 0.1827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17861491441726685,
|
||
|
|
"step": 2935,
|
||
|
|
"valid_targets_mean": 4306.8,
|
||
|
|
"valid_targets_min": 1605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.734299516908212,
|
||
|
|
"grad_norm": 0.44442070244437953,
|
||
|
|
"learning_rate": 1.1479763982821414e-05,
|
||
|
|
"loss": 0.1711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17784011363983154,
|
||
|
|
"step": 2940,
|
||
|
|
"valid_targets_mean": 5608.9,
|
||
|
|
"valid_targets_min": 1956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7423510466988725,
|
||
|
|
"grad_norm": 0.5637401886713175,
|
||
|
|
"learning_rate": 1.1407178103973834e-05,
|
||
|
|
"loss": 0.1983,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18229396641254425,
|
||
|
|
"step": 2945,
|
||
|
|
"valid_targets_mean": 3870.8,
|
||
|
|
"valid_targets_min": 864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.750402576489533,
|
||
|
|
"grad_norm": 0.45308334583470944,
|
||
|
|
"learning_rate": 1.1334730765797843e-05,
|
||
|
|
"loss": 0.1841,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19959984719753265,
|
||
|
|
"step": 2950,
|
||
|
|
"valid_targets_mean": 5085.4,
|
||
|
|
"valid_targets_min": 1791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.758454106280193,
|
||
|
|
"grad_norm": 0.5571182515606663,
|
||
|
|
"learning_rate": 1.1262423136350087e-05,
|
||
|
|
"loss": 0.1956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22160165011882782,
|
||
|
|
"step": 2955,
|
||
|
|
"valid_targets_mean": 3792.0,
|
||
|
|
"valid_targets_min": 1099
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.766505636070853,
|
||
|
|
"grad_norm": 0.5901554994508056,
|
||
|
|
"learning_rate": 1.1190256381434738e-05,
|
||
|
|
"loss": 0.186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17873704433441162,
|
||
|
|
"step": 2960,
|
||
|
|
"valid_targets_mean": 3899.3,
|
||
|
|
"valid_targets_min": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.774557165861514,
|
||
|
|
"grad_norm": 0.5158336477050018,
|
||
|
|
"learning_rate": 1.1118231664584674e-05,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1886855959892273,
|
||
|
|
"step": 2965,
|
||
|
|
"valid_targets_mean": 4861.4,
|
||
|
|
"valid_targets_min": 1898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.782608695652174,
|
||
|
|
"grad_norm": 0.4887213717876692,
|
||
|
|
"learning_rate": 1.1046350147042681e-05,
|
||
|
|
"loss": 0.187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20001167058944702,
|
||
|
|
"step": 2970,
|
||
|
|
"valid_targets_mean": 4675.2,
|
||
|
|
"valid_targets_min": 952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.790660225442834,
|
||
|
|
"grad_norm": 0.48768071610571156,
|
||
|
|
"learning_rate": 1.0974612987742807e-05,
|
||
|
|
"loss": 0.1864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18149898946285248,
|
||
|
|
"step": 2975,
|
||
|
|
"valid_targets_mean": 4411.8,
|
||
|
|
"valid_targets_min": 1129
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.798711755233494,
|
||
|
|
"grad_norm": 0.4873268036344298,
|
||
|
|
"learning_rate": 1.0903021343291613e-05,
|
||
|
|
"loss": 0.1926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20546609163284302,
|
||
|
|
"step": 2980,
|
||
|
|
"valid_targets_mean": 4418.5,
|
||
|
|
"valid_targets_min": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.806763285024155,
|
||
|
|
"grad_norm": 0.5054671612514106,
|
||
|
|
"learning_rate": 1.0831576367949555e-05,
|
||
|
|
"loss": 0.1886,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18941310048103333,
|
||
|
|
"step": 2985,
|
||
|
|
"valid_targets_mean": 4868.4,
|
||
|
|
"valid_targets_min": 2311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.814814814814815,
|
||
|
|
"grad_norm": 0.5242076423511247,
|
||
|
|
"learning_rate": 1.0760279213612362e-05,
|
||
|
|
"loss": 0.1738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16793029010295868,
|
||
|
|
"step": 2990,
|
||
|
|
"valid_targets_mean": 5404.2,
|
||
|
|
"valid_targets_min": 2117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.822866344605475,
|
||
|
|
"grad_norm": 0.6828436818331252,
|
||
|
|
"learning_rate": 1.068913102979248e-05,
|
||
|
|
"loss": 0.1858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19285649061203003,
|
||
|
|
"step": 2995,
|
||
|
|
"valid_targets_mean": 3826.8,
|
||
|
|
"valid_targets_min": 1070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.830917874396135,
|
||
|
|
"grad_norm": 0.47219942742537285,
|
||
|
|
"learning_rate": 1.0618132963600507e-05,
|
||
|
|
"loss": 0.1784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18546539545059204,
|
||
|
|
"step": 3000,
|
||
|
|
"valid_targets_mean": 5087.4,
|
||
|
|
"valid_targets_min": 1959
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.838969404186796,
|
||
|
|
"grad_norm": 0.4457820811603146,
|
||
|
|
"learning_rate": 1.0547286159726743e-05,
|
||
|
|
"loss": 0.1801,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19360345602035522,
|
||
|
|
"step": 3005,
|
||
|
|
"valid_targets_mean": 5867.6,
|
||
|
|
"valid_targets_min": 1035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.847020933977456,
|
||
|
|
"grad_norm": 0.5847539672595908,
|
||
|
|
"learning_rate": 1.047659176042268e-05,
|
||
|
|
"loss": 0.1645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16391941905021667,
|
||
|
|
"step": 3010,
|
||
|
|
"valid_targets_mean": 3891.6,
|
||
|
|
"valid_targets_min": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.855072463768116,
|
||
|
|
"grad_norm": 0.5080943188478495,
|
||
|
|
"learning_rate": 1.0406050905482647e-05,
|
||
|
|
"loss": 0.1756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1736719310283661,
|
||
|
|
"step": 3015,
|
||
|
|
"valid_targets_mean": 4204.6,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8631239935587764,
|
||
|
|
"grad_norm": 0.4950021070139557,
|
||
|
|
"learning_rate": 1.033566473222539e-05,
|
||
|
|
"loss": 0.1688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16896328330039978,
|
||
|
|
"step": 3020,
|
||
|
|
"valid_targets_mean": 5143.3,
|
||
|
|
"valid_targets_min": 2767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.871175523349437,
|
||
|
|
"grad_norm": 0.5149173904980839,
|
||
|
|
"learning_rate": 1.0265434375475744e-05,
|
||
|
|
"loss": 0.1751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1894487589597702,
|
||
|
|
"step": 3025,
|
||
|
|
"valid_targets_mean": 4708.5,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.879227053140097,
|
||
|
|
"grad_norm": 0.5128722203180325,
|
||
|
|
"learning_rate": 1.0195360967546342e-05,
|
||
|
|
"loss": 0.1723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18005244433879852,
|
||
|
|
"step": 3030,
|
||
|
|
"valid_targets_mean": 5025.9,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.887278582930757,
|
||
|
|
"grad_norm": 0.5010958581493115,
|
||
|
|
"learning_rate": 1.0125445638219369e-05,
|
||
|
|
"loss": 0.1871,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19406850636005402,
|
||
|
|
"step": 3035,
|
||
|
|
"valid_targets_mean": 4813.1,
|
||
|
|
"valid_targets_min": 908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8953301127214175,
|
||
|
|
"grad_norm": 0.5210294803713594,
|
||
|
|
"learning_rate": 1.00556895147283e-05,
|
||
|
|
"loss": 0.1727,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16892248392105103,
|
||
|
|
"step": 3040,
|
||
|
|
"valid_targets_mean": 3893.8,
|
||
|
|
"valid_targets_min": 1273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.903381642512077,
|
||
|
|
"grad_norm": 0.6293372498483756,
|
||
|
|
"learning_rate": 9.986093721739793e-06,
|
||
|
|
"loss": 0.1872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1870511919260025,
|
||
|
|
"step": 3045,
|
||
|
|
"valid_targets_mean": 3776.4,
|
||
|
|
"valid_targets_min": 296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.911433172302738,
|
||
|
|
"grad_norm": 0.5480638562689124,
|
||
|
|
"learning_rate": 9.916659381335524e-06,
|
||
|
|
"loss": 0.1888,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15634512901306152,
|
||
|
|
"step": 3050,
|
||
|
|
"valid_targets_mean": 4189.9,
|
||
|
|
"valid_targets_min": 1195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.919484702093397,
|
||
|
|
"grad_norm": 0.5734404649785564,
|
||
|
|
"learning_rate": 9.847387612994065e-06,
|
||
|
|
"loss": 0.1648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1715972125530243,
|
||
|
|
"step": 3055,
|
||
|
|
"valid_targets_mean": 4624.1,
|
||
|
|
"valid_targets_min": 999
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.927536231884058,
|
||
|
|
"grad_norm": 0.4755904568874477,
|
||
|
|
"learning_rate": 9.778279533572894e-06,
|
||
|
|
"loss": 0.1792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1521787941455841,
|
||
|
|
"step": 3060,
|
||
|
|
"valid_targets_mean": 4754.1,
|
||
|
|
"valid_targets_min": 1047
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.935587761674718,
|
||
|
|
"grad_norm": 0.7122666442809198,
|
||
|
|
"learning_rate": 9.70933625729035e-06,
|
||
|
|
"loss": 0.1822,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17114746570587158,
|
||
|
|
"step": 3065,
|
||
|
|
"valid_targets_mean": 5142.9,
|
||
|
|
"valid_targets_min": 1175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.943639291465378,
|
||
|
|
"grad_norm": 0.7359607180808936,
|
||
|
|
"learning_rate": 9.640558895707681e-06,
|
||
|
|
"loss": 0.1732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18806086480617523,
|
||
|
|
"step": 3070,
|
||
|
|
"valid_targets_mean": 4064.7,
|
||
|
|
"valid_targets_min": 1615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.951690821256038,
|
||
|
|
"grad_norm": 0.5379368590841539,
|
||
|
|
"learning_rate": 9.571948557711104e-06,
|
||
|
|
"loss": 0.1825,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17751240730285645,
|
||
|
|
"step": 3075,
|
||
|
|
"valid_targets_mean": 3381.2,
|
||
|
|
"valid_targets_min": 249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.959742351046699,
|
||
|
|
"grad_norm": 0.4777499224064591,
|
||
|
|
"learning_rate": 9.503506349493959e-06,
|
||
|
|
"loss": 0.1772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1592390239238739,
|
||
|
|
"step": 3080,
|
||
|
|
"valid_targets_mean": 4629.0,
|
||
|
|
"valid_targets_min": 1254
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.967793880837359,
|
||
|
|
"grad_norm": 0.5139616446857665,
|
||
|
|
"learning_rate": 9.435233374538848e-06,
|
||
|
|
"loss": 0.1778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18165704607963562,
|
||
|
|
"step": 3085,
|
||
|
|
"valid_targets_mean": 4665.1,
|
||
|
|
"valid_targets_min": 1736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.975845410628019,
|
||
|
|
"grad_norm": 0.5162555365137094,
|
||
|
|
"learning_rate": 9.367130733599863e-06,
|
||
|
|
"loss": 0.1706,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19067494571208954,
|
||
|
|
"step": 3090,
|
||
|
|
"valid_targets_mean": 4688.7,
|
||
|
|
"valid_targets_min": 1872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.9838969404186795,
|
||
|
|
"grad_norm": 0.569575182274062,
|
||
|
|
"learning_rate": 9.299199524684815e-06,
|
||
|
|
"loss": 0.1671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16250163316726685,
|
||
|
|
"step": 3095,
|
||
|
|
"valid_targets_mean": 4311.8,
|
||
|
|
"valid_targets_min": 1994
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.99194847020934,
|
||
|
|
"grad_norm": 0.5520222968798817,
|
||
|
|
"learning_rate": 9.23144084303756e-06,
|
||
|
|
"loss": 0.1803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1921304166316986,
|
||
|
|
"step": 3100,
|
||
|
|
"valid_targets_mean": 4493.3,
|
||
|
|
"valid_targets_min": 2050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.7884221741023202,
|
||
|
|
"learning_rate": 9.163855781120302e-06,
|
||
|
|
"loss": 0.1794,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20062533020973206,
|
||
|
|
"step": 3105,
|
||
|
|
"valid_targets_mean": 3796.8,
|
||
|
|
"valid_targets_min": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.00805152979066,
|
||
|
|
"grad_norm": 0.5195634108838502,
|
||
|
|
"learning_rate": 9.096445428596026e-06,
|
||
|
|
"loss": 0.1668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1869000345468521,
|
||
|
|
"step": 3110,
|
||
|
|
"valid_targets_mean": 4366.8,
|
||
|
|
"valid_targets_min": 1253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0161030595813205,
|
||
|
|
"grad_norm": 0.5278497870764118,
|
||
|
|
"learning_rate": 9.029210872310884e-06,
|
||
|
|
"loss": 0.1648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1439373940229416,
|
||
|
|
"step": 3115,
|
||
|
|
"valid_targets_mean": 4365.9,
|
||
|
|
"valid_targets_min": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.024154589371981,
|
||
|
|
"grad_norm": 0.5284272892101102,
|
||
|
|
"learning_rate": 8.962153196276713e-06,
|
||
|
|
"loss": 0.1692,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14253228902816772,
|
||
|
|
"step": 3120,
|
||
|
|
"valid_targets_mean": 4662.7,
|
||
|
|
"valid_targets_min": 312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.032206119162641,
|
||
|
|
"grad_norm": 0.5474998021835664,
|
||
|
|
"learning_rate": 8.895273481653527e-06,
|
||
|
|
"loss": 0.1734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17854666709899902,
|
||
|
|
"step": 3125,
|
||
|
|
"valid_targets_mean": 4428.5,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.040257648953301,
|
||
|
|
"grad_norm": 1.2329548853780503,
|
||
|
|
"learning_rate": 8.828572806732103e-06,
|
||
|
|
"loss": 0.1709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1708088219165802,
|
||
|
|
"step": 3130,
|
||
|
|
"valid_targets_mean": 4686.6,
|
||
|
|
"valid_targets_min": 1916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.048309178743962,
|
||
|
|
"grad_norm": 0.5097310950347612,
|
||
|
|
"learning_rate": 8.76205224691659e-06,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17174428701400757,
|
||
|
|
"step": 3135,
|
||
|
|
"valid_targets_mean": 4785.3,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.056360708534622,
|
||
|
|
"grad_norm": 0.46008600902359126,
|
||
|
|
"learning_rate": 8.695712874707169e-06,
|
||
|
|
"loss": 0.167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16250503063201904,
|
||
|
|
"step": 3140,
|
||
|
|
"valid_targets_mean": 4901.8,
|
||
|
|
"valid_targets_min": 402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.064412238325282,
|
||
|
|
"grad_norm": 0.47656692190311706,
|
||
|
|
"learning_rate": 8.629555759682756e-06,
|
||
|
|
"loss": 0.1676,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15433499217033386,
|
||
|
|
"step": 3145,
|
||
|
|
"valid_targets_mean": 5206.9,
|
||
|
|
"valid_targets_min": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.072463768115942,
|
||
|
|
"grad_norm": 0.46684035634871635,
|
||
|
|
"learning_rate": 8.563581968483774e-06,
|
||
|
|
"loss": 0.1882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15580695867538452,
|
||
|
|
"step": 3150,
|
||
|
|
"valid_targets_mean": 4968.5,
|
||
|
|
"valid_targets_min": 1514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.080515297906603,
|
||
|
|
"grad_norm": 0.4817709325187039,
|
||
|
|
"learning_rate": 8.497792564794935e-06,
|
||
|
|
"loss": 0.1742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1596037745475769,
|
||
|
|
"step": 3155,
|
||
|
|
"valid_targets_mean": 4873.9,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.088566827697263,
|
||
|
|
"grad_norm": 0.49561234768208556,
|
||
|
|
"learning_rate": 8.432188609328112e-06,
|
||
|
|
"loss": 0.1795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18620729446411133,
|
||
|
|
"step": 3160,
|
||
|
|
"valid_targets_mean": 4277.6,
|
||
|
|
"valid_targets_min": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.096618357487923,
|
||
|
|
"grad_norm": 0.5342689640020111,
|
||
|
|
"learning_rate": 8.366771159805222e-06,
|
||
|
|
"loss": 0.1614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1623041033744812,
|
||
|
|
"step": 3165,
|
||
|
|
"valid_targets_mean": 3849.9,
|
||
|
|
"valid_targets_min": 1692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.1046698872785825,
|
||
|
|
"grad_norm": 0.5558864483320473,
|
||
|
|
"learning_rate": 8.301541270941178e-06,
|
||
|
|
"loss": 0.1744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19302302598953247,
|
||
|
|
"step": 3170,
|
||
|
|
"valid_targets_mean": 3954.7,
|
||
|
|
"valid_targets_min": 894
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.112721417069243,
|
||
|
|
"grad_norm": 0.5483085784932584,
|
||
|
|
"learning_rate": 8.236499994426886e-06,
|
||
|
|
"loss": 0.1635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17752034962177277,
|
||
|
|
"step": 3175,
|
||
|
|
"valid_targets_mean": 4114.2,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.120772946859903,
|
||
|
|
"grad_norm": 0.5129349338303597,
|
||
|
|
"learning_rate": 8.171648378912272e-06,
|
||
|
|
"loss": 0.1732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1786811649799347,
|
||
|
|
"step": 3180,
|
||
|
|
"valid_targets_mean": 5010.1,
|
||
|
|
"valid_targets_min": 1693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.128824476650563,
|
||
|
|
"grad_norm": 0.47618052790196236,
|
||
|
|
"learning_rate": 8.1069874699894e-06,
|
||
|
|
"loss": 0.1617,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14539986848831177,
|
||
|
|
"step": 3185,
|
||
|
|
"valid_targets_mean": 4915.1,
|
||
|
|
"valid_targets_min": 1295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.1368760064412236,
|
||
|
|
"grad_norm": 0.5408793562526828,
|
||
|
|
"learning_rate": 8.042518310175607e-06,
|
||
|
|
"loss": 0.1729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1692083477973938,
|
||
|
|
"step": 3190,
|
||
|
|
"valid_targets_mean": 4689.8,
|
||
|
|
"valid_targets_min": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.144927536231884,
|
||
|
|
"grad_norm": 0.427889087391779,
|
||
|
|
"learning_rate": 7.978241938896679e-06,
|
||
|
|
"loss": 0.1541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13784319162368774,
|
||
|
|
"step": 3195,
|
||
|
|
"valid_targets_mean": 4693.8,
|
||
|
|
"valid_targets_min": 2255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.152979066022544,
|
||
|
|
"grad_norm": 0.5199711838065452,
|
||
|
|
"learning_rate": 7.914159392470118e-06,
|
||
|
|
"loss": 0.1698,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16833321750164032,
|
||
|
|
"step": 3200,
|
||
|
|
"valid_targets_mean": 4362.7,
|
||
|
|
"valid_targets_min": 848
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.161030595813204,
|
||
|
|
"grad_norm": 0.47753654786071936,
|
||
|
|
"learning_rate": 7.850271704088396e-06,
|
||
|
|
"loss": 0.1586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14919407665729523,
|
||
|
|
"step": 3205,
|
||
|
|
"valid_targets_mean": 4483.1,
|
||
|
|
"valid_targets_min": 2048
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.169082125603865,
|
||
|
|
"grad_norm": 0.47729884629127567,
|
||
|
|
"learning_rate": 7.786579903802342e-06,
|
||
|
|
"loss": 0.1818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15295487642288208,
|
||
|
|
"step": 3210,
|
||
|
|
"valid_targets_mean": 4766.6,
|
||
|
|
"valid_targets_min": 1817
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.177133655394525,
|
||
|
|
"grad_norm": 0.5186129305793417,
|
||
|
|
"learning_rate": 7.723085018504512e-06,
|
||
|
|
"loss": 0.1688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1621103286743164,
|
||
|
|
"step": 3215,
|
||
|
|
"valid_targets_mean": 4525.6,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.185185185185185,
|
||
|
|
"grad_norm": 0.562831677149766,
|
||
|
|
"learning_rate": 7.659788071912612e-06,
|
||
|
|
"loss": 0.1709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16404971480369568,
|
||
|
|
"step": 3220,
|
||
|
|
"valid_targets_mean": 3796.4,
|
||
|
|
"valid_targets_min": 1477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.193236714975845,
|
||
|
|
"grad_norm": 0.48044538637214657,
|
||
|
|
"learning_rate": 7.59669008455304e-06,
|
||
|
|
"loss": 0.1606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16649338603019714,
|
||
|
|
"step": 3225,
|
||
|
|
"valid_targets_mean": 5432.8,
|
||
|
|
"valid_targets_min": 1858
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.201288244766506,
|
||
|
|
"grad_norm": 0.5790721887585502,
|
||
|
|
"learning_rate": 7.533792073744395e-06,
|
||
|
|
"loss": 0.1751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17744669318199158,
|
||
|
|
"step": 3230,
|
||
|
|
"valid_targets_mean": 4592.4,
|
||
|
|
"valid_targets_min": 935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.209339774557166,
|
||
|
|
"grad_norm": 0.5717495383522813,
|
||
|
|
"learning_rate": 7.471095053581086e-06,
|
||
|
|
"loss": 0.1758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1721886694431305,
|
||
|
|
"step": 3235,
|
||
|
|
"valid_targets_mean": 4073.8,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.217391304347826,
|
||
|
|
"grad_norm": 0.5319904040215578,
|
||
|
|
"learning_rate": 7.4086000349169864e-06,
|
||
|
|
"loss": 0.1753,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15195900201797485,
|
||
|
|
"step": 3240,
|
||
|
|
"valid_targets_mean": 4558.5,
|
||
|
|
"valid_targets_min": 296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.225442834138486,
|
||
|
|
"grad_norm": 0.6199276315504786,
|
||
|
|
"learning_rate": 7.346308025349138e-06,
|
||
|
|
"loss": 0.1709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16166193783283234,
|
||
|
|
"step": 3245,
|
||
|
|
"valid_targets_mean": 3986.2,
|
||
|
|
"valid_targets_min": 848
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.233494363929147,
|
||
|
|
"grad_norm": 0.6244366963899998,
|
||
|
|
"learning_rate": 7.2842200292014805e-06,
|
||
|
|
"loss": 0.188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17683975398540497,
|
||
|
|
"step": 3250,
|
||
|
|
"valid_targets_mean": 3601.2,
|
||
|
|
"valid_targets_min": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.241545893719807,
|
||
|
|
"grad_norm": 0.5241028588187276,
|
||
|
|
"learning_rate": 7.2223370475086896e-06,
|
||
|
|
"loss": 0.173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1595272421836853,
|
||
|
|
"step": 3255,
|
||
|
|
"valid_targets_mean": 4269.6,
|
||
|
|
"valid_targets_min": 1279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.249597423510467,
|
||
|
|
"grad_norm": 0.4473386775088612,
|
||
|
|
"learning_rate": 7.160660078000028e-06,
|
||
|
|
"loss": 0.1621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16722847521305084,
|
||
|
|
"step": 3260,
|
||
|
|
"valid_targets_mean": 5286.9,
|
||
|
|
"valid_targets_min": 2305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2576489533011275,
|
||
|
|
"grad_norm": 0.5009816722230198,
|
||
|
|
"learning_rate": 7.099190115083259e-06,
|
||
|
|
"loss": 0.1615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17297816276550293,
|
||
|
|
"step": 3265,
|
||
|
|
"valid_targets_mean": 4930.7,
|
||
|
|
"valid_targets_min": 2434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.265700483091788,
|
||
|
|
"grad_norm": 0.5600085277272723,
|
||
|
|
"learning_rate": 7.037928149828608e-06,
|
||
|
|
"loss": 0.1734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18866902589797974,
|
||
|
|
"step": 3270,
|
||
|
|
"valid_targets_mean": 4635.0,
|
||
|
|
"valid_targets_min": 1444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.273752012882448,
|
||
|
|
"grad_norm": 0.5153595033812705,
|
||
|
|
"learning_rate": 6.97687516995279e-06,
|
||
|
|
"loss": 0.1608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15133386850357056,
|
||
|
|
"step": 3275,
|
||
|
|
"valid_targets_mean": 4087.8,
|
||
|
|
"valid_targets_min": 1111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.281803542673108,
|
||
|
|
"grad_norm": 0.5588332326443939,
|
||
|
|
"learning_rate": 6.916032159803088e-06,
|
||
|
|
"loss": 0.1838,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20162728428840637,
|
||
|
|
"step": 3280,
|
||
|
|
"valid_targets_mean": 4709.9,
|
||
|
|
"valid_targets_min": 2151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2898550724637685,
|
||
|
|
"grad_norm": 0.5640390114972573,
|
||
|
|
"learning_rate": 6.855400100341458e-06,
|
||
|
|
"loss": 0.1642,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17432144284248352,
|
||
|
|
"step": 3285,
|
||
|
|
"valid_targets_mean": 3788.3,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.297906602254429,
|
||
|
|
"grad_norm": 0.5376565233222369,
|
||
|
|
"learning_rate": 6.794979969128755e-06,
|
||
|
|
"loss": 0.1761,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16141164302825928,
|
||
|
|
"step": 3290,
|
||
|
|
"valid_targets_mean": 4118.3,
|
||
|
|
"valid_targets_min": 1706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.305958132045088,
|
||
|
|
"grad_norm": 0.5378942356008535,
|
||
|
|
"learning_rate": 6.7347727403089325e-06,
|
||
|
|
"loss": 0.173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17127177119255066,
|
||
|
|
"step": 3295,
|
||
|
|
"valid_targets_mean": 4600.5,
|
||
|
|
"valid_targets_min": 1136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.314009661835748,
|
||
|
|
"grad_norm": 0.48776680886655227,
|
||
|
|
"learning_rate": 6.674779384593373e-06,
|
||
|
|
"loss": 0.1591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13992200791835785,
|
||
|
|
"step": 3300,
|
||
|
|
"valid_targets_mean": 4386.2,
|
||
|
|
"valid_targets_min": 1725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.322061191626409,
|
||
|
|
"grad_norm": 0.5329626208836868,
|
||
|
|
"learning_rate": 6.61500086924519e-06,
|
||
|
|
"loss": 0.1657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17515888810157776,
|
||
|
|
"step": 3305,
|
||
|
|
"valid_targets_mean": 4181.7,
|
||
|
|
"valid_targets_min": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.330112721417069,
|
||
|
|
"grad_norm": 0.46016033498034964,
|
||
|
|
"learning_rate": 6.555438158063683e-06,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17031973600387573,
|
||
|
|
"step": 3310,
|
||
|
|
"valid_targets_mean": 5609.8,
|
||
|
|
"valid_targets_min": 1475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.338164251207729,
|
||
|
|
"grad_norm": 0.7890593958793812,
|
||
|
|
"learning_rate": 6.4960922113687695e-06,
|
||
|
|
"loss": 0.1678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17659761011600494,
|
||
|
|
"step": 3315,
|
||
|
|
"valid_targets_mean": 3941.4,
|
||
|
|
"valid_targets_min": 1450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.3462157809983895,
|
||
|
|
"grad_norm": 0.5703421349872555,
|
||
|
|
"learning_rate": 6.4369639859855115e-06,
|
||
|
|
"loss": 0.1637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17057812213897705,
|
||
|
|
"step": 3320,
|
||
|
|
"valid_targets_mean": 4658.4,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.35426731078905,
|
||
|
|
"grad_norm": 0.564126182226765,
|
||
|
|
"learning_rate": 6.378054435228671e-06,
|
||
|
|
"loss": 0.166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18041761219501495,
|
||
|
|
"step": 3325,
|
||
|
|
"valid_targets_mean": 4159.0,
|
||
|
|
"valid_targets_min": 759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.36231884057971,
|
||
|
|
"grad_norm": 0.4973048643598519,
|
||
|
|
"learning_rate": 6.319364508887371e-06,
|
||
|
|
"loss": 0.1719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1592758297920227,
|
||
|
|
"step": 3330,
|
||
|
|
"valid_targets_mean": 4736.2,
|
||
|
|
"valid_targets_min": 1812
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.37037037037037,
|
||
|
|
"grad_norm": 0.569769342446689,
|
||
|
|
"learning_rate": 6.260895153209763e-06,
|
||
|
|
"loss": 0.1644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18152937293052673,
|
||
|
|
"step": 3335,
|
||
|
|
"valid_targets_mean": 4268.9,
|
||
|
|
"valid_targets_min": 1431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.3784219001610305,
|
||
|
|
"grad_norm": 0.5016879405573162,
|
||
|
|
"learning_rate": 6.202647310887764e-06,
|
||
|
|
"loss": 0.1832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17833814024925232,
|
||
|
|
"step": 3340,
|
||
|
|
"valid_targets_mean": 4741.9,
|
||
|
|
"valid_targets_min": 1679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.386473429951691,
|
||
|
|
"grad_norm": 0.5247407647892519,
|
||
|
|
"learning_rate": 6.14462192104188e-06,
|
||
|
|
"loss": 0.1691,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20044955611228943,
|
||
|
|
"step": 3345,
|
||
|
|
"valid_targets_mean": 4656.0,
|
||
|
|
"valid_targets_min": 2099
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.394524959742351,
|
||
|
|
"grad_norm": 0.47014870620352595,
|
||
|
|
"learning_rate": 6.086819919206051e-06,
|
||
|
|
"loss": 0.168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17010408639907837,
|
||
|
|
"step": 3350,
|
||
|
|
"valid_targets_mean": 5653.4,
|
||
|
|
"valid_targets_min": 2097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.402576489533011,
|
||
|
|
"grad_norm": 0.6060057637724512,
|
||
|
|
"learning_rate": 6.029242237312554e-06,
|
||
|
|
"loss": 0.1742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18121179938316345,
|
||
|
|
"step": 3355,
|
||
|
|
"valid_targets_mean": 5222.9,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4106280193236715,
|
||
|
|
"grad_norm": 0.5270241265183322,
|
||
|
|
"learning_rate": 5.971889803676996e-06,
|
||
|
|
"loss": 0.1648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16754843294620514,
|
||
|
|
"step": 3360,
|
||
|
|
"valid_targets_mean": 4372.1,
|
||
|
|
"valid_targets_min": 1892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.418679549114332,
|
||
|
|
"grad_norm": 0.5131324243265083,
|
||
|
|
"learning_rate": 5.914763542983355e-06,
|
||
|
|
"loss": 0.1986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18260961771011353,
|
||
|
|
"step": 3365,
|
||
|
|
"valid_targets_mean": 5181.3,
|
||
|
|
"valid_targets_min": 1175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.426731078904992,
|
||
|
|
"grad_norm": 0.5192149826300284,
|
||
|
|
"learning_rate": 5.857864376269051e-06,
|
||
|
|
"loss": 0.1837,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18823757767677307,
|
||
|
|
"step": 3370,
|
||
|
|
"valid_targets_mean": 4809.2,
|
||
|
|
"valid_targets_min": 1171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.434782608695652,
|
||
|
|
"grad_norm": 0.5980250860155677,
|
||
|
|
"learning_rate": 5.801193220910108e-06,
|
||
|
|
"loss": 0.1539,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16703063249588013,
|
||
|
|
"step": 3375,
|
||
|
|
"valid_targets_mean": 4509.7,
|
||
|
|
"valid_targets_min": 1367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.442834138486313,
|
||
|
|
"grad_norm": 0.551837937511895,
|
||
|
|
"learning_rate": 5.744750990606356e-06,
|
||
|
|
"loss": 0.1723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15874727070331573,
|
||
|
|
"step": 3380,
|
||
|
|
"valid_targets_mean": 3712.6,
|
||
|
|
"valid_targets_min": 311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.450885668276973,
|
||
|
|
"grad_norm": 0.49161398228706626,
|
||
|
|
"learning_rate": 5.688538595366706e-06,
|
||
|
|
"loss": 0.1569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.155176043510437,
|
||
|
|
"step": 3385,
|
||
|
|
"valid_targets_mean": 4476.1,
|
||
|
|
"valid_targets_min": 1936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.458937198067633,
|
||
|
|
"grad_norm": 0.5355381631745951,
|
||
|
|
"learning_rate": 5.632556941494482e-06,
|
||
|
|
"loss": 0.1687,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.153697669506073,
|
||
|
|
"step": 3390,
|
||
|
|
"valid_targets_mean": 3974.2,
|
||
|
|
"valid_targets_min": 1569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.466988727858293,
|
||
|
|
"grad_norm": 0.5115948764108772,
|
||
|
|
"learning_rate": 5.5768069315727895e-06,
|
||
|
|
"loss": 0.1772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1722053587436676,
|
||
|
|
"step": 3395,
|
||
|
|
"valid_targets_mean": 4293.4,
|
||
|
|
"valid_targets_min": 2260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.475040257648954,
|
||
|
|
"grad_norm": 0.4799188086454644,
|
||
|
|
"learning_rate": 5.521289464449975e-06,
|
||
|
|
"loss": 0.1784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17728543281555176,
|
||
|
|
"step": 3400,
|
||
|
|
"valid_targets_mean": 5211.7,
|
||
|
|
"valid_targets_min": 363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.483091787439614,
|
||
|
|
"grad_norm": 0.5570316610757228,
|
||
|
|
"learning_rate": 5.46600543522515e-06,
|
||
|
|
"loss": 0.1694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16196802258491516,
|
||
|
|
"step": 3405,
|
||
|
|
"valid_targets_mean": 4058.4,
|
||
|
|
"valid_targets_min": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.491143317230274,
|
||
|
|
"grad_norm": 0.6736055907952796,
|
||
|
|
"learning_rate": 5.410955735233736e-06,
|
||
|
|
"loss": 0.1683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.171076238155365,
|
||
|
|
"step": 3410,
|
||
|
|
"valid_targets_mean": 5974.8,
|
||
|
|
"valid_targets_min": 2681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.499194847020934,
|
||
|
|
"grad_norm": 0.5508461582818147,
|
||
|
|
"learning_rate": 5.3561412520331025e-06,
|
||
|
|
"loss": 0.1673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14637523889541626,
|
||
|
|
"step": 3415,
|
||
|
|
"valid_targets_mean": 4094.3,
|
||
|
|
"valid_targets_min": 904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.507246376811594,
|
||
|
|
"grad_norm": 0.5142188355081284,
|
||
|
|
"learning_rate": 5.30156286938826e-06,
|
||
|
|
"loss": 0.1655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1733294427394867,
|
||
|
|
"step": 3420,
|
||
|
|
"valid_targets_mean": 4461.4,
|
||
|
|
"valid_targets_min": 1105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.515297906602254,
|
||
|
|
"grad_norm": 0.5905481814384393,
|
||
|
|
"learning_rate": 5.24722146725761e-06,
|
||
|
|
"loss": 0.1817,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17892040312290192,
|
||
|
|
"step": 3425,
|
||
|
|
"valid_targets_mean": 4002.8,
|
||
|
|
"valid_targets_min": 1672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.523349436392914,
|
||
|
|
"grad_norm": 0.45334104473741593,
|
||
|
|
"learning_rate": 5.193117921778743e-06,
|
||
|
|
"loss": 0.1616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13965705037117004,
|
||
|
|
"step": 3430,
|
||
|
|
"valid_targets_mean": 4802.2,
|
||
|
|
"valid_targets_min": 1812
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.531400966183575,
|
||
|
|
"grad_norm": 0.742354374585886,
|
||
|
|
"learning_rate": 5.139253105254336e-06,
|
||
|
|
"loss": 0.1795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18780140578746796,
|
||
|
|
"step": 3435,
|
||
|
|
"valid_targets_mean": 4082.1,
|
||
|
|
"valid_targets_min": 1116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.539452495974235,
|
||
|
|
"grad_norm": 0.44385771676975855,
|
||
|
|
"learning_rate": 5.085627886138078e-06,
|
||
|
|
"loss": 0.159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14621910452842712,
|
||
|
|
"step": 3440,
|
||
|
|
"valid_targets_mean": 5131.4,
|
||
|
|
"valid_targets_min": 1610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.547504025764895,
|
||
|
|
"grad_norm": 0.47820099076305106,
|
||
|
|
"learning_rate": 5.032243129020671e-06,
|
||
|
|
"loss": 0.1734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16836556792259216,
|
||
|
|
"step": 3445,
|
||
|
|
"valid_targets_mean": 5182.5,
|
||
|
|
"valid_targets_min": 1770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.555555555555555,
|
||
|
|
"grad_norm": 0.5335135512220336,
|
||
|
|
"learning_rate": 4.9790996946158695e-06,
|
||
|
|
"loss": 0.1688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18833035230636597,
|
||
|
|
"step": 3450,
|
||
|
|
"valid_targets_mean": 4468.0,
|
||
|
|
"valid_targets_min": 1464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.563607085346216,
|
||
|
|
"grad_norm": 0.5452849565299809,
|
||
|
|
"learning_rate": 4.926198439746641e-06,
|
||
|
|
"loss": 0.1769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16549217700958252,
|
||
|
|
"step": 3455,
|
||
|
|
"valid_targets_mean": 3943.1,
|
||
|
|
"valid_targets_min": 1663
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.571658615136876,
|
||
|
|
"grad_norm": 0.47569229716225025,
|
||
|
|
"learning_rate": 4.873540217331325e-06,
|
||
|
|
"loss": 0.1736,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14578670263290405,
|
||
|
|
"step": 3460,
|
||
|
|
"valid_targets_mean": 4942.0,
|
||
|
|
"valid_targets_min": 1587
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.579710144927536,
|
||
|
|
"grad_norm": 0.6257220241925262,
|
||
|
|
"learning_rate": 4.82112587636989e-06,
|
||
|
|
"loss": 0.1653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14457204937934875,
|
||
|
|
"step": 3465,
|
||
|
|
"valid_targets_mean": 4796.1,
|
||
|
|
"valid_targets_min": 1025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.587761674718196,
|
||
|
|
"grad_norm": 0.5126249751180715,
|
||
|
|
"learning_rate": 4.768956261930233e-06,
|
||
|
|
"loss": 0.1723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21914073824882507,
|
||
|
|
"step": 3470,
|
||
|
|
"valid_targets_mean": 5159.1,
|
||
|
|
"valid_targets_min": 1672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.595813204508857,
|
||
|
|
"grad_norm": 0.5030438743653675,
|
||
|
|
"learning_rate": 4.717032215134576e-06,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1816645860671997,
|
||
|
|
"step": 3475,
|
||
|
|
"valid_targets_mean": 5501.6,
|
||
|
|
"valid_targets_min": 1066
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.603864734299517,
|
||
|
|
"grad_norm": 0.7789904581155889,
|
||
|
|
"learning_rate": 4.66535457314589e-06,
|
||
|
|
"loss": 0.1658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1629607379436493,
|
||
|
|
"step": 3480,
|
||
|
|
"valid_targets_mean": 5301.9,
|
||
|
|
"valid_targets_min": 1650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.611916264090177,
|
||
|
|
"grad_norm": 0.5094527470833449,
|
||
|
|
"learning_rate": 4.613924169154406e-06,
|
||
|
|
"loss": 0.1747,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16425767540931702,
|
||
|
|
"step": 3485,
|
||
|
|
"valid_targets_mean": 4554.6,
|
||
|
|
"valid_targets_min": 1654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6199677938808374,
|
||
|
|
"grad_norm": 0.48660247544704416,
|
||
|
|
"learning_rate": 4.5627418323641705e-06,
|
||
|
|
"loss": 0.1644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17449656128883362,
|
||
|
|
"step": 3490,
|
||
|
|
"valid_targets_mean": 5440.4,
|
||
|
|
"valid_targets_min": 3305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.628019323671498,
|
||
|
|
"grad_norm": 0.6588365172160204,
|
||
|
|
"learning_rate": 4.51180838797969e-06,
|
||
|
|
"loss": 0.1768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16700059175491333,
|
||
|
|
"step": 3495,
|
||
|
|
"valid_targets_mean": 3524.9,
|
||
|
|
"valid_targets_min": 431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.636070853462158,
|
||
|
|
"grad_norm": 0.5649424861690088,
|
||
|
|
"learning_rate": 4.461124657192612e-06,
|
||
|
|
"loss": 0.1923,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16424767673015594,
|
||
|
|
"step": 3500,
|
||
|
|
"valid_targets_mean": 3976.2,
|
||
|
|
"valid_targets_min": 327
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.644122383252818,
|
||
|
|
"grad_norm": 0.5402856059826509,
|
||
|
|
"learning_rate": 4.410691457168488e-06,
|
||
|
|
"loss": 0.179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17043063044548035,
|
||
|
|
"step": 3505,
|
||
|
|
"valid_targets_mean": 4236.4,
|
||
|
|
"valid_targets_min": 1605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6521739130434785,
|
||
|
|
"grad_norm": 0.6565234736534431,
|
||
|
|
"learning_rate": 4.3605096010336115e-06,
|
||
|
|
"loss": 0.1631,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19410403072834015,
|
||
|
|
"step": 3510,
|
||
|
|
"valid_targets_mean": 3966.3,
|
||
|
|
"valid_targets_min": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.660225442834139,
|
||
|
|
"grad_norm": 0.46809524453228724,
|
||
|
|
"learning_rate": 4.310579897861902e-06,
|
||
|
|
"loss": 0.167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14890244603157043,
|
||
|
|
"step": 3515,
|
||
|
|
"valid_targets_mean": 5078.9,
|
||
|
|
"valid_targets_min": 1866
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.668276972624799,
|
||
|
|
"grad_norm": 0.5402583356363803,
|
||
|
|
"learning_rate": 4.26090315266185e-06,
|
||
|
|
"loss": 0.1718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18138787150382996,
|
||
|
|
"step": 3520,
|
||
|
|
"valid_targets_mean": 4479.4,
|
||
|
|
"valid_targets_min": 1667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.676328502415459,
|
||
|
|
"grad_norm": 0.5567198301706011,
|
||
|
|
"learning_rate": 4.2114801663635504e-06,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19387924671173096,
|
||
|
|
"step": 3525,
|
||
|
|
"valid_targets_mean": 4855.1,
|
||
|
|
"valid_targets_min": 1475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6843800322061195,
|
||
|
|
"grad_norm": 0.5076182712626346,
|
||
|
|
"learning_rate": 4.1623117358057865e-06,
|
||
|
|
"loss": 0.169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16965937614440918,
|
||
|
|
"step": 3530,
|
||
|
|
"valid_targets_mean": 4802.6,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.692431561996779,
|
||
|
|
"grad_norm": 0.5638233787105313,
|
||
|
|
"learning_rate": 4.113398653723168e-06,
|
||
|
|
"loss": 0.1703,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1688123494386673,
|
||
|
|
"step": 3535,
|
||
|
|
"valid_targets_mean": 4551.8,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.70048309178744,
|
||
|
|
"grad_norm": 0.6026682576927842,
|
||
|
|
"learning_rate": 4.0647417087333776e-06,
|
||
|
|
"loss": 0.1655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17929375171661377,
|
||
|
|
"step": 3540,
|
||
|
|
"valid_targets_mean": 4203.6,
|
||
|
|
"valid_targets_min": 1303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.708534621578099,
|
||
|
|
"grad_norm": 0.4840327343136473,
|
||
|
|
"learning_rate": 4.0163416853244385e-06,
|
||
|
|
"loss": 0.1767,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15817004442214966,
|
||
|
|
"step": 3545,
|
||
|
|
"valid_targets_mean": 4813.0,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.71658615136876,
|
||
|
|
"grad_norm": 0.5122311264991213,
|
||
|
|
"learning_rate": 3.968199363842056e-06,
|
||
|
|
"loss": 0.1701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15911847352981567,
|
||
|
|
"step": 3550,
|
||
|
|
"valid_targets_mean": 4163.8,
|
||
|
|
"valid_targets_min": 2141
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.72463768115942,
|
||
|
|
"grad_norm": 0.5526613981940242,
|
||
|
|
"learning_rate": 3.920315520477065e-06,
|
||
|
|
"loss": 0.1723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16690890491008759,
|
||
|
|
"step": 3555,
|
||
|
|
"valid_targets_mean": 3781.4,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.73268921095008,
|
||
|
|
"grad_norm": 0.5154673260140975,
|
||
|
|
"learning_rate": 3.872690927252891e-06,
|
||
|
|
"loss": 0.1775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18027852475643158,
|
||
|
|
"step": 3560,
|
||
|
|
"valid_targets_mean": 4373.6,
|
||
|
|
"valid_targets_min": 1594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7407407407407405,
|
||
|
|
"grad_norm": 0.48703994856912225,
|
||
|
|
"learning_rate": 3.825326352013119e-06,
|
||
|
|
"loss": 0.169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16559578478336334,
|
||
|
|
"step": 3565,
|
||
|
|
"valid_targets_mean": 4382.0,
|
||
|
|
"valid_targets_min": 1603
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.748792270531401,
|
||
|
|
"grad_norm": 0.5472851978165418,
|
||
|
|
"learning_rate": 3.7782225584091016e-06,
|
||
|
|
"loss": 0.1681,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15613017976284027,
|
||
|
|
"step": 3570,
|
||
|
|
"valid_targets_mean": 4052.4,
|
||
|
|
"valid_targets_min": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.756843800322061,
|
||
|
|
"grad_norm": 0.5873806542864478,
|
||
|
|
"learning_rate": 3.731380305887644e-06,
|
||
|
|
"loss": 0.1716,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17352142930030823,
|
||
|
|
"step": 3575,
|
||
|
|
"valid_targets_mean": 4468.3,
|
||
|
|
"valid_targets_min": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.764895330112721,
|
||
|
|
"grad_norm": 0.562715584188104,
|
||
|
|
"learning_rate": 3.684800349678781e-06,
|
||
|
|
"loss": 0.1685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19309145212173462,
|
||
|
|
"step": 3580,
|
||
|
|
"valid_targets_mean": 3819.3,
|
||
|
|
"valid_targets_min": 831
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7729468599033815,
|
||
|
|
"grad_norm": 0.584825018902196,
|
||
|
|
"learning_rate": 3.638483440783576e-06,
|
||
|
|
"loss": 0.1728,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22266249358654022,
|
||
|
|
"step": 3585,
|
||
|
|
"valid_targets_mean": 4084.6,
|
||
|
|
"valid_targets_min": 1924
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.780998389694042,
|
||
|
|
"grad_norm": 0.5288099238647191,
|
||
|
|
"learning_rate": 3.5924303259620307e-06,
|
||
|
|
"loss": 0.1605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15193787217140198,
|
||
|
|
"step": 3590,
|
||
|
|
"valid_targets_mean": 4209.9,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.789049919484702,
|
||
|
|
"grad_norm": 0.5275609216950717,
|
||
|
|
"learning_rate": 3.546641747721036e-06,
|
||
|
|
"loss": 0.1892,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16384585201740265,
|
||
|
|
"step": 3595,
|
||
|
|
"valid_targets_mean": 4627.3,
|
||
|
|
"valid_targets_min": 1626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.797101449275362,
|
||
|
|
"grad_norm": 0.725525221737914,
|
||
|
|
"learning_rate": 3.501118444302394e-06,
|
||
|
|
"loss": 0.1684,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18506191670894623,
|
||
|
|
"step": 3600,
|
||
|
|
"valid_targets_mean": 3873.7,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.805152979066023,
|
||
|
|
"grad_norm": 0.5082224637456563,
|
||
|
|
"learning_rate": 3.4558611496709384e-06,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.165011465549469,
|
||
|
|
"step": 3605,
|
||
|
|
"valid_targets_mean": 4355.2,
|
||
|
|
"valid_targets_min": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.813204508856683,
|
||
|
|
"grad_norm": 0.8025807922419831,
|
||
|
|
"learning_rate": 3.4108705935026685e-06,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15931957960128784,
|
||
|
|
"step": 3610,
|
||
|
|
"valid_targets_mean": 3695.9,
|
||
|
|
"valid_targets_min": 1119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.821256038647343,
|
||
|
|
"grad_norm": 0.5168844656413334,
|
||
|
|
"learning_rate": 3.3661475011730206e-06,
|
||
|
|
"loss": 0.1639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15300890803337097,
|
||
|
|
"step": 3615,
|
||
|
|
"valid_targets_mean": 4747.3,
|
||
|
|
"valid_targets_min": 1185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.829307568438003,
|
||
|
|
"grad_norm": 0.5249631418738182,
|
||
|
|
"learning_rate": 3.321692593745147e-06,
|
||
|
|
"loss": 0.1617,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15493711829185486,
|
||
|
|
"step": 3620,
|
||
|
|
"valid_targets_mean": 3726.0,
|
||
|
|
"valid_targets_min": 1610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.837359098228664,
|
||
|
|
"grad_norm": 0.5331107342319671,
|
||
|
|
"learning_rate": 3.2775065879582948e-06,
|
||
|
|
"loss": 0.1697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17267771065235138,
|
||
|
|
"step": 3625,
|
||
|
|
"valid_targets_mean": 5503.0,
|
||
|
|
"valid_targets_min": 1978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.845410628019324,
|
||
|
|
"grad_norm": 0.5222892591386691,
|
||
|
|
"learning_rate": 3.233590196216263e-06,
|
||
|
|
"loss": 0.1659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1816999316215515,
|
||
|
|
"step": 3630,
|
||
|
|
"valid_targets_mean": 4842.2,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.853462157809984,
|
||
|
|
"grad_norm": 0.5132154525081916,
|
||
|
|
"learning_rate": 3.1899441265759036e-06,
|
||
|
|
"loss": 0.1808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19336271286010742,
|
||
|
|
"step": 3635,
|
||
|
|
"valid_targets_mean": 5282.2,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.861513687600644,
|
||
|
|
"grad_norm": 0.5087976961623909,
|
||
|
|
"learning_rate": 3.1465690827356955e-06,
|
||
|
|
"loss": 0.173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17752087116241455,
|
||
|
|
"step": 3640,
|
||
|
|
"valid_targets_mean": 4912.0,
|
||
|
|
"valid_targets_min": 1166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.869565217391305,
|
||
|
|
"grad_norm": 0.5723530802904422,
|
||
|
|
"learning_rate": 3.103465764024438e-06,
|
||
|
|
"loss": 0.1659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1567440927028656,
|
||
|
|
"step": 3645,
|
||
|
|
"valid_targets_mean": 3882.6,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.877616747181965,
|
||
|
|
"grad_norm": 0.6212444706400745,
|
||
|
|
"learning_rate": 3.0606348653899288e-06,
|
||
|
|
"loss": 0.1777,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16555482149124146,
|
||
|
|
"step": 3650,
|
||
|
|
"valid_targets_mean": 4280.1,
|
||
|
|
"valid_targets_min": 1429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.885668276972625,
|
||
|
|
"grad_norm": 0.6037643784134114,
|
||
|
|
"learning_rate": 3.0180770773877866e-06,
|
||
|
|
"loss": 0.1767,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1539427787065506,
|
||
|
|
"step": 3655,
|
||
|
|
"valid_targets_mean": 3446.1,
|
||
|
|
"valid_targets_min": 864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.8937198067632846,
|
||
|
|
"grad_norm": 0.5417204859207484,
|
||
|
|
"learning_rate": 2.9757930861703223e-06,
|
||
|
|
"loss": 0.1546,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1333686113357544,
|
||
|
|
"step": 3660,
|
||
|
|
"valid_targets_mean": 4272.0,
|
||
|
|
"valid_targets_min": 993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.901771336553946,
|
||
|
|
"grad_norm": 0.545060770024728,
|
||
|
|
"learning_rate": 2.9337835734754504e-06,
|
||
|
|
"loss": 0.1685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16815856099128723,
|
||
|
|
"step": 3665,
|
||
|
|
"valid_targets_mean": 4364.4,
|
||
|
|
"valid_targets_min": 1968
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.909822866344605,
|
||
|
|
"grad_norm": 0.4807913485701676,
|
||
|
|
"learning_rate": 2.892049216615724e-06,
|
||
|
|
"loss": 0.1705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16856727004051208,
|
||
|
|
"step": 3670,
|
||
|
|
"valid_targets_mean": 4645.9,
|
||
|
|
"valid_targets_min": 1236
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.917874396135265,
|
||
|
|
"grad_norm": 0.5872896027676431,
|
||
|
|
"learning_rate": 2.850590688467405e-06,
|
||
|
|
"loss": 0.1761,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17276933789253235,
|
||
|
|
"step": 3675,
|
||
|
|
"valid_targets_mean": 4079.1,
|
||
|
|
"valid_targets_min": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.925925925925926,
|
||
|
|
"grad_norm": 0.5081772741434722,
|
||
|
|
"learning_rate": 2.8094086574595934e-06,
|
||
|
|
"loss": 0.1846,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17815062403678894,
|
||
|
|
"step": 3680,
|
||
|
|
"valid_targets_mean": 4622.2,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.933977455716586,
|
||
|
|
"grad_norm": 0.5314566948595292,
|
||
|
|
"learning_rate": 2.768503787563497e-06,
|
||
|
|
"loss": 0.1659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15440833568572998,
|
||
|
|
"step": 3685,
|
||
|
|
"valid_targets_mean": 4177.4,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.942028985507246,
|
||
|
|
"grad_norm": 0.5944657537451915,
|
||
|
|
"learning_rate": 2.7278767382816828e-06,
|
||
|
|
"loss": 0.172,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15870711207389832,
|
||
|
|
"step": 3690,
|
||
|
|
"valid_targets_mean": 4034.2,
|
||
|
|
"valid_targets_min": 346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.950080515297906,
|
||
|
|
"grad_norm": 0.5150609346752764,
|
||
|
|
"learning_rate": 2.687528164637474e-06,
|
||
|
|
"loss": 0.1682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16893523931503296,
|
||
|
|
"step": 3695,
|
||
|
|
"valid_targets_mean": 4341.7,
|
||
|
|
"valid_targets_min": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.958132045088567,
|
||
|
|
"grad_norm": 0.622800014898735,
|
||
|
|
"learning_rate": 2.647458717164357e-06,
|
||
|
|
"loss": 0.1686,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1420368254184723,
|
||
|
|
"step": 3700,
|
||
|
|
"valid_targets_mean": 4181.0,
|
||
|
|
"valid_targets_min": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.966183574879227,
|
||
|
|
"grad_norm": 0.5118850984097806,
|
||
|
|
"learning_rate": 2.607669041895535e-06,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17651256918907166,
|
||
|
|
"step": 3705,
|
||
|
|
"valid_targets_mean": 5766.2,
|
||
|
|
"valid_targets_min": 2526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.974235104669887,
|
||
|
|
"grad_norm": 0.5922490816235029,
|
||
|
|
"learning_rate": 2.568159780353476e-06,
|
||
|
|
"loss": 0.173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15372015535831451,
|
||
|
|
"step": 3710,
|
||
|
|
"valid_targets_mean": 3693.7,
|
||
|
|
"valid_targets_min": 614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.982286634460547,
|
||
|
|
"grad_norm": 0.5223810749820793,
|
||
|
|
"learning_rate": 2.5289315695395834e-06,
|
||
|
|
"loss": 0.1709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17238521575927734,
|
||
|
|
"step": 3715,
|
||
|
|
"valid_targets_mean": 3895.1,
|
||
|
|
"valid_targets_min": 703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.990338164251208,
|
||
|
|
"grad_norm": 0.6140059891446761,
|
||
|
|
"learning_rate": 2.489985041923928e-06,
|
||
|
|
"loss": 0.1763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18047456443309784,
|
||
|
|
"step": 3720,
|
||
|
|
"valid_targets_mean": 4203.8,
|
||
|
|
"valid_targets_min": 989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.998389694041868,
|
||
|
|
"grad_norm": 0.5110659722384324,
|
||
|
|
"learning_rate": 2.4513208254350486e-06,
|
||
|
|
"loss": 0.1749,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1803944855928421,
|
||
|
|
"step": 3725,
|
||
|
|
"valid_targets_mean": 4611.9,
|
||
|
|
"valid_targets_min": 1053
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.006441223832528,
|
||
|
|
"grad_norm": 0.46649632563361154,
|
||
|
|
"learning_rate": 2.412939543449828e-06,
|
||
|
|
"loss": 0.1608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1679632067680359,
|
||
|
|
"step": 3730,
|
||
|
|
"valid_targets_mean": 4868.8,
|
||
|
|
"valid_targets_min": 1373
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0144927536231885,
|
||
|
|
"grad_norm": 0.5752464912619374,
|
||
|
|
"learning_rate": 2.3748418147834394e-06,
|
||
|
|
"loss": 0.1728,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14780664443969727,
|
||
|
|
"step": 3735,
|
||
|
|
"valid_targets_mean": 3625.4,
|
||
|
|
"valid_targets_min": 705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.022544283413849,
|
||
|
|
"grad_norm": 0.4873295942554316,
|
||
|
|
"learning_rate": 2.337028253679381e-06,
|
||
|
|
"loss": 0.1575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14143216609954834,
|
||
|
|
"step": 3740,
|
||
|
|
"valid_targets_mean": 4609.2,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.030595813204509,
|
||
|
|
"grad_norm": 0.4572827913243831,
|
||
|
|
"learning_rate": 2.299499469799542e-06,
|
||
|
|
"loss": 0.1715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1580767184495926,
|
||
|
|
"step": 3745,
|
||
|
|
"valid_targets_mean": 5816.4,
|
||
|
|
"valid_targets_min": 1119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.038647342995169,
|
||
|
|
"grad_norm": 0.5186439466567816,
|
||
|
|
"learning_rate": 2.262256068214421e-06,
|
||
|
|
"loss": 0.1651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17476163804531097,
|
||
|
|
"step": 3750,
|
||
|
|
"valid_targets_mean": 4437.0,
|
||
|
|
"valid_targets_min": 757
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0466988727858295,
|
||
|
|
"grad_norm": 0.5118711095108422,
|
||
|
|
"learning_rate": 2.2252986493933237e-06,
|
||
|
|
"loss": 0.1684,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16541600227355957,
|
||
|
|
"step": 3755,
|
||
|
|
"valid_targets_mean": 4525.7,
|
||
|
|
"valid_targets_min": 2162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.05475040257649,
|
||
|
|
"grad_norm": 0.5109434582294012,
|
||
|
|
"learning_rate": 2.18862780919471e-06,
|
||
|
|
"loss": 0.1589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15595577657222748,
|
||
|
|
"step": 3760,
|
||
|
|
"valid_targets_mean": 4511.2,
|
||
|
|
"valid_targets_min": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.06280193236715,
|
||
|
|
"grad_norm": 0.5324485507591655,
|
||
|
|
"learning_rate": 2.152244138856585e-06,
|
||
|
|
"loss": 0.1559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1731036901473999,
|
||
|
|
"step": 3765,
|
||
|
|
"valid_targets_mean": 4594.2,
|
||
|
|
"valid_targets_min": 1788
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.07085346215781,
|
||
|
|
"grad_norm": 0.4936303320259845,
|
||
|
|
"learning_rate": 2.1161482249869513e-06,
|
||
|
|
"loss": 0.1499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14089539647102356,
|
||
|
|
"step": 3770,
|
||
|
|
"valid_targets_mean": 5116.8,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.078904991948471,
|
||
|
|
"grad_norm": 0.5698965758566077,
|
||
|
|
"learning_rate": 2.080340649554369e-06,
|
||
|
|
"loss": 0.1662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16755038499832153,
|
||
|
|
"step": 3775,
|
||
|
|
"valid_targets_mean": 4225.4,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.086956521739131,
|
||
|
|
"grad_norm": 0.5229210197667631,
|
||
|
|
"learning_rate": 2.044821989878558e-06,
|
||
|
|
"loss": 0.1722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16623234748840332,
|
||
|
|
"step": 3780,
|
||
|
|
"valid_targets_mean": 4262.9,
|
||
|
|
"valid_targets_min": 1577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.095008051529791,
|
||
|
|
"grad_norm": 0.5459701049906057,
|
||
|
|
"learning_rate": 2.0095928186210956e-06,
|
||
|
|
"loss": 0.1625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.147607684135437,
|
||
|
|
"step": 3785,
|
||
|
|
"valid_targets_mean": 4724.4,
|
||
|
|
"valid_targets_min": 1787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1030595813204505,
|
||
|
|
"grad_norm": 0.5581543823448861,
|
||
|
|
"learning_rate": 1.974653703776188e-06,
|
||
|
|
"loss": 0.1705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18220457434654236,
|
||
|
|
"step": 3790,
|
||
|
|
"valid_targets_mean": 5031.3,
|
||
|
|
"valid_targets_min": 392
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.111111111111111,
|
||
|
|
"grad_norm": 0.5197842393741545,
|
||
|
|
"learning_rate": 1.9400052086615153e-06,
|
||
|
|
"loss": 0.175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1598493456840515,
|
||
|
|
"step": 3795,
|
||
|
|
"valid_targets_mean": 4375.8,
|
||
|
|
"valid_targets_min": 1759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.119162640901771,
|
||
|
|
"grad_norm": 0.5494186611639191,
|
||
|
|
"learning_rate": 1.9056478919091236e-06,
|
||
|
|
"loss": 0.1735,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19067052006721497,
|
||
|
|
"step": 3800,
|
||
|
|
"valid_targets_mean": 4341.0,
|
||
|
|
"valid_targets_min": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.127214170692431,
|
||
|
|
"grad_norm": 0.5103976820611801,
|
||
|
|
"learning_rate": 1.8715823074564587e-06,
|
||
|
|
"loss": 0.16,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15958425402641296,
|
||
|
|
"step": 3805,
|
||
|
|
"valid_targets_mean": 4729.6,
|
||
|
|
"valid_targets_min": 362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1352657004830915,
|
||
|
|
"grad_norm": 0.5752435508668234,
|
||
|
|
"learning_rate": 1.837809004537401e-06,
|
||
|
|
"loss": 0.1579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1595839112997055,
|
||
|
|
"step": 3810,
|
||
|
|
"valid_targets_mean": 4986.8,
|
||
|
|
"valid_targets_min": 897
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.143317230273752,
|
||
|
|
"grad_norm": 0.6232969103900382,
|
||
|
|
"learning_rate": 1.8043285276734334e-06,
|
||
|
|
"loss": 0.1663,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17083576321601868,
|
||
|
|
"step": 3815,
|
||
|
|
"valid_targets_mean": 3214.8,
|
||
|
|
"valid_targets_min": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.151368760064412,
|
||
|
|
"grad_norm": 0.5085611865605083,
|
||
|
|
"learning_rate": 1.7711414166648365e-06,
|
||
|
|
"loss": 0.1522,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15489043295383453,
|
||
|
|
"step": 3820,
|
||
|
|
"valid_targets_mean": 4428.2,
|
||
|
|
"valid_targets_min": 1416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.159420289855072,
|
||
|
|
"grad_norm": 0.5686690339243041,
|
||
|
|
"learning_rate": 1.7382482065820138e-06,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16790664196014404,
|
||
|
|
"step": 3825,
|
||
|
|
"valid_targets_mean": 4888.5,
|
||
|
|
"valid_targets_min": 1755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1674718196457325,
|
||
|
|
"grad_norm": 0.6929429650574811,
|
||
|
|
"learning_rate": 1.7056494277568503e-06,
|
||
|
|
"loss": 0.167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1905716359615326,
|
||
|
|
"step": 3830,
|
||
|
|
"valid_targets_mean": 3701.2,
|
||
|
|
"valid_targets_min": 1170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.175523349436393,
|
||
|
|
"grad_norm": 0.6367228356359814,
|
||
|
|
"learning_rate": 1.6733456057741592e-06,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1782214492559433,
|
||
|
|
"step": 3835,
|
||
|
|
"valid_targets_mean": 3788.4,
|
||
|
|
"valid_targets_min": 1742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.183574879227053,
|
||
|
|
"grad_norm": 0.532748190271915,
|
||
|
|
"learning_rate": 1.641337261463216e-06,
|
||
|
|
"loss": 0.1691,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18782779574394226,
|
||
|
|
"step": 3840,
|
||
|
|
"valid_targets_mean": 4424.2,
|
||
|
|
"valid_targets_min": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.191626409017713,
|
||
|
|
"grad_norm": 0.5944352055775861,
|
||
|
|
"learning_rate": 1.6096249108893602e-06,
|
||
|
|
"loss": 0.1591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14986079931259155,
|
||
|
|
"step": 3845,
|
||
|
|
"valid_targets_mean": 4633.4,
|
||
|
|
"valid_targets_min": 817
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.199677938808374,
|
||
|
|
"grad_norm": 0.4718113061336701,
|
||
|
|
"learning_rate": 1.5782090653456616e-06,
|
||
|
|
"loss": 0.1596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15445095300674438,
|
||
|
|
"step": 3850,
|
||
|
|
"valid_targets_mean": 5366.3,
|
||
|
|
"valid_targets_min": 2465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.207729468599034,
|
||
|
|
"grad_norm": 0.5299032276018538,
|
||
|
|
"learning_rate": 1.547090231344699e-06,
|
||
|
|
"loss": 0.1761,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1652597188949585,
|
||
|
|
"step": 3855,
|
||
|
|
"valid_targets_mean": 4490.8,
|
||
|
|
"valid_targets_min": 2050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.215780998389694,
|
||
|
|
"grad_norm": 0.4937926792080132,
|
||
|
|
"learning_rate": 1.5162689106103746e-06,
|
||
|
|
"loss": 0.1571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16262544691562653,
|
||
|
|
"step": 3860,
|
||
|
|
"valid_targets_mean": 5297.2,
|
||
|
|
"valid_targets_min": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.223832528180354,
|
||
|
|
"grad_norm": 0.5983118290721653,
|
||
|
|
"learning_rate": 1.4857456000698366e-06,
|
||
|
|
"loss": 0.176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21267688274383545,
|
||
|
|
"step": 3865,
|
||
|
|
"valid_targets_mean": 5259.9,
|
||
|
|
"valid_targets_min": 1772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.231884057971015,
|
||
|
|
"grad_norm": 0.5186630493468379,
|
||
|
|
"learning_rate": 1.4555207918454662e-06,
|
||
|
|
"loss": 0.1758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20158061385154724,
|
||
|
|
"step": 3870,
|
||
|
|
"valid_targets_mean": 5251.4,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.239935587761675,
|
||
|
|
"grad_norm": 0.5732567692410732,
|
||
|
|
"learning_rate": 1.4255949732469309e-06,
|
||
|
|
"loss": 0.1649,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15088540315628052,
|
||
|
|
"step": 3875,
|
||
|
|
"valid_targets_mean": 3437.3,
|
||
|
|
"valid_targets_min": 1053
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.247987117552335,
|
||
|
|
"grad_norm": 0.5122360577374575,
|
||
|
|
"learning_rate": 1.3959686267633488e-06,
|
||
|
|
"loss": 0.1817,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1651184856891632,
|
||
|
|
"step": 3880,
|
||
|
|
"valid_targets_mean": 5088.6,
|
||
|
|
"valid_targets_min": 1995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.256038647342995,
|
||
|
|
"grad_norm": 0.48110861215986517,
|
||
|
|
"learning_rate": 1.3666422300554905e-06,
|
||
|
|
"loss": 0.1705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16647136211395264,
|
||
|
|
"step": 3885,
|
||
|
|
"valid_targets_mean": 5231.4,
|
||
|
|
"valid_targets_min": 2355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.264090177133656,
|
||
|
|
"grad_norm": 0.582851491108072,
|
||
|
|
"learning_rate": 1.3376162559480822e-06,
|
||
|
|
"loss": 0.1769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18534335494041443,
|
||
|
|
"step": 3890,
|
||
|
|
"valid_targets_mean": 4512.3,
|
||
|
|
"valid_targets_min": 2039
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.272141706924316,
|
||
|
|
"grad_norm": 0.5404662794082068,
|
||
|
|
"learning_rate": 1.308891172422193e-06,
|
||
|
|
"loss": 0.1737,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1919654756784439,
|
||
|
|
"step": 3895,
|
||
|
|
"valid_targets_mean": 4829.6,
|
||
|
|
"valid_targets_min": 972
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.280193236714976,
|
||
|
|
"grad_norm": 0.628437639331324,
|
||
|
|
"learning_rate": 1.2804674426076757e-06,
|
||
|
|
"loss": 0.174,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21055009961128235,
|
||
|
|
"step": 3900,
|
||
|
|
"valid_targets_mean": 4256.2,
|
||
|
|
"valid_targets_min": 846
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2882447665056365,
|
||
|
|
"grad_norm": 0.4797286979992782,
|
||
|
|
"learning_rate": 1.2523455247757088e-06,
|
||
|
|
"loss": 0.1575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14173908531665802,
|
||
|
|
"step": 3905,
|
||
|
|
"valid_targets_mean": 5012.4,
|
||
|
|
"valid_targets_min": 1956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.296296296296296,
|
||
|
|
"grad_norm": 0.5914383870555415,
|
||
|
|
"learning_rate": 1.224525872331408e-06,
|
||
|
|
"loss": 0.1672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17256557941436768,
|
||
|
|
"step": 3910,
|
||
|
|
"valid_targets_mean": 3722.4,
|
||
|
|
"valid_targets_min": 1185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.304347826086957,
|
||
|
|
"grad_norm": 0.5475160234396864,
|
||
|
|
"learning_rate": 1.1970089338065071e-06,
|
||
|
|
"loss": 0.1651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16874639689922333,
|
||
|
|
"step": 3915,
|
||
|
|
"valid_targets_mean": 4964.1,
|
||
|
|
"valid_targets_min": 1411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.312399355877616,
|
||
|
|
"grad_norm": 0.6352630163683538,
|
||
|
|
"learning_rate": 1.1697951528521422e-06,
|
||
|
|
"loss": 0.1722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16274841129779816,
|
||
|
|
"step": 3920,
|
||
|
|
"valid_targets_mean": 3514.2,
|
||
|
|
"valid_targets_min": 1903
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.320450885668277,
|
||
|
|
"grad_norm": 0.5450852465931986,
|
||
|
|
"learning_rate": 1.1428849682316766e-06,
|
||
|
|
"loss": 0.1545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16449615359306335,
|
||
|
|
"step": 3925,
|
||
|
|
"valid_targets_mean": 4061.4,
|
||
|
|
"valid_targets_min": 1262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.328502415458937,
|
||
|
|
"grad_norm": 0.48987328370522754,
|
||
|
|
"learning_rate": 1.116278813813647e-06,
|
||
|
|
"loss": 0.1556,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13572901487350464,
|
||
|
|
"step": 3930,
|
||
|
|
"valid_targets_mean": 4505.6,
|
||
|
|
"valid_targets_min": 1616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.336553945249597,
|
||
|
|
"grad_norm": 0.503141267860805,
|
||
|
|
"learning_rate": 1.08997711856476e-06,
|
||
|
|
"loss": 0.1612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16356249153614044,
|
||
|
|
"step": 3935,
|
||
|
|
"valid_targets_mean": 4598.4,
|
||
|
|
"valid_targets_min": 1028
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.344605475040257,
|
||
|
|
"grad_norm": 0.7893130983095663,
|
||
|
|
"learning_rate": 1.0639803065429755e-06,
|
||
|
|
"loss": 0.1831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20338299870491028,
|
||
|
|
"step": 3940,
|
||
|
|
"valid_targets_mean": 5540.6,
|
||
|
|
"valid_targets_min": 2279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.352657004830918,
|
||
|
|
"grad_norm": 0.5464519825128046,
|
||
|
|
"learning_rate": 1.0382887968906718e-06,
|
||
|
|
"loss": 0.1699,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1746448576450348,
|
||
|
|
"step": 3945,
|
||
|
|
"valid_targets_mean": 3995.6,
|
||
|
|
"valid_targets_min": 1677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.360708534621578,
|
||
|
|
"grad_norm": 0.5620117079550653,
|
||
|
|
"learning_rate": 1.012903003827883e-06,
|
||
|
|
"loss": 0.1704,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1740240752696991,
|
||
|
|
"step": 3950,
|
||
|
|
"valid_targets_mean": 3910.2,
|
||
|
|
"valid_targets_min": 1147
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.368760064412238,
|
||
|
|
"grad_norm": 0.5014202584177402,
|
||
|
|
"learning_rate": 9.87823336645628e-07,
|
||
|
|
"loss": 0.166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15709742903709412,
|
||
|
|
"step": 3955,
|
||
|
|
"valid_targets_mean": 4344.8,
|
||
|
|
"valid_targets_min": 2201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.3768115942028984,
|
||
|
|
"grad_norm": 0.5362938011516779,
|
||
|
|
"learning_rate": 9.630501996993091e-07,
|
||
|
|
"loss": 0.163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17129208147525787,
|
||
|
|
"step": 3960,
|
||
|
|
"valid_targets_mean": 4717.5,
|
||
|
|
"valid_targets_min": 1989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.384863123993559,
|
||
|
|
"grad_norm": 0.44355010700619574,
|
||
|
|
"learning_rate": 9.385839924021844e-07,
|
||
|
|
"loss": 0.1773,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14925384521484375,
|
||
|
|
"step": 3965,
|
||
|
|
"valid_targets_mean": 5392.1,
|
||
|
|
"valid_targets_min": 2034
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.392914653784219,
|
||
|
|
"grad_norm": 0.5637410567366077,
|
||
|
|
"learning_rate": 9.144251092189416e-07,
|
||
|
|
"loss": 0.1721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2033105343580246,
|
||
|
|
"step": 3970,
|
||
|
|
"valid_targets_mean": 4397.6,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.400966183574879,
|
||
|
|
"grad_norm": 0.5246003448080508,
|
||
|
|
"learning_rate": 8.905739396593316e-07,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16925036907196045,
|
||
|
|
"step": 3975,
|
||
|
|
"valid_targets_mean": 4739.1,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4090177133655395,
|
||
|
|
"grad_norm": 0.5406121384840578,
|
||
|
|
"learning_rate": 8.670308682718853e-07,
|
||
|
|
"loss": 0.1587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1594831645488739,
|
||
|
|
"step": 3980,
|
||
|
|
"valid_targets_mean": 4337.4,
|
||
|
|
"valid_targets_min": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4170692431562,
|
||
|
|
"grad_norm": 0.4785613416301052,
|
||
|
|
"learning_rate": 8.437962746377204e-07,
|
||
|
|
"loss": 0.1668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1622602641582489,
|
||
|
|
"step": 3985,
|
||
|
|
"valid_targets_mean": 5365.6,
|
||
|
|
"valid_targets_min": 1316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.42512077294686,
|
||
|
|
"grad_norm": 0.4972685992182721,
|
||
|
|
"learning_rate": 8.208705333644129e-07,
|
||
|
|
"loss": 0.1641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1673496663570404,
|
||
|
|
"step": 3990,
|
||
|
|
"valid_targets_mean": 4327.2,
|
||
|
|
"valid_targets_min": 873
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.43317230273752,
|
||
|
|
"grad_norm": 0.51768549782515,
|
||
|
|
"learning_rate": 7.982540140799688e-07,
|
||
|
|
"loss": 0.1571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.161638081073761,
|
||
|
|
"step": 3995,
|
||
|
|
"valid_targets_mean": 4245.7,
|
||
|
|
"valid_targets_min": 1725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4412238325281805,
|
||
|
|
"grad_norm": 0.6255107806087558,
|
||
|
|
"learning_rate": 7.759470814268489e-07,
|
||
|
|
"loss": 0.1531,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16319170594215393,
|
||
|
|
"step": 4000,
|
||
|
|
"valid_targets_mean": 3573.4,
|
||
|
|
"valid_targets_min": 216
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.449275362318841,
|
||
|
|
"grad_norm": 0.5117136147623905,
|
||
|
|
"learning_rate": 7.539500950561063e-07,
|
||
|
|
"loss": 0.1649,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18081682920455933,
|
||
|
|
"step": 4005,
|
||
|
|
"valid_targets_mean": 5030.6,
|
||
|
|
"valid_targets_min": 941
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.457326892109501,
|
||
|
|
"grad_norm": 0.5410913172933577,
|
||
|
|
"learning_rate": 7.322634096215831e-07,
|
||
|
|
"loss": 0.1568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15669681131839752,
|
||
|
|
"step": 4010,
|
||
|
|
"valid_targets_mean": 4348.8,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.465378421900161,
|
||
|
|
"grad_norm": 0.4986868222815654,
|
||
|
|
"learning_rate": 7.108873747741807e-07,
|
||
|
|
"loss": 0.1648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14274120330810547,
|
||
|
|
"step": 4015,
|
||
|
|
"valid_targets_mean": 4543.1,
|
||
|
|
"valid_targets_min": 1717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.473429951690822,
|
||
|
|
"grad_norm": 0.469433549650835,
|
||
|
|
"learning_rate": 6.898223351562405e-07,
|
||
|
|
"loss": 0.1686,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15934091806411743,
|
||
|
|
"step": 4020,
|
||
|
|
"valid_targets_mean": 5432.7,
|
||
|
|
"valid_targets_min": 2526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.481481481481482,
|
||
|
|
"grad_norm": 0.622115989263229,
|
||
|
|
"learning_rate": 6.690686303959748e-07,
|
||
|
|
"loss": 0.1746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18453718721866608,
|
||
|
|
"step": 4025,
|
||
|
|
"valid_targets_mean": 3467.2,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.489533011272142,
|
||
|
|
"grad_norm": 0.5270621108254152,
|
||
|
|
"learning_rate": 6.48626595101991e-07,
|
||
|
|
"loss": 0.1752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19507281482219696,
|
||
|
|
"step": 4030,
|
||
|
|
"valid_targets_mean": 5011.7,
|
||
|
|
"valid_targets_min": 2115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4975845410628015,
|
||
|
|
"grad_norm": 0.5614424641002262,
|
||
|
|
"learning_rate": 6.284965588579028e-07,
|
||
|
|
"loss": 0.1683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1735997200012207,
|
||
|
|
"step": 4035,
|
||
|
|
"valid_targets_mean": 4858.7,
|
||
|
|
"valid_targets_min": 1516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.505636070853463,
|
||
|
|
"grad_norm": 0.6277678468034912,
|
||
|
|
"learning_rate": 6.08678846217019e-07,
|
||
|
|
"loss": 0.1553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.173121377825737,
|
||
|
|
"step": 4040,
|
||
|
|
"valid_targets_mean": 4697.3,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.513687600644122,
|
||
|
|
"grad_norm": 0.557886874518275,
|
||
|
|
"learning_rate": 5.891737766970984e-07,
|
||
|
|
"loss": 0.1615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16518916189670563,
|
||
|
|
"step": 4045,
|
||
|
|
"valid_targets_mean": 4401.9,
|
||
|
|
"valid_targets_min": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.521739130434782,
|
||
|
|
"grad_norm": 0.5026467498773549,
|
||
|
|
"learning_rate": 5.699816647752077e-07,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16781866550445557,
|
||
|
|
"step": 4050,
|
||
|
|
"valid_targets_mean": 4928.8,
|
||
|
|
"valid_targets_min": 2215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5297906602254425,
|
||
|
|
"grad_norm": 0.5451182471548547,
|
||
|
|
"learning_rate": 5.511028198826496e-07,
|
||
|
|
"loss": 0.1679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17092815041542053,
|
||
|
|
"step": 4055,
|
||
|
|
"valid_targets_mean": 4106.2,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.537842190016103,
|
||
|
|
"grad_norm": 0.5743885198199623,
|
||
|
|
"learning_rate": 5.32537546399976e-07,
|
||
|
|
"loss": 0.1672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16788442432880402,
|
||
|
|
"step": 4060,
|
||
|
|
"valid_targets_mean": 4289.1,
|
||
|
|
"valid_targets_min": 1684
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.545893719806763,
|
||
|
|
"grad_norm": 0.6037224090967725,
|
||
|
|
"learning_rate": 5.142861436520763e-07,
|
||
|
|
"loss": 0.1685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20297744870185852,
|
||
|
|
"step": 4065,
|
||
|
|
"valid_targets_mean": 3997.9,
|
||
|
|
"valid_targets_min": 383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.553945249597423,
|
||
|
|
"grad_norm": 0.8388825418667704,
|
||
|
|
"learning_rate": 4.963489059033477e-07,
|
||
|
|
"loss": 0.1642,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16553989052772522,
|
||
|
|
"step": 4070,
|
||
|
|
"valid_targets_mean": 4784.5,
|
||
|
|
"valid_targets_min": 2033
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.561996779388084,
|
||
|
|
"grad_norm": 0.5970930999537574,
|
||
|
|
"learning_rate": 4.787261223529616e-07,
|
||
|
|
"loss": 0.165,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17344771325588226,
|
||
|
|
"step": 4075,
|
||
|
|
"valid_targets_mean": 3786.6,
|
||
|
|
"valid_targets_min": 898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.570048309178744,
|
||
|
|
"grad_norm": 0.471204909767901,
|
||
|
|
"learning_rate": 4.6141807713019793e-07,
|
||
|
|
"loss": 0.155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13417160511016846,
|
||
|
|
"step": 4080,
|
||
|
|
"valid_targets_mean": 5032.4,
|
||
|
|
"valid_targets_min": 1912
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.578099838969404,
|
||
|
|
"grad_norm": 0.4884625900862524,
|
||
|
|
"learning_rate": 4.444250492898539e-07,
|
||
|
|
"loss": 0.1746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16039326786994934,
|
||
|
|
"step": 4085,
|
||
|
|
"valid_targets_mean": 5670.8,
|
||
|
|
"valid_targets_min": 1963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.586151368760064,
|
||
|
|
"grad_norm": 1.3562926760409872,
|
||
|
|
"learning_rate": 4.277473128077625e-07,
|
||
|
|
"loss": 0.1798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15605992078781128,
|
||
|
|
"step": 4090,
|
||
|
|
"valid_targets_mean": 5004.9,
|
||
|
|
"valid_targets_min": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.594202898550725,
|
||
|
|
"grad_norm": 0.5293266001333903,
|
||
|
|
"learning_rate": 4.113851365763544e-07,
|
||
|
|
"loss": 0.1626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1735745668411255,
|
||
|
|
"step": 4095,
|
||
|
|
"valid_targets_mean": 5240.4,
|
||
|
|
"valid_targets_min": 1701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.602254428341385,
|
||
|
|
"grad_norm": 0.5578624317503144,
|
||
|
|
"learning_rate": 3.953387844003431e-07,
|
||
|
|
"loss": 0.1607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1669292151927948,
|
||
|
|
"step": 4100,
|
||
|
|
"valid_targets_mean": 4350.2,
|
||
|
|
"valid_targets_min": 2309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.610305958132045,
|
||
|
|
"grad_norm": 0.604399390427962,
|
||
|
|
"learning_rate": 3.7960851499245554e-07,
|
||
|
|
"loss": 0.1566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14910921454429626,
|
||
|
|
"step": 4105,
|
||
|
|
"valid_targets_mean": 3900.4,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.618357487922705,
|
||
|
|
"grad_norm": 0.5270613962718504,
|
||
|
|
"learning_rate": 3.6419458196926825e-07,
|
||
|
|
"loss": 0.1713,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1642162799835205,
|
||
|
|
"step": 4110,
|
||
|
|
"valid_targets_mean": 4613.5,
|
||
|
|
"valid_targets_min": 1191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.626409017713366,
|
||
|
|
"grad_norm": 0.4768249623670376,
|
||
|
|
"learning_rate": 3.4909723384712436e-07,
|
||
|
|
"loss": 0.1647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15536585450172424,
|
||
|
|
"step": 4115,
|
||
|
|
"valid_targets_mean": 4920.4,
|
||
|
|
"valid_targets_min": 1743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.634460547504026,
|
||
|
|
"grad_norm": 0.7880877824521434,
|
||
|
|
"learning_rate": 3.3431671403811207e-07,
|
||
|
|
"loss": 0.1651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14975430071353912,
|
||
|
|
"step": 4120,
|
||
|
|
"valid_targets_mean": 4408.2,
|
||
|
|
"valid_targets_min": 431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.642512077294686,
|
||
|
|
"grad_norm": 0.5055031311515263,
|
||
|
|
"learning_rate": 3.198532608461524e-07,
|
||
|
|
"loss": 0.1627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18191225826740265,
|
||
|
|
"step": 4125,
|
||
|
|
"valid_targets_mean": 5325.4,
|
||
|
|
"valid_targets_min": 881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.650563607085346,
|
||
|
|
"grad_norm": 0.7377711817281254,
|
||
|
|
"learning_rate": 3.0570710746314903e-07,
|
||
|
|
"loss": 0.1785,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1757345199584961,
|
||
|
|
"step": 4130,
|
||
|
|
"valid_targets_mean": 4204.0,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.658615136876007,
|
||
|
|
"grad_norm": 0.49184719888721345,
|
||
|
|
"learning_rate": 2.9187848196524205e-07,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1542222797870636,
|
||
|
|
"step": 4135,
|
||
|
|
"valid_targets_mean": 4731.2,
|
||
|
|
"valid_targets_min": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.666666666666667,
|
||
|
|
"grad_norm": 0.6105305368391408,
|
||
|
|
"learning_rate": 2.7836760730910464e-07,
|
||
|
|
"loss": 0.1666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17159178853034973,
|
||
|
|
"step": 4140,
|
||
|
|
"valid_targets_mean": 3496.8,
|
||
|
|
"valid_targets_min": 397
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.674718196457327,
|
||
|
|
"grad_norm": 0.5519746733795567,
|
||
|
|
"learning_rate": 2.6517470132838117e-07,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1730884313583374,
|
||
|
|
"step": 4145,
|
||
|
|
"valid_targets_mean": 4672.2,
|
||
|
|
"valid_targets_min": 1995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.6827697262479875,
|
||
|
|
"grad_norm": 0.5635266522753974,
|
||
|
|
"learning_rate": 2.522999767301482e-07,
|
||
|
|
"loss": 0.1652,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15275679528713226,
|
||
|
|
"step": 4150,
|
||
|
|
"valid_targets_mean": 3927.0,
|
||
|
|
"valid_targets_min": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.690821256038648,
|
||
|
|
"grad_norm": 0.5061168238375812,
|
||
|
|
"learning_rate": 2.3974364109149886e-07,
|
||
|
|
"loss": 0.1656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14958354830741882,
|
||
|
|
"step": 4155,
|
||
|
|
"valid_targets_mean": 3901.1,
|
||
|
|
"valid_targets_min": 1810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.698872785829307,
|
||
|
|
"grad_norm": 0.5040398527635441,
|
||
|
|
"learning_rate": 2.2750589685619495e-07,
|
||
|
|
"loss": 0.1654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17061063647270203,
|
||
|
|
"step": 4160,
|
||
|
|
"valid_targets_mean": 4585.4,
|
||
|
|
"valid_targets_min": 1591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.706924315619968,
|
||
|
|
"grad_norm": 0.5035025471661269,
|
||
|
|
"learning_rate": 2.1558694133139823e-07,
|
||
|
|
"loss": 0.1589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16706478595733643,
|
||
|
|
"step": 4165,
|
||
|
|
"valid_targets_mean": 4401.9,
|
||
|
|
"valid_targets_min": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.714975845410628,
|
||
|
|
"grad_norm": 0.5623618030459923,
|
||
|
|
"learning_rate": 2.039869666844929e-07,
|
||
|
|
"loss": 0.1697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1561906784772873,
|
||
|
|
"step": 4170,
|
||
|
|
"valid_targets_mean": 3450.5,
|
||
|
|
"valid_targets_min": 1034
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.723027375201288,
|
||
|
|
"grad_norm": 0.5216795359225037,
|
||
|
|
"learning_rate": 1.9270615993998375e-07,
|
||
|
|
"loss": 0.1618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1636110544204712,
|
||
|
|
"step": 4175,
|
||
|
|
"valid_targets_mean": 4135.8,
|
||
|
|
"valid_targets_min": 1656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.731078904991948,
|
||
|
|
"grad_norm": 0.5368469010480501,
|
||
|
|
"learning_rate": 1.817447029764874e-07,
|
||
|
|
"loss": 0.1627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16674436628818512,
|
||
|
|
"step": 4180,
|
||
|
|
"valid_targets_mean": 4527.6,
|
||
|
|
"valid_targets_min": 1381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.739130434782608,
|
||
|
|
"grad_norm": 0.6438973279011997,
|
||
|
|
"learning_rate": 1.7110277252379238e-07,
|
||
|
|
"loss": 0.1554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15082503855228424,
|
||
|
|
"step": 4185,
|
||
|
|
"valid_targets_mean": 4736.3,
|
||
|
|
"valid_targets_min": 1379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.747181964573269,
|
||
|
|
"grad_norm": 0.4998287056476511,
|
||
|
|
"learning_rate": 1.607805401600149e-07,
|
||
|
|
"loss": 0.167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17229287326335907,
|
||
|
|
"step": 4190,
|
||
|
|
"valid_targets_mean": 4969.6,
|
||
|
|
"valid_targets_min": 1283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.755233494363929,
|
||
|
|
"grad_norm": 0.599169633423643,
|
||
|
|
"learning_rate": 1.5077817230883419e-07,
|
||
|
|
"loss": 0.1608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15904340147972107,
|
||
|
|
"step": 4195,
|
||
|
|
"valid_targets_mean": 4552.9,
|
||
|
|
"valid_targets_min": 1866
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.763285024154589,
|
||
|
|
"grad_norm": 0.570759522637566,
|
||
|
|
"learning_rate": 1.4109583023679706e-07,
|
||
|
|
"loss": 0.1662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15024811029434204,
|
||
|
|
"step": 4200,
|
||
|
|
"valid_targets_mean": 5746.9,
|
||
|
|
"valid_targets_min": 1892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7713365539452495,
|
||
|
|
"grad_norm": 0.5403647002807387,
|
||
|
|
"learning_rate": 1.3173367005073545e-07,
|
||
|
|
"loss": 0.1619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17785856127738953,
|
||
|
|
"step": 4205,
|
||
|
|
"valid_targets_mean": 4047.6,
|
||
|
|
"valid_targets_min": 2002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.77938808373591,
|
||
|
|
"grad_norm": 0.5226238218670288,
|
||
|
|
"learning_rate": 1.2269184269523282e-07,
|
||
|
|
"loss": 0.1589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16204693913459778,
|
||
|
|
"step": 4210,
|
||
|
|
"valid_targets_mean": 4730.7,
|
||
|
|
"valid_targets_min": 1508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.78743961352657,
|
||
|
|
"grad_norm": 0.5555947999177602,
|
||
|
|
"learning_rate": 1.1397049395020842e-07,
|
||
|
|
"loss": 0.154,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16874821484088898,
|
||
|
|
"step": 4215,
|
||
|
|
"valid_targets_mean": 4100.5,
|
||
|
|
"valid_targets_min": 1171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.79549114331723,
|
||
|
|
"grad_norm": 0.5473832146836152,
|
||
|
|
"learning_rate": 1.0556976442854805e-07,
|
||
|
|
"loss": 0.1628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18583595752716064,
|
||
|
|
"step": 4220,
|
||
|
|
"valid_targets_mean": 4135.4,
|
||
|
|
"valid_targets_min": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8035426731078905,
|
||
|
|
"grad_norm": 0.5690884856327217,
|
||
|
|
"learning_rate": 9.748978957385025e-08,
|
||
|
|
"loss": 0.1629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1753610521554947,
|
||
|
|
"step": 4225,
|
||
|
|
"valid_targets_mean": 4328.1,
|
||
|
|
"valid_targets_min": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.811594202898551,
|
||
|
|
"grad_norm": 0.5625787536362429,
|
||
|
|
"learning_rate": 8.9730699658237e-08,
|
||
|
|
"loss": 0.1606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15724310278892517,
|
||
|
|
"step": 4230,
|
||
|
|
"valid_targets_mean": 4711.9,
|
||
|
|
"valid_targets_min": 2299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.819645732689211,
|
||
|
|
"grad_norm": 0.5162132722455169,
|
||
|
|
"learning_rate": 8.229261978025316e-08,
|
||
|
|
"loss": 0.1611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17424245178699493,
|
||
|
|
"step": 4235,
|
||
|
|
"valid_targets_mean": 4760.0,
|
||
|
|
"valid_targets_min": 2276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.827697262479871,
|
||
|
|
"grad_norm": 0.6156039773080834,
|
||
|
|
"learning_rate": 7.517566986285474e-08,
|
||
|
|
"loss": 0.182,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1574200689792633,
|
||
|
|
"step": 4240,
|
||
|
|
"valid_targets_mean": 3681.5,
|
||
|
|
"valid_targets_min": 751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.835748792270532,
|
||
|
|
"grad_norm": 0.5832771610140742,
|
||
|
|
"learning_rate": 6.837996465146823e-08,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17290878295898438,
|
||
|
|
"step": 4245,
|
||
|
|
"valid_targets_mean": 3748.6,
|
||
|
|
"valid_targets_min": 284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.843800322061192,
|
||
|
|
"grad_norm": 0.5576720036336581,
|
||
|
|
"learning_rate": 6.190561371214321e-08,
|
||
|
|
"loss": 0.1672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16148659586906433,
|
||
|
|
"step": 4250,
|
||
|
|
"valid_targets_mean": 4166.0,
|
||
|
|
"valid_targets_min": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.851851851851852,
|
||
|
|
"grad_norm": 0.5878319666889452,
|
||
|
|
"learning_rate": 5.575272142978927e-08,
|
||
|
|
"loss": 0.1799,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.172722727060318,
|
||
|
|
"step": 4255,
|
||
|
|
"valid_targets_mean": 4018.2,
|
||
|
|
"valid_targets_min": 754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.859903381642512,
|
||
|
|
"grad_norm": 0.479443149906381,
|
||
|
|
"learning_rate": 4.992138700649074e-08,
|
||
|
|
"loss": 0.1607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13948974013328552,
|
||
|
|
"step": 4260,
|
||
|
|
"valid_targets_mean": 4858.1,
|
||
|
|
"valid_targets_min": 1813
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.867954911433173,
|
||
|
|
"grad_norm": 0.5541385500661399,
|
||
|
|
"learning_rate": 4.4411704459903506e-08,
|
||
|
|
"loss": 0.167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1660829335451126,
|
||
|
|
"step": 4265,
|
||
|
|
"valid_targets_mean": 4232.4,
|
||
|
|
"valid_targets_min": 1054
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.876006441223833,
|
||
|
|
"grad_norm": 0.6179225703483298,
|
||
|
|
"learning_rate": 3.92237626217451e-08,
|
||
|
|
"loss": 0.1713,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16906727850437164,
|
||
|
|
"step": 4270,
|
||
|
|
"valid_targets_mean": 4460.8,
|
||
|
|
"valid_targets_min": 1530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.884057971014493,
|
||
|
|
"grad_norm": 0.5783966854225258,
|
||
|
|
"learning_rate": 3.435764513635809e-08,
|
||
|
|
"loss": 0.1671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17848993837833405,
|
||
|
|
"step": 4275,
|
||
|
|
"valid_targets_mean": 4309.5,
|
||
|
|
"valid_targets_min": 1717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.892109500805153,
|
||
|
|
"grad_norm": 0.5351867682144865,
|
||
|
|
"learning_rate": 2.9813430459364465e-08,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17882059514522552,
|
||
|
|
"step": 4280,
|
||
|
|
"valid_targets_mean": 4416.9,
|
||
|
|
"valid_targets_min": 1611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.900161030595813,
|
||
|
|
"grad_norm": 0.573670983161385,
|
||
|
|
"learning_rate": 2.5591191856397802e-08,
|
||
|
|
"loss": 0.1714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15430384874343872,
|
||
|
|
"step": 4285,
|
||
|
|
"valid_targets_mean": 4570.9,
|
||
|
|
"valid_targets_min": 1407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.908212560386474,
|
||
|
|
"grad_norm": 0.5344897585917823,
|
||
|
|
"learning_rate": 2.1690997401928593e-08,
|
||
|
|
"loss": 0.1538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16887977719306946,
|
||
|
|
"step": 4290,
|
||
|
|
"valid_targets_mean": 4439.5,
|
||
|
|
"valid_targets_min": 2172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.916264090177133,
|
||
|
|
"grad_norm": 0.551697601245166,
|
||
|
|
"learning_rate": 1.811290997815851e-08,
|
||
|
|
"loss": 0.1762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17137417197227478,
|
||
|
|
"step": 4295,
|
||
|
|
"valid_targets_mean": 4672.2,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.9243156199677935,
|
||
|
|
"grad_norm": 0.5535638309128108,
|
||
|
|
"learning_rate": 1.485698727400564e-08,
|
||
|
|
"loss": 0.1673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14466409385204315,
|
||
|
|
"step": 4300,
|
||
|
|
"valid_targets_mean": 3469.3,
|
||
|
|
"valid_targets_min": 1197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.932367149758454,
|
||
|
|
"grad_norm": 0.5119337022576111,
|
||
|
|
"learning_rate": 1.1923281784185226e-08,
|
||
|
|
"loss": 0.1673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16587556898593903,
|
||
|
|
"step": 4305,
|
||
|
|
"valid_targets_mean": 4396.5,
|
||
|
|
"valid_targets_min": 1299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.940418679549114,
|
||
|
|
"grad_norm": 0.5923102041383612,
|
||
|
|
"learning_rate": 9.311840808357009e-09,
|
||
|
|
"loss": 0.1712,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1823520064353943,
|
||
|
|
"step": 4310,
|
||
|
|
"valid_targets_mean": 4216.1,
|
||
|
|
"valid_targets_min": 1303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.948470209339774,
|
||
|
|
"grad_norm": 0.5103445914494756,
|
||
|
|
"learning_rate": 7.022706450354744e-09,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15162253379821777,
|
||
|
|
"step": 4315,
|
||
|
|
"valid_targets_mean": 5503.6,
|
||
|
|
"valid_targets_min": 2231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.956521739130435,
|
||
|
|
"grad_norm": 0.533406608295802,
|
||
|
|
"learning_rate": 5.055915617522278e-09,
|
||
|
|
"loss": 0.158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14400282502174377,
|
||
|
|
"step": 4320,
|
||
|
|
"valid_targets_mean": 4314.7,
|
||
|
|
"valid_targets_min": 1448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.964573268921095,
|
||
|
|
"grad_norm": 0.6879701190779768,
|
||
|
|
"learning_rate": 3.411500020109593e-09,
|
||
|
|
"loss": 0.161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1823330670595169,
|
||
|
|
"step": 4325,
|
||
|
|
"valid_targets_mean": 3070.6,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.972624798711755,
|
||
|
|
"grad_norm": 0.6418600229930465,
|
||
|
|
"learning_rate": 2.08948617075988e-09,
|
||
|
|
"loss": 0.1642,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15779449045658112,
|
||
|
|
"step": 4330,
|
||
|
|
"valid_targets_mean": 3922.4,
|
||
|
|
"valid_targets_min": 1291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.980676328502415,
|
||
|
|
"grad_norm": 0.5848882869027459,
|
||
|
|
"learning_rate": 1.0898953840898786e-09,
|
||
|
|
"loss": 0.1616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18286889791488647,
|
||
|
|
"step": 4335,
|
||
|
|
"valid_targets_mean": 4633.8,
|
||
|
|
"valid_targets_min": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.988727858293076,
|
||
|
|
"grad_norm": 0.45093430680878505,
|
||
|
|
"learning_rate": 4.127437763390418e-10,
|
||
|
|
"loss": 0.1679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15890920162200928,
|
||
|
|
"step": 4340,
|
||
|
|
"valid_targets_mean": 5343.4,
|
||
|
|
"valid_targets_min": 2478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.996779388083736,
|
||
|
|
"grad_norm": 0.5378585489037748,
|
||
|
|
"learning_rate": 5.804226511196831e-11,
|
||
|
|
"loss": 0.1718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15117883682250977,
|
||
|
|
"step": 4345,
|
||
|
|
"valid_targets_mean": 3975.8,
|
||
|
|
"valid_targets_min": 2081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17696434259414673,
|
||
|
|
"step": 4347,
|
||
|
|
"total_flos": 1518375161495552.0,
|
||
|
|
"train_loss": 0.21155054198480733,
|
||
|
|
"train_runtime": 25607.106,
|
||
|
|
"train_samples_per_second": 2.714,
|
||
|
|
"train_steps_per_second": 0.17,
|
||
|
|
"valid_targets_mean": 4512.6,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 4347,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 1500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 1518375161495552.0,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|