3337 lines
92 KiB
JSON
3337 lines
92 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1495,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.016722408026755852,
|
|
"grad_norm": 1.7245030312935132,
|
|
"learning_rate": 1.066666666666667e-06,
|
|
"loss": 0.6095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.61011803150177,
|
|
"step": 5,
|
|
"valid_targets_mean": 4391.3,
|
|
"valid_targets_min": 2139
|
|
},
|
|
{
|
|
"epoch": 0.033444816053511704,
|
|
"grad_norm": 1.624140492424831,
|
|
"learning_rate": 2.4000000000000003e-06,
|
|
"loss": 0.6041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6080305576324463,
|
|
"step": 10,
|
|
"valid_targets_mean": 4457.5,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 0.05016722408026756,
|
|
"grad_norm": 0.7928560412243121,
|
|
"learning_rate": 3.7333333333333337e-06,
|
|
"loss": 0.5469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5168237090110779,
|
|
"step": 15,
|
|
"valid_targets_mean": 5488.9,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 0.06688963210702341,
|
|
"grad_norm": 0.7205074736901347,
|
|
"learning_rate": 5.0666666666666676e-06,
|
|
"loss": 0.5019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4919741153717041,
|
|
"step": 20,
|
|
"valid_targets_mean": 5287.0,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 0.08361204013377926,
|
|
"grad_norm": 0.534571821916365,
|
|
"learning_rate": 6.4000000000000006e-06,
|
|
"loss": 0.4857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.477079302072525,
|
|
"step": 25,
|
|
"valid_targets_mean": 5306.9,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 0.10033444816053512,
|
|
"grad_norm": 0.44837090708345023,
|
|
"learning_rate": 7.733333333333334e-06,
|
|
"loss": 0.4705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4555327892303467,
|
|
"step": 30,
|
|
"valid_targets_mean": 5720.4,
|
|
"valid_targets_min": 2222
|
|
},
|
|
{
|
|
"epoch": 0.11705685618729098,
|
|
"grad_norm": 0.35788786516329824,
|
|
"learning_rate": 9.066666666666667e-06,
|
|
"loss": 0.4417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44564685225486755,
|
|
"step": 35,
|
|
"valid_targets_mean": 5430.3,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 0.13377926421404682,
|
|
"grad_norm": 0.31240093691004855,
|
|
"learning_rate": 1.04e-05,
|
|
"loss": 0.4382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4372028410434723,
|
|
"step": 40,
|
|
"valid_targets_mean": 5302.0,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 0.1505016722408027,
|
|
"grad_norm": 0.29715421306979134,
|
|
"learning_rate": 1.1733333333333335e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41423460841178894,
|
|
"step": 45,
|
|
"valid_targets_mean": 5404.0,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 0.16722408026755853,
|
|
"grad_norm": 0.2763389239522811,
|
|
"learning_rate": 1.3066666666666668e-05,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41930079460144043,
|
|
"step": 50,
|
|
"valid_targets_mean": 4993.2,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 0.18394648829431437,
|
|
"grad_norm": 0.3019811563327018,
|
|
"learning_rate": 1.4400000000000001e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29694250226020813,
|
|
"step": 55,
|
|
"valid_targets_mean": 6149.0,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 0.20066889632107024,
|
|
"grad_norm": 0.2682535799475,
|
|
"learning_rate": 1.5733333333333334e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889573276042938,
|
|
"step": 60,
|
|
"valid_targets_mean": 6292.9,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 0.21739130434782608,
|
|
"grad_norm": 0.26850576964528255,
|
|
"learning_rate": 1.706666666666667e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28359195590019226,
|
|
"step": 65,
|
|
"valid_targets_mean": 6224.4,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 0.23411371237458195,
|
|
"grad_norm": 0.2171336590087016,
|
|
"learning_rate": 1.8400000000000003e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649698257446289,
|
|
"step": 70,
|
|
"valid_targets_mean": 6282.0,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 0.2508361204013378,
|
|
"grad_norm": 0.21962923752273064,
|
|
"learning_rate": 1.9733333333333336e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535257637500763,
|
|
"step": 75,
|
|
"valid_targets_mean": 6212.7,
|
|
"valid_targets_min": 3362
|
|
},
|
|
{
|
|
"epoch": 0.26755852842809363,
|
|
"grad_norm": 0.3424384488222486,
|
|
"learning_rate": 2.1066666666666666e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4487169682979584,
|
|
"step": 80,
|
|
"valid_targets_mean": 6572.3,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 0.2842809364548495,
|
|
"grad_norm": 0.2506953591604314,
|
|
"learning_rate": 2.2400000000000002e-05,
|
|
"loss": 0.437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4307152330875397,
|
|
"step": 85,
|
|
"valid_targets_mean": 7014.9,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.3010033444816054,
|
|
"grad_norm": 0.2576531612366157,
|
|
"learning_rate": 2.373333333333334e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.415714830160141,
|
|
"step": 90,
|
|
"valid_targets_mean": 7358.1,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.3177257525083612,
|
|
"grad_norm": 0.2397054199288198,
|
|
"learning_rate": 2.5066666666666672e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4056137502193451,
|
|
"step": 95,
|
|
"valid_targets_mean": 6830.7,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 0.33444816053511706,
|
|
"grad_norm": 0.24629116268464377,
|
|
"learning_rate": 2.6400000000000005e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22848910093307495,
|
|
"step": 100,
|
|
"valid_targets_mean": 6237.5,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 0.3511705685618729,
|
|
"grad_norm": 0.24910802980831465,
|
|
"learning_rate": 2.7733333333333338e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23061847686767578,
|
|
"step": 105,
|
|
"valid_targets_mean": 6085.3,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 0.36789297658862874,
|
|
"grad_norm": 0.22223000690921413,
|
|
"learning_rate": 2.906666666666667e-05,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22238491475582123,
|
|
"step": 110,
|
|
"valid_targets_mean": 5970.5,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 0.38461538461538464,
|
|
"grad_norm": 0.2001390317036958,
|
|
"learning_rate": 3.0400000000000004e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20133914053440094,
|
|
"step": 115,
|
|
"valid_targets_mean": 5825.3,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 0.4013377926421405,
|
|
"grad_norm": 0.19885004548254503,
|
|
"learning_rate": 3.173333333333334e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2087135761976242,
|
|
"step": 120,
|
|
"valid_targets_mean": 5954.2,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 0.4180602006688963,
|
|
"grad_norm": 0.1992290882112083,
|
|
"learning_rate": 3.3066666666666666e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20589184761047363,
|
|
"step": 125,
|
|
"valid_targets_mean": 6130.7,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.43478260869565216,
|
|
"grad_norm": 0.20745021595110374,
|
|
"learning_rate": 3.44e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20443178713321686,
|
|
"step": 130,
|
|
"valid_targets_mean": 5785.0,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 0.451505016722408,
|
|
"grad_norm": 0.19259367440321293,
|
|
"learning_rate": 3.573333333333333e-05,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19859756529331207,
|
|
"step": 135,
|
|
"valid_targets_mean": 6841.9,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 0.4682274247491639,
|
|
"grad_norm": 0.21765254448892532,
|
|
"learning_rate": 3.706666666666667e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20548826456069946,
|
|
"step": 140,
|
|
"valid_targets_mean": 5637.8,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 0.48494983277591974,
|
|
"grad_norm": 0.34411406995359606,
|
|
"learning_rate": 3.8400000000000005e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4181755483150482,
|
|
"step": 145,
|
|
"valid_targets_mean": 4637.5,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 0.5016722408026756,
|
|
"grad_norm": 0.3082956901441369,
|
|
"learning_rate": 3.9733333333333335e-05,
|
|
"loss": 0.4008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4054408371448517,
|
|
"step": 150,
|
|
"valid_targets_mean": 4843.1,
|
|
"valid_targets_min": 1913
|
|
},
|
|
{
|
|
"epoch": 0.5183946488294314,
|
|
"grad_norm": 0.2965599280874575,
|
|
"learning_rate": 3.999912708491203e-05,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38285741209983826,
|
|
"step": 155,
|
|
"valid_targets_mean": 4593.1,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 0.5351170568561873,
|
|
"grad_norm": 0.26345274846839833,
|
|
"learning_rate": 3.9995580997960116e-05,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3705311119556427,
|
|
"step": 160,
|
|
"valid_targets_mean": 4561.8,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 0.5518394648829431,
|
|
"grad_norm": 0.26837189335189193,
|
|
"learning_rate": 3.998930766523859e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3551887571811676,
|
|
"step": 165,
|
|
"valid_targets_mean": 4529.3,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 0.568561872909699,
|
|
"grad_norm": 0.284434934811865,
|
|
"learning_rate": 3.9980307942382504e-05,
|
|
"loss": 0.3632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36333727836608887,
|
|
"step": 170,
|
|
"valid_targets_mean": 4488.3,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 0.5852842809364549,
|
|
"grad_norm": 0.25676025193538143,
|
|
"learning_rate": 3.9968583056885895e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3708552420139313,
|
|
"step": 175,
|
|
"valid_targets_mean": 4814.0,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 0.6020066889632107,
|
|
"grad_norm": 0.34519669738003705,
|
|
"learning_rate": 3.995413460793431e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3409523069858551,
|
|
"step": 180,
|
|
"valid_targets_mean": 4526.5,
|
|
"valid_targets_min": 2181
|
|
},
|
|
{
|
|
"epoch": 0.6187290969899666,
|
|
"grad_norm": 0.322672098987964,
|
|
"learning_rate": 3.9936964566186735e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.318267285823822,
|
|
"step": 185,
|
|
"valid_targets_mean": 4525.8,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 0.6354515050167224,
|
|
"grad_norm": 0.28803537408637997,
|
|
"learning_rate": 3.991707527350679e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29651132225990295,
|
|
"step": 190,
|
|
"valid_targets_mean": 4629.6,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.6521739130434783,
|
|
"grad_norm": 0.28093349762265185,
|
|
"learning_rate": 3.989446944264332e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2959485352039337,
|
|
"step": 195,
|
|
"valid_targets_mean": 4321.1,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 0.6688963210702341,
|
|
"grad_norm": 0.31127990832964925,
|
|
"learning_rate": 3.98691501568604e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27535346150398254,
|
|
"step": 200,
|
|
"valid_targets_mean": 4405.4,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 0.68561872909699,
|
|
"grad_norm": 0.28207944703004695,
|
|
"learning_rate": 3.9841120869516815e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27812016010284424,
|
|
"step": 205,
|
|
"valid_targets_mean": 4333.9,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 0.7023411371237458,
|
|
"grad_norm": 0.27736191150225886,
|
|
"learning_rate": 3.9810385403595004e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29039719700813293,
|
|
"step": 210,
|
|
"valid_targets_mean": 4402.1,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.7190635451505016,
|
|
"grad_norm": 0.3242898315315502,
|
|
"learning_rate": 3.977694795117969e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31549105048179626,
|
|
"step": 215,
|
|
"valid_targets_mean": 4771.9,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.7357859531772575,
|
|
"grad_norm": 0.24565322352847732,
|
|
"learning_rate": 3.974081307288607e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681817412376404,
|
|
"step": 220,
|
|
"valid_targets_mean": 4539.5,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.7525083612040134,
|
|
"grad_norm": 0.4658927198258249,
|
|
"learning_rate": 3.970198569723779e-05,
|
|
"loss": 0.5155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3206879794597626,
|
|
"step": 225,
|
|
"valid_targets_mean": 3351.5,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.3132650030949883,
|
|
"learning_rate": 3.966047111999477e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30487164855003357,
|
|
"step": 230,
|
|
"valid_targets_mean": 3843.1,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.7859531772575251,
|
|
"grad_norm": 0.6991339613682909,
|
|
"learning_rate": 3.9616275003430836e-05,
|
|
"loss": 0.5737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3571525812149048,
|
|
"step": 235,
|
|
"valid_targets_mean": 2841.5,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 0.802675585284281,
|
|
"grad_norm": 0.39882258778738783,
|
|
"learning_rate": 3.9569403375561475e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861405313014984,
|
|
"step": 240,
|
|
"valid_targets_mean": 4773.6,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 0.8193979933110368,
|
|
"grad_norm": 0.8547662584274843,
|
|
"learning_rate": 3.9519862629321645e-05,
|
|
"loss": 0.5134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6095802187919617,
|
|
"step": 245,
|
|
"valid_targets_mean": 718.6,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 0.8361204013377926,
|
|
"grad_norm": 0.2961625998638665,
|
|
"learning_rate": 3.9467659521693836e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27202290296554565,
|
|
"step": 250,
|
|
"valid_targets_mean": 4782.0,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 0.8528428093645485,
|
|
"grad_norm": 1.011866205178202,
|
|
"learning_rate": 3.9412801172786453e-05,
|
|
"loss": 0.4749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6505089402198792,
|
|
"step": 255,
|
|
"valid_targets_mean": 711.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 0.8695652173913043,
|
|
"grad_norm": 0.2791415574080233,
|
|
"learning_rate": 3.935529506486272e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27265265583992004,
|
|
"step": 260,
|
|
"valid_targets_mean": 4644.9,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.8862876254180602,
|
|
"grad_norm": 0.9130625473905867,
|
|
"learning_rate": 3.9295149041320087e-05,
|
|
"loss": 0.4796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.59287428855896,
|
|
"step": 265,
|
|
"valid_targets_mean": 735.2,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 0.903010033444816,
|
|
"grad_norm": 0.27384980992154423,
|
|
"learning_rate": 3.923237130562054e-05,
|
|
"loss": 0.3604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29318317770957947,
|
|
"step": 270,
|
|
"valid_targets_mean": 4780.1,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 0.919732441471572,
|
|
"grad_norm": 1.2863030433776386,
|
|
"learning_rate": 3.916697042017165e-05,
|
|
"loss": 0.4278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6375625133514404,
|
|
"step": 275,
|
|
"valid_targets_mean": 692.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.9364548494983278,
|
|
"grad_norm": 0.3633438212857499,
|
|
"learning_rate": 3.909895530515874e-05,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29790613055229187,
|
|
"step": 280,
|
|
"valid_targets_mean": 4800.1,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.9531772575250836,
|
|
"grad_norm": 1.4192481716536165,
|
|
"learning_rate": 3.902833523732824e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6474001407623291,
|
|
"step": 285,
|
|
"valid_targets_mean": 671.4,
|
|
"valid_targets_min": 175
|
|
},
|
|
{
|
|
"epoch": 0.9698996655518395,
|
|
"grad_norm": 0.40183371605769863,
|
|
"learning_rate": 3.895511984872241e-05,
|
|
"loss": 0.4674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006552457809448,
|
|
"step": 290,
|
|
"valid_targets_mean": 4780.8,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.9866220735785953,
|
|
"grad_norm": 0.267962660255704,
|
|
"learning_rate": 3.887931912536561e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28743860125541687,
|
|
"step": 295,
|
|
"valid_targets_mean": 3581.8,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 1.0033444816053512,
|
|
"grad_norm": 0.6093597417751938,
|
|
"learning_rate": 3.8800943405902275e-05,
|
|
"loss": 0.5388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4148422181606293,
|
|
"step": 300,
|
|
"valid_targets_mean": 4744.1,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 1.020066889632107,
|
|
"grad_norm": 0.4592272597393982,
|
|
"learning_rate": 3.8720003380186785e-05,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4079246520996094,
|
|
"step": 305,
|
|
"valid_targets_mean": 4749.8,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 1.0367892976588629,
|
|
"grad_norm": 0.2457894053645451,
|
|
"learning_rate": 3.863651008782549e-05,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3785780966281891,
|
|
"step": 310,
|
|
"valid_targets_mean": 4249.1,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 1.0535117056856187,
|
|
"grad_norm": 0.24947510777727397,
|
|
"learning_rate": 3.855047491667094e-05,
|
|
"loss": 0.3536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3452471196651459,
|
|
"step": 315,
|
|
"valid_targets_mean": 4861.7,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 1.0702341137123745,
|
|
"grad_norm": 0.21154619985515752,
|
|
"learning_rate": 3.846190960126873e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33000990748405457,
|
|
"step": 320,
|
|
"valid_targets_mean": 5420.3,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 1.0869565217391304,
|
|
"grad_norm": 0.21992928939009868,
|
|
"learning_rate": 3.837082622125694e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3382793664932251,
|
|
"step": 325,
|
|
"valid_targets_mean": 5058.1,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 1.1036789297658862,
|
|
"grad_norm": 0.22532191173117702,
|
|
"learning_rate": 3.827723719971858e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3184288442134857,
|
|
"step": 330,
|
|
"valid_targets_mean": 5303.0,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 1.120401337792642,
|
|
"grad_norm": 0.2042403427663198,
|
|
"learning_rate": 3.818115530148721e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3313908278942108,
|
|
"step": 335,
|
|
"valid_targets_mean": 4888.6,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 1.137123745819398,
|
|
"grad_norm": 0.2113989978626341,
|
|
"learning_rate": 3.808259363140588e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3202518820762634,
|
|
"step": 340,
|
|
"valid_targets_mean": 5227.7,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 1.1538461538461537,
|
|
"grad_norm": 0.18252806016422507,
|
|
"learning_rate": 3.7981565632539695e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117549419403076,
|
|
"step": 345,
|
|
"valid_targets_mean": 5713.1,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 1.1705685618729098,
|
|
"grad_norm": 0.19139045064244864,
|
|
"learning_rate": 3.7878085084342375e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3228846490383148,
|
|
"step": 350,
|
|
"valid_targets_mean": 5431.2,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 1.1872909698996654,
|
|
"grad_norm": 0.16601714910583143,
|
|
"learning_rate": 3.777216610077676e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21630533039569855,
|
|
"step": 355,
|
|
"valid_targets_mean": 6543.9,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 1.2040133779264215,
|
|
"grad_norm": 0.19207578400249292,
|
|
"learning_rate": 3.766382312838981e-05,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2166496068239212,
|
|
"step": 360,
|
|
"valid_targets_mean": 6602.0,
|
|
"valid_targets_min": 3272
|
|
},
|
|
{
|
|
"epoch": 1.2207357859531773,
|
|
"grad_norm": 0.18069657462503616,
|
|
"learning_rate": 3.75530709443422e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20401179790496826,
|
|
"step": 365,
|
|
"valid_targets_mean": 6464.9,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 1.2374581939799332,
|
|
"grad_norm": 0.20008780485704025,
|
|
"learning_rate": 3.743992465439281e-05,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20383743941783905,
|
|
"step": 370,
|
|
"valid_targets_mean": 6352.6,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 1.254180602006689,
|
|
"grad_norm": 0.2090526902380887,
|
|
"learning_rate": 3.732439969083845e-05,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22135812044143677,
|
|
"step": 375,
|
|
"valid_targets_mean": 6400.7,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 1.2709030100334449,
|
|
"grad_norm": 0.1919090529284783,
|
|
"learning_rate": 3.7206511810408964e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3602113723754883,
|
|
"step": 380,
|
|
"valid_targets_mean": 6548.3,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 1.2876254180602007,
|
|
"grad_norm": 0.24676327799469464,
|
|
"learning_rate": 3.708627709211818e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3631698191165924,
|
|
"step": 385,
|
|
"valid_targets_mean": 7437.5,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 1.3043478260869565,
|
|
"grad_norm": 0.17717675885306233,
|
|
"learning_rate": 3.6963711935070824e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34016939997673035,
|
|
"step": 390,
|
|
"valid_targets_mean": 7530.9,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.3210702341137124,
|
|
"grad_norm": 0.18878808771817251,
|
|
"learning_rate": 3.683883305622582e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3341453969478607,
|
|
"step": 395,
|
|
"valid_targets_mean": 7109.4,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 1.3377926421404682,
|
|
"grad_norm": 0.19607794599883782,
|
|
"learning_rate": 3.6711657488116185e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18399910628795624,
|
|
"step": 400,
|
|
"valid_targets_mean": 6218.3,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 1.354515050167224,
|
|
"grad_norm": 0.18217893369868915,
|
|
"learning_rate": 3.6582202576526e-05,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18440212309360504,
|
|
"step": 405,
|
|
"valid_targets_mean": 6090.0,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 1.37123745819398,
|
|
"grad_norm": 0.1857925270997747,
|
|
"learning_rate": 3.6450485978124494e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17455260455608368,
|
|
"step": 410,
|
|
"valid_targets_mean": 5632.2,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 1.3879598662207357,
|
|
"grad_norm": 0.16394718108592246,
|
|
"learning_rate": 3.6316525658057876e-05,
|
|
"loss": 0.1711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17674054205417633,
|
|
"step": 415,
|
|
"valid_targets_mean": 5734.3,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 1.4046822742474916,
|
|
"grad_norm": 0.19453873887472128,
|
|
"learning_rate": 3.6180339887498953e-05,
|
|
"loss": 0.1719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1782982349395752,
|
|
"step": 420,
|
|
"valid_targets_mean": 5851.3,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 1.4214046822742474,
|
|
"grad_norm": 0.15308386340839525,
|
|
"learning_rate": 3.604194724115515e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16447855532169342,
|
|
"step": 425,
|
|
"valid_targets_mean": 5988.3,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 1.4381270903010033,
|
|
"grad_norm": 0.1638769012447472,
|
|
"learning_rate": 3.590136659473502e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15939190983772278,
|
|
"step": 430,
|
|
"valid_targets_mean": 5928.3,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 1.4548494983277591,
|
|
"grad_norm": 0.1865863393490602,
|
|
"learning_rate": 3.575861712237375e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16939140856266022,
|
|
"step": 435,
|
|
"valid_targets_mean": 6708.4,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 1.471571906354515,
|
|
"grad_norm": 0.15924997227607846,
|
|
"learning_rate": 3.561371829401796e-05,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16857747733592987,
|
|
"step": 440,
|
|
"valid_targets_mean": 5901.7,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 1.488294314381271,
|
|
"grad_norm": 0.254097411212028,
|
|
"learning_rate": 3.546668987277014e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32115718722343445,
|
|
"step": 445,
|
|
"valid_targets_mean": 4722.6,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 1.5050167224080266,
|
|
"grad_norm": 0.24044078707975364,
|
|
"learning_rate": 3.531755191219312e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31480270624160767,
|
|
"step": 450,
|
|
"valid_targets_mean": 4651.1,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 1.5217391304347827,
|
|
"grad_norm": 0.2489736102838297,
|
|
"learning_rate": 3.516632475357491e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31182220578193665,
|
|
"step": 455,
|
|
"valid_targets_mean": 4644.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 1.5384615384615383,
|
|
"grad_norm": 0.25278396588972096,
|
|
"learning_rate": 3.501302902315432e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.320780485868454,
|
|
"step": 460,
|
|
"valid_targets_mean": 4504.0,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 1.5551839464882944,
|
|
"grad_norm": 0.2272679305070563,
|
|
"learning_rate": 3.4857685629307664e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31168070435523987,
|
|
"step": 465,
|
|
"valid_targets_mean": 4584.9,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 1.57190635451505,
|
|
"grad_norm": 0.24115211586713795,
|
|
"learning_rate": 3.4700315759697045e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3153783082962036,
|
|
"step": 470,
|
|
"valid_targets_mean": 4716.0,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 1.588628762541806,
|
|
"grad_norm": 0.2337397996054974,
|
|
"learning_rate": 3.454094087838051e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29250749945640564,
|
|
"step": 475,
|
|
"valid_targets_mean": 4789.8,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 1.605351170568562,
|
|
"grad_norm": 0.28525845944953254,
|
|
"learning_rate": 3.4379582722884496e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26603245735168457,
|
|
"step": 480,
|
|
"valid_targets_mean": 4673.9,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 1.6220735785953178,
|
|
"grad_norm": 0.23964069161847545,
|
|
"learning_rate": 3.4216263301239047e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560085952281952,
|
|
"step": 485,
|
|
"valid_targets_mean": 4649.5,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.6387959866220736,
|
|
"grad_norm": 0.25242621271422044,
|
|
"learning_rate": 3.405100488897603e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26679348945617676,
|
|
"step": 490,
|
|
"valid_targets_mean": 4470.8,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 1.6555183946488294,
|
|
"grad_norm": 0.25301573935148786,
|
|
"learning_rate": 3.388383002609093e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2450329065322876,
|
|
"step": 495,
|
|
"valid_targets_mean": 4466.0,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 1.6722408026755853,
|
|
"grad_norm": 0.21629002885715765,
|
|
"learning_rate": 3.371476151396861e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2431306391954422,
|
|
"step": 500,
|
|
"valid_targets_mean": 4503.0,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 1.6889632107023411,
|
|
"grad_norm": 0.22238463082465948,
|
|
"learning_rate": 3.354382241227332e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501046061515808,
|
|
"step": 505,
|
|
"valid_targets_mean": 4517.9,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 1.705685618729097,
|
|
"grad_norm": 0.20384227023451656,
|
|
"learning_rate": 3.3371036035803576e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24747498333454132,
|
|
"step": 510,
|
|
"valid_targets_mean": 4592.1,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 1.7224080267558528,
|
|
"grad_norm": 0.20233756237500755,
|
|
"learning_rate": 3.319642595131216e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24641738831996918,
|
|
"step": 515,
|
|
"valid_targets_mean": 4745.8,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 1.7391304347826086,
|
|
"grad_norm": 0.294550220574233,
|
|
"learning_rate": 3.3020015974291814e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2826707065105438,
|
|
"step": 520,
|
|
"valid_targets_mean": 1939.9,
|
|
"valid_targets_min": 153
|
|
},
|
|
{
|
|
"epoch": 1.7558528428093645,
|
|
"grad_norm": 0.2973009229821899,
|
|
"learning_rate": 3.284183016572701e-05,
|
|
"loss": 0.3846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24068395793437958,
|
|
"step": 525,
|
|
"valid_targets_mean": 4975.5,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.7725752508361206,
|
|
"grad_norm": 1.0041281625390261,
|
|
"learning_rate": 3.2661892828812155e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5361660122871399,
|
|
"step": 530,
|
|
"valid_targets_mean": 674.2,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 1.7892976588628762,
|
|
"grad_norm": 0.43182735363141195,
|
|
"learning_rate": 3.248022850563688e-05,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2646257281303406,
|
|
"step": 535,
|
|
"valid_targets_mean": 4872.7,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 1.8060200668896322,
|
|
"grad_norm": 0.2517809597933826,
|
|
"learning_rate": 3.2296861973838644e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629435956478119,
|
|
"step": 540,
|
|
"valid_targets_mean": 4683.3,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.8227424749163879,
|
|
"grad_norm": 0.5081675116737157,
|
|
"learning_rate": 3.211181824322328e-05,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28255191445350647,
|
|
"step": 545,
|
|
"valid_targets_mean": 3734.7,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 1.839464882943144,
|
|
"grad_norm": 0.31789874000689705,
|
|
"learning_rate": 3.192512255235382e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25019964575767517,
|
|
"step": 550,
|
|
"valid_targets_mean": 4794.3,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 1.8561872909698995,
|
|
"grad_norm": 0.4454745577819962,
|
|
"learning_rate": 3.1736800365108176e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3151957094669342,
|
|
"step": 555,
|
|
"valid_targets_mean": 1785.8,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 1.8729096989966556,
|
|
"grad_norm": 0.2585604191000923,
|
|
"learning_rate": 3.1546877367206026e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23732544481754303,
|
|
"step": 560,
|
|
"valid_targets_mean": 4309.1,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 1.8896321070234112,
|
|
"grad_norm": 0.8074572430964205,
|
|
"learning_rate": 3.135537946270551e-05,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4839265048503876,
|
|
"step": 565,
|
|
"valid_targets_mean": 733.1,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.9063545150501673,
|
|
"grad_norm": 0.25392758442689983,
|
|
"learning_rate": 3.116233277047008e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24726684391498566,
|
|
"step": 570,
|
|
"valid_targets_mean": 4884.3,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 1.9230769230769231,
|
|
"grad_norm": 0.7132205103519725,
|
|
"learning_rate": 3.096776362060612e-05,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4857273995876312,
|
|
"step": 575,
|
|
"valid_targets_mean": 727.4,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.939799331103679,
|
|
"grad_norm": 0.28030206808075175,
|
|
"learning_rate": 3.0771698550871696e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24414603412151337,
|
|
"step": 580,
|
|
"valid_targets_mean": 4414.5,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.9565217391304348,
|
|
"grad_norm": 1.0722643385299264,
|
|
"learning_rate": 3.057416430305701e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5436132550239563,
|
|
"step": 585,
|
|
"valid_targets_mean": 696.1,
|
|
"valid_targets_min": 158
|
|
},
|
|
{
|
|
"epoch": 1.9732441471571907,
|
|
"grad_norm": 0.40778249296899327,
|
|
"learning_rate": 3.0375187819337014e-05,
|
|
"loss": 0.3482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578917443752289,
|
|
"step": 590,
|
|
"valid_targets_mean": 5053.5,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 1.9899665551839465,
|
|
"grad_norm": 1.0135715593568684,
|
|
"learning_rate": 3.0174796238596733e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5402693748474121,
|
|
"step": 595,
|
|
"valid_targets_mean": 668.8,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 2.0066889632107023,
|
|
"grad_norm": 0.37079905131169244,
|
|
"learning_rate": 2.997301689272968e-05,
|
|
"loss": 0.4223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36734867095947266,
|
|
"step": 600,
|
|
"valid_targets_mean": 4411.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.0234113712374584,
|
|
"grad_norm": 0.26895676064590374,
|
|
"learning_rate": 2.9769877302910046e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3412858247756958,
|
|
"step": 605,
|
|
"valid_targets_mean": 4468.2,
|
|
"valid_targets_min": 1592
|
|
},
|
|
{
|
|
"epoch": 2.040133779264214,
|
|
"grad_norm": 0.2766472449540659,
|
|
"learning_rate": 2.9565405175838968e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096095025539398,
|
|
"step": 610,
|
|
"valid_targets_mean": 5093.7,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 2.05685618729097,
|
|
"grad_norm": 0.1882641912886419,
|
|
"learning_rate": 2.9359628399965586e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830745279788971,
|
|
"step": 615,
|
|
"valid_targets_mean": 5645.9,
|
|
"valid_targets_min": 2151
|
|
},
|
|
{
|
|
"epoch": 2.0735785953177257,
|
|
"grad_norm": 0.1957042236075367,
|
|
"learning_rate": 2.915257504168324e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29117295145988464,
|
|
"step": 620,
|
|
"valid_targets_mean": 5221.6,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 2.0903010033444818,
|
|
"grad_norm": 0.20588483629956084,
|
|
"learning_rate": 2.8944273341501454e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28053486347198486,
|
|
"step": 625,
|
|
"valid_targets_mean": 5244.2,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 2.1070234113712374,
|
|
"grad_norm": 0.2193935949331056,
|
|
"learning_rate": 2.8734751710194118e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28705260157585144,
|
|
"step": 630,
|
|
"valid_targets_mean": 5358.2,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 2.1237458193979935,
|
|
"grad_norm": 0.2023089427188991,
|
|
"learning_rate": 2.852403872492449e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726423442363739,
|
|
"step": 635,
|
|
"valid_targets_mean": 4957.5,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 2.140468227424749,
|
|
"grad_norm": 0.19160273637535705,
|
|
"learning_rate": 2.8312163125347485e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27666404843330383,
|
|
"step": 640,
|
|
"valid_targets_mean": 5104.9,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 2.157190635451505,
|
|
"grad_norm": 0.19046257097512967,
|
|
"learning_rate": 2.8099153809689804e-05,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816013693809509,
|
|
"step": 645,
|
|
"valid_targets_mean": 5555.8,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 2.1739130434782608,
|
|
"grad_norm": 0.21148504544765329,
|
|
"learning_rate": 2.7885039830808422e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803577184677124,
|
|
"step": 650,
|
|
"valid_targets_mean": 5031.8,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 2.190635451505017,
|
|
"grad_norm": 0.18874296277514546,
|
|
"learning_rate": 2.7669850392228023e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17442499101161957,
|
|
"step": 655,
|
|
"valid_targets_mean": 6045.2,
|
|
"valid_targets_min": 3341
|
|
},
|
|
{
|
|
"epoch": 2.2073578595317724,
|
|
"grad_norm": 0.16601951159980557,
|
|
"learning_rate": 2.7453614844157842e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1943759173154831,
|
|
"step": 660,
|
|
"valid_targets_mean": 6267.9,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 2.2240802675585285,
|
|
"grad_norm": 0.15129041350767916,
|
|
"learning_rate": 2.7236362679488514e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18566341698169708,
|
|
"step": 665,
|
|
"valid_targets_mean": 6566.6,
|
|
"valid_targets_min": 3266
|
|
},
|
|
{
|
|
"epoch": 2.240802675585284,
|
|
"grad_norm": 0.14262447168470901,
|
|
"learning_rate": 2.7018123529769484e-05,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17142410576343536,
|
|
"step": 670,
|
|
"valid_targets_mean": 6732.1,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 2.25752508361204,
|
|
"grad_norm": 0.15243264863957715,
|
|
"learning_rate": 2.6798927161167485e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18653543293476105,
|
|
"step": 675,
|
|
"valid_targets_mean": 6640.0,
|
|
"valid_targets_min": 3057
|
|
},
|
|
{
|
|
"epoch": 2.274247491638796,
|
|
"grad_norm": 0.17591037990966985,
|
|
"learning_rate": 2.6578803470406658e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3433483839035034,
|
|
"step": 680,
|
|
"valid_targets_mean": 7170.9,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.290969899665552,
|
|
"grad_norm": 0.16368510902418054,
|
|
"learning_rate": 2.6357782480690846e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3333417475223541,
|
|
"step": 685,
|
|
"valid_targets_mean": 7125.2,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 2.3076923076923075,
|
|
"grad_norm": 0.1908597063091604,
|
|
"learning_rate": 2.6135894337608686e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3223503828048706,
|
|
"step": 690,
|
|
"valid_targets_mean": 6939.3,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 2.3244147157190636,
|
|
"grad_norm": 0.21925207504093933,
|
|
"learning_rate": 2.5913169305021944e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2101258486509323,
|
|
"step": 695,
|
|
"valid_targets_mean": 6239.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 2.3411371237458196,
|
|
"grad_norm": 0.16653728056313943,
|
|
"learning_rate": 2.56896377609378e-05,
|
|
"loss": 0.1621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16172702610492706,
|
|
"step": 700,
|
|
"valid_targets_mean": 5994.6,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 2.3578595317725752,
|
|
"grad_norm": 0.16240152922087314,
|
|
"learning_rate": 2.5465330193365483e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15301357209682465,
|
|
"step": 705,
|
|
"valid_targets_mean": 5792.4,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 2.374581939799331,
|
|
"grad_norm": 0.15853636657516545,
|
|
"learning_rate": 2.5240277196157947e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14691826701164246,
|
|
"step": 710,
|
|
"valid_targets_mean": 6527.7,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 2.391304347826087,
|
|
"grad_norm": 0.14813404734568322,
|
|
"learning_rate": 2.5014509464839095e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14025376737117767,
|
|
"step": 715,
|
|
"valid_targets_mean": 6059.1,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 2.408026755852843,
|
|
"grad_norm": 0.14905714508058546,
|
|
"learning_rate": 2.4788057792417147e-05,
|
|
"loss": 0.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445135623216629,
|
|
"step": 720,
|
|
"valid_targets_mean": 6294.4,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 2.4247491638795986,
|
|
"grad_norm": 0.15540934919273947,
|
|
"learning_rate": 2.4560953065184724e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14712664484977722,
|
|
"step": 725,
|
|
"valid_targets_mean": 6088.0,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 2.4414715719063547,
|
|
"grad_norm": 0.15004313679530892,
|
|
"learning_rate": 2.433322625850617e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14438866078853607,
|
|
"step": 730,
|
|
"valid_targets_mean": 6253.1,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 2.4581939799331103,
|
|
"grad_norm": 0.18665964891293943,
|
|
"learning_rate": 2.4104908432592732e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14549827575683594,
|
|
"step": 735,
|
|
"valid_targets_mean": 6431.6,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 2.4749163879598663,
|
|
"grad_norm": 0.16064268926124256,
|
|
"learning_rate": 2.3876030728266235e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14341790974140167,
|
|
"step": 740,
|
|
"valid_targets_mean": 5723.3,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 2.491638795986622,
|
|
"grad_norm": 0.22407683696782663,
|
|
"learning_rate": 2.3646624362711655e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28306153416633606,
|
|
"step": 745,
|
|
"valid_targets_mean": 4555.0,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 2.508361204013378,
|
|
"grad_norm": 0.22686391457978547,
|
|
"learning_rate": 2.3416720625219373e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2748595178127289,
|
|
"step": 750,
|
|
"valid_targets_mean": 4720.4,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 2.5250836120401337,
|
|
"grad_norm": 0.23690652646070248,
|
|
"learning_rate": 2.3186350872917514e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806766629219055,
|
|
"step": 755,
|
|
"valid_targets_mean": 4706.1,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 2.5418060200668897,
|
|
"grad_norm": 0.21389857186446018,
|
|
"learning_rate": 2.295554652649511e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26098814606666565,
|
|
"step": 760,
|
|
"valid_targets_mean": 4222.0,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 2.5585284280936453,
|
|
"grad_norm": 0.2212548146105914,
|
|
"learning_rate": 2.272433906591652e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25039681792259216,
|
|
"step": 765,
|
|
"valid_targets_mean": 4723.6,
|
|
"valid_targets_min": 1592
|
|
},
|
|
{
|
|
"epoch": 2.5752508361204014,
|
|
"grad_norm": 0.22322400991786523,
|
|
"learning_rate": 2.249276002612785e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26302745938301086,
|
|
"step": 770,
|
|
"valid_targets_mean": 4409.8,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 2.591973244147157,
|
|
"grad_norm": 0.23478742092249213,
|
|
"learning_rate": 2.2260840992755765e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26876962184906006,
|
|
"step": 775,
|
|
"valid_targets_mean": 4625.4,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.23417620201495953,
|
|
"learning_rate": 2.202861359779949e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2331017255783081,
|
|
"step": 780,
|
|
"valid_targets_mean": 4481.9,
|
|
"valid_targets_min": 2429
|
|
},
|
|
{
|
|
"epoch": 2.625418060200669,
|
|
"grad_norm": 0.264301297317795,
|
|
"learning_rate": 2.1796109515316445e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378367930650711,
|
|
"step": 785,
|
|
"valid_targets_mean": 4473.0,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 2.6421404682274248,
|
|
"grad_norm": 0.2565866317091138,
|
|
"learning_rate": 2.156336045710211e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22258122265338898,
|
|
"step": 790,
|
|
"valid_targets_mean": 4505.5,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 2.6588628762541804,
|
|
"grad_norm": 0.1940288155795933,
|
|
"learning_rate": 2.1330398168364823e-05,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21678568422794342,
|
|
"step": 795,
|
|
"valid_targets_mean": 4446.6,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 2.6755852842809364,
|
|
"grad_norm": 0.18627970283420714,
|
|
"learning_rate": 2.1097254423395937e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20801906287670135,
|
|
"step": 800,
|
|
"valid_targets_mean": 4618.1,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 2.6923076923076925,
|
|
"grad_norm": 0.19229368174567585,
|
|
"learning_rate": 2.0863961021236053e-05,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21324963867664337,
|
|
"step": 805,
|
|
"valid_targets_mean": 4455.3,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.709030100334448,
|
|
"grad_norm": 0.19664196403921202,
|
|
"learning_rate": 2.0630549781337904e-05,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22369103133678436,
|
|
"step": 810,
|
|
"valid_targets_mean": 4562.8,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 2.7257525083612038,
|
|
"grad_norm": 0.1863005841070964,
|
|
"learning_rate": 2.0397052539226362e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21109290421009064,
|
|
"step": 815,
|
|
"valid_targets_mean": 4596.7,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.74247491638796,
|
|
"grad_norm": 0.6173372728089213,
|
|
"learning_rate": 2.016350114215639e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3844788372516632,
|
|
"step": 820,
|
|
"valid_targets_mean": 678.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 2.759197324414716,
|
|
"grad_norm": 0.2907761759630343,
|
|
"learning_rate": 1.992992744476924e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23014144599437714,
|
|
"step": 825,
|
|
"valid_targets_mean": 4751.9,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 2.7759197324414715,
|
|
"grad_norm": 0.7059514788472127,
|
|
"learning_rate": 1.9696363304747786e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3986765146255493,
|
|
"step": 830,
|
|
"valid_targets_mean": 674.0,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 2.7926421404682276,
|
|
"grad_norm": 0.2585488252354123,
|
|
"learning_rate": 1.9462840578471338e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241886407136917,
|
|
"step": 835,
|
|
"valid_targets_mean": 4826.7,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 2.809364548494983,
|
|
"grad_norm": 0.4214404035666233,
|
|
"learning_rate": 1.922939111667068e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919781506061554,
|
|
"step": 840,
|
|
"valid_targets_mean": 1458.4,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 2.8260869565217392,
|
|
"grad_norm": 0.4070462279502931,
|
|
"learning_rate": 1.899604676008388e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24292106926441193,
|
|
"step": 845,
|
|
"valid_targets_mean": 4536.0,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 2.842809364548495,
|
|
"grad_norm": 0.2326857123378931,
|
|
"learning_rate": 1.8762839335113454e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21685612201690674,
|
|
"step": 850,
|
|
"valid_targets_mean": 4670.0,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 2.859531772575251,
|
|
"grad_norm": 0.3858936345614067,
|
|
"learning_rate": 1.852980064948549e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23001201450824738,
|
|
"step": 855,
|
|
"valid_targets_mean": 4312.6,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 2.8762541806020065,
|
|
"grad_norm": 0.2846110038843764,
|
|
"learning_rate": 1.8296962487911293e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23642951250076294,
|
|
"step": 860,
|
|
"valid_targets_mean": 3092.2,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 2.8929765886287626,
|
|
"grad_norm": 0.367258015187427,
|
|
"learning_rate": 1.8064356607752217e-05,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541278898715973,
|
|
"step": 865,
|
|
"valid_targets_mean": 3393.6,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 2.9096989966555182,
|
|
"grad_norm": 0.2377725393070408,
|
|
"learning_rate": 1.7832014734688182e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21238528192043304,
|
|
"step": 870,
|
|
"valid_targets_mean": 4894.2,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 2.9264214046822743,
|
|
"grad_norm": 0.9018971437972336,
|
|
"learning_rate": 1.7599968558390553e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3804803788661957,
|
|
"step": 875,
|
|
"valid_targets_mean": 738.1,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 2.94314381270903,
|
|
"grad_norm": 0.1937813815953225,
|
|
"learning_rate": 1.7368249728199884e-05,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22677625715732574,
|
|
"step": 880,
|
|
"valid_targets_mean": 4568.2,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 2.959866220735786,
|
|
"grad_norm": 0.6429163423349497,
|
|
"learning_rate": 1.7136889848809194e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4165579080581665,
|
|
"step": 885,
|
|
"valid_targets_mean": 750.7,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 2.976588628762542,
|
|
"grad_norm": 0.23979840190121002,
|
|
"learning_rate": 1.6905920475953358e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24398088455200195,
|
|
"step": 890,
|
|
"valid_targets_mean": 5069.3,
|
|
"valid_targets_min": 416
|
|
},
|
|
{
|
|
"epoch": 2.9933110367892977,
|
|
"grad_norm": 0.7806320986114523,
|
|
"learning_rate": 1.6675373112105087e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4030502140522003,
|
|
"step": 895,
|
|
"valid_targets_mean": 673.1,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 3.0100334448160537,
|
|
"grad_norm": 0.3325966050698298,
|
|
"learning_rate": 1.6445279202178287e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040490448474884,
|
|
"step": 900,
|
|
"valid_targets_mean": 4467.1,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.0267558528428093,
|
|
"grad_norm": 0.25348447790070533,
|
|
"learning_rate": 1.621567012923917e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27941253781318665,
|
|
"step": 905,
|
|
"valid_targets_mean": 4703.9,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.27167890992632737,
|
|
"learning_rate": 1.5986577210225857e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902013063430786,
|
|
"step": 910,
|
|
"valid_targets_mean": 5275.6,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 3.060200668896321,
|
|
"grad_norm": 0.1910683683867069,
|
|
"learning_rate": 1.575803169167699e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2436305433511734,
|
|
"step": 915,
|
|
"valid_targets_mean": 5387.9,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 3.076923076923077,
|
|
"grad_norm": 0.1874060436776931,
|
|
"learning_rate": 1.553006474546992e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23999392986297607,
|
|
"step": 920,
|
|
"valid_targets_mean": 4801.6,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 3.0936454849498327,
|
|
"grad_norm": 0.18119039129245468,
|
|
"learning_rate": 1.5302707464569132e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24977512657642365,
|
|
"step": 925,
|
|
"valid_targets_mean": 5440.0,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 3.1103678929765888,
|
|
"grad_norm": 0.17719444575363888,
|
|
"learning_rate": 1.5075990858785377e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2357928305864334,
|
|
"step": 930,
|
|
"valid_targets_mean": 5248.3,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 3.1270903010033444,
|
|
"grad_norm": 0.21513060390100192,
|
|
"learning_rate": 1.4849945850546153e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23575645685195923,
|
|
"step": 935,
|
|
"valid_targets_mean": 5246.6,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 3.1438127090301005,
|
|
"grad_norm": 0.18107305406498955,
|
|
"learning_rate": 1.4624603270678171e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22320790588855743,
|
|
"step": 940,
|
|
"valid_targets_mean": 5302.3,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 3.160535117056856,
|
|
"grad_norm": 0.19063928947134984,
|
|
"learning_rate": 1.4399993854202214e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22890616953372955,
|
|
"step": 945,
|
|
"valid_targets_mean": 5620.1,
|
|
"valid_targets_min": 1858
|
|
},
|
|
{
|
|
"epoch": 3.177257525083612,
|
|
"grad_norm": 0.17529233912179928,
|
|
"learning_rate": 1.4176148236141143e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22984355688095093,
|
|
"step": 950,
|
|
"valid_targets_mean": 5451.8,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 3.1939799331103678,
|
|
"grad_norm": 0.15526905588957163,
|
|
"learning_rate": 1.3953096947341492e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163493350148201,
|
|
"step": 955,
|
|
"valid_targets_mean": 6425.3,
|
|
"valid_targets_min": 2629
|
|
},
|
|
{
|
|
"epoch": 3.210702341137124,
|
|
"grad_norm": 0.23047094958309533,
|
|
"learning_rate": 1.3730870410309311e-05,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15648625791072845,
|
|
"step": 960,
|
|
"valid_targets_mean": 6433.0,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 3.2274247491638794,
|
|
"grad_norm": 0.15051441110478084,
|
|
"learning_rate": 1.3509498935060746e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14432001113891602,
|
|
"step": 965,
|
|
"valid_targets_mean": 6299.8,
|
|
"valid_targets_min": 2765
|
|
},
|
|
{
|
|
"epoch": 3.2441471571906355,
|
|
"grad_norm": 0.18713782540742388,
|
|
"learning_rate": 1.3289012714988006e-05,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1696312427520752,
|
|
"step": 970,
|
|
"valid_targets_mean": 6668.9,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 3.260869565217391,
|
|
"grad_norm": 0.14193832407851184,
|
|
"learning_rate": 1.3069441822741195e-05,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15616631507873535,
|
|
"step": 975,
|
|
"valid_targets_mean": 6579.5,
|
|
"valid_targets_min": 1654
|
|
},
|
|
{
|
|
"epoch": 3.277591973244147,
|
|
"grad_norm": 0.17659222491324522,
|
|
"learning_rate": 1.2850816206126623e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911715507507324,
|
|
"step": 980,
|
|
"valid_targets_mean": 6860.2,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 3.294314381270903,
|
|
"grad_norm": 0.17751955711425818,
|
|
"learning_rate": 1.2633165684022184e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30897554755210876,
|
|
"step": 985,
|
|
"valid_targets_mean": 6717.8,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.311036789297659,
|
|
"grad_norm": 0.15672473164502146,
|
|
"learning_rate": 1.2416519942310256e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.280882328748703,
|
|
"step": 990,
|
|
"valid_targets_mean": 7239.4,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 3.327759197324415,
|
|
"grad_norm": 0.19003500142538396,
|
|
"learning_rate": 1.2200908529828794e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14710815250873566,
|
|
"step": 995,
|
|
"valid_targets_mean": 6309.6,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 3.3444816053511706,
|
|
"grad_norm": 0.18357730003119666,
|
|
"learning_rate": 1.1986360854341068e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13904519379138947,
|
|
"step": 1000,
|
|
"valid_targets_mean": 5761.4,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 3.361204013377926,
|
|
"grad_norm": 0.16001904477148948,
|
|
"learning_rate": 1.1772906178524691e-05,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13291610777378082,
|
|
"step": 1005,
|
|
"valid_targets_mean": 6086.1,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 3.3779264214046822,
|
|
"grad_norm": 0.16723753846924963,
|
|
"learning_rate": 1.1560573615980393e-05,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13813412189483643,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5800.1,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 3.3946488294314383,
|
|
"grad_norm": 0.1745217811675545,
|
|
"learning_rate": 1.1349392127261169e-05,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13039818406105042,
|
|
"step": 1015,
|
|
"valid_targets_mean": 6074.7,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 3.411371237458194,
|
|
"grad_norm": 0.1655260029060803,
|
|
"learning_rate": 1.113939051592225e-05,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1264718621969223,
|
|
"step": 1020,
|
|
"valid_targets_mean": 6158.8,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 3.42809364548495,
|
|
"grad_norm": 0.15510678838112874,
|
|
"learning_rate": 1.0930597424592522e-05,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12112051248550415,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6096.9,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 3.4448160535117056,
|
|
"grad_norm": 0.14545317901479146,
|
|
"learning_rate": 1.0723041331067917e-05,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13073737919330597,
|
|
"step": 1030,
|
|
"valid_targets_mean": 6842.2,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 3.4615384615384617,
|
|
"grad_norm": 0.14743435245575817,
|
|
"learning_rate": 1.0516750544427236e-05,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12710754573345184,
|
|
"step": 1035,
|
|
"valid_targets_mean": 6249.6,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.19638550592251586,
|
|
"learning_rate": 1.0311753201171002e-05,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15527376532554626,
|
|
"step": 1040,
|
|
"valid_targets_mean": 5658.3,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 3.4949832775919734,
|
|
"grad_norm": 0.2384441511668291,
|
|
"learning_rate": 1.0108077261383843e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2483915090560913,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4639.5,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 3.511705685618729,
|
|
"grad_norm": 0.22952161164325965,
|
|
"learning_rate": 9.905750504920988e-06,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25090211629867554,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4660.8,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 3.528428093645485,
|
|
"grad_norm": 0.19081898217049426,
|
|
"learning_rate": 9.704800527619271e-06,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23392079770565033,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5102.3,
|
|
"valid_targets_min": 2424
|
|
},
|
|
{
|
|
"epoch": 3.5451505016722407,
|
|
"grad_norm": 0.2150223947743572,
|
|
"learning_rate": 9.505254737533288e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23533904552459717,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4816.6,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 3.5618729096989967,
|
|
"grad_norm": 0.21559324253644188,
|
|
"learning_rate": 9.30714035119712e-06,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23167192935943604,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4696.9,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 3.5785953177257523,
|
|
"grad_norm": 0.21476036313789315,
|
|
"learning_rate": 9.110484389912218e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24024538695812225,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4515.7,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 3.5953177257525084,
|
|
"grad_norm": 0.20079745576530617,
|
|
"learning_rate": 8.915313676061925e-06,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2396174669265747,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4811.0,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 3.6120401337792645,
|
|
"grad_norm": 0.23949736554686882,
|
|
"learning_rate": 8.721654829453072e-06,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20303094387054443,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4378.2,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 3.62876254180602,
|
|
"grad_norm": 0.2162186432786319,
|
|
"learning_rate": 8.529534263685268e-06,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20163659751415253,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4515.4,
|
|
"valid_targets_min": 2198
|
|
},
|
|
{
|
|
"epoch": 3.6454849498327757,
|
|
"grad_norm": 0.19436543294501413,
|
|
"learning_rate": 8.338978182548234e-06,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1891588717699051,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4146.3,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 3.6622073578595318,
|
|
"grad_norm": 0.19164979346182845,
|
|
"learning_rate": 8.150012576447872e-06,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20176713168621063,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4482.4,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 3.678929765886288,
|
|
"grad_norm": 0.19985143783581158,
|
|
"learning_rate": 7.962663218861324e-06,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18389666080474854,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4490.5,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 3.6956521739130435,
|
|
"grad_norm": 0.20930072238561145,
|
|
"learning_rate": 7.77695566282169e-06,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18639083206653595,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4398.2,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 3.712374581939799,
|
|
"grad_norm": 0.18519836850773455,
|
|
"learning_rate": 7.5929152374327515e-06,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18989145755767822,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4469.8,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 3.729096989966555,
|
|
"grad_norm": 0.18493285556718495,
|
|
"learning_rate": 7.41056704441433e-06,
|
|
"loss": 0.2002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19204336404800415,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4966.5,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 3.745819397993311,
|
|
"grad_norm": 0.5966224380636336,
|
|
"learning_rate": 7.229935954678544e-06,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29537415504455566,
|
|
"step": 1120,
|
|
"valid_targets_mean": 718.0,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 3.762541806020067,
|
|
"grad_norm": 0.21823572079242787,
|
|
"learning_rate": 7.05104660493765e-06,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21302206814289093,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4742.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 3.779264214046823,
|
|
"grad_norm": 0.5832962249106494,
|
|
"learning_rate": 6.873923394343758e-06,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2774748206138611,
|
|
"step": 1130,
|
|
"valid_targets_mean": 747.7,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 3.7959866220735785,
|
|
"grad_norm": 0.2167157370011282,
|
|
"learning_rate": 6.698590481160987e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20832622051239014,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4341.8,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 3.8127090301003346,
|
|
"grad_norm": 0.5623912186612033,
|
|
"learning_rate": 6.525071779470429e-06,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31451621651649475,
|
|
"step": 1140,
|
|
"valid_targets_mean": 731.8,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 3.82943143812709,
|
|
"grad_norm": 0.1974116434223006,
|
|
"learning_rate": 6.3533909559084915e-06,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21299366652965546,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4850.5,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 3.8461538461538463,
|
|
"grad_norm": 0.3938591347572728,
|
|
"learning_rate": 6.183571426438928e-06,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28042930364608765,
|
|
"step": 1150,
|
|
"valid_targets_mean": 1309.6,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 3.862876254180602,
|
|
"grad_norm": 0.203233024286556,
|
|
"learning_rate": 6.015636353159073e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2157667726278305,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4642.4,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.879598662207358,
|
|
"grad_norm": 0.5964085554812023,
|
|
"learning_rate": 5.8496086411407135e-06,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3337545692920685,
|
|
"step": 1160,
|
|
"valid_targets_mean": 681.9,
|
|
"valid_targets_min": 173
|
|
},
|
|
{
|
|
"epoch": 3.8963210702341136,
|
|
"grad_norm": 0.2214120863538693,
|
|
"learning_rate": 5.685510935305998e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20867155492305756,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4420.6,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.19187783009425913,
|
|
"learning_rate": 5.523365617338872e-06,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21160221099853516,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5155.8,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 3.9297658862876252,
|
|
"grad_norm": 0.22333071467247392,
|
|
"learning_rate": 5.3631948026323585e-06,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.229045569896698,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4811.5,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 3.9464882943143813,
|
|
"grad_norm": 0.15497067788450447,
|
|
"learning_rate": 5.2050203372722e-06,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19900214672088623,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4957.0,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 3.9632107023411374,
|
|
"grad_norm": 0.42499289353318304,
|
|
"learning_rate": 5.048863795057191e-06,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2734402120113373,
|
|
"step": 1185,
|
|
"valid_targets_mean": 1039.6,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 3.979933110367893,
|
|
"grad_norm": 0.16265281657263275,
|
|
"learning_rate": 4.894746474556717e-06,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2060660570859909,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4632.8,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 3.9966555183946486,
|
|
"grad_norm": 0.6053798931032222,
|
|
"learning_rate": 4.742689396205766e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3188337981700897,
|
|
"step": 1195,
|
|
"valid_targets_mean": 747.0,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 4.013377926421405,
|
|
"grad_norm": 0.27445958249018554,
|
|
"learning_rate": 4.592713299437905e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27683737874031067,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5249.1,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 4.030100334448161,
|
|
"grad_norm": 0.25687751261546804,
|
|
"learning_rate": 4.444838639856568e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518869638442993,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4295.5,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 4.046822742474917,
|
|
"grad_norm": 0.20651475690375476,
|
|
"learning_rate": 4.299085586445078e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23218762874603271,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5412.7,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 4.063545150501672,
|
|
"grad_norm": 0.19554051256399013,
|
|
"learning_rate": 4.1554740188157595e-06,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517744302749634,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5297.2,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 4.080267558528428,
|
|
"grad_norm": 0.17978474793556265,
|
|
"learning_rate": 4.014023524498492e-06,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2261568307876587,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5272.5,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 4.096989966555184,
|
|
"grad_norm": 0.18038505982255448,
|
|
"learning_rate": 3.874753396269135e-06,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23343046009540558,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5164.4,
|
|
"valid_targets_min": 1749
|
|
},
|
|
{
|
|
"epoch": 4.11371237458194,
|
|
"grad_norm": 0.16023221073597105,
|
|
"learning_rate": 3.7376826295181133e-06,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21051271259784698,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5625.0,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 4.130434782608695,
|
|
"grad_norm": 0.17583632120005055,
|
|
"learning_rate": 3.602829919659623e-06,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2274329662322998,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5280.3,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 4.147157190635451,
|
|
"grad_norm": 0.1763063365449292,
|
|
"learning_rate": 3.4702136595817002e-06,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21405138075351715,
|
|
"step": 1240,
|
|
"valid_targets_mean": 5215.2,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 4.1638795986622075,
|
|
"grad_norm": 0.17730523624749578,
|
|
"learning_rate": 3.3398519371375705e-06,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21326959133148193,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5049.2,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 4.1806020066889635,
|
|
"grad_norm": 0.19356277462319527,
|
|
"learning_rate": 3.2117625326786127e-06,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14234782755374908,
|
|
"step": 1250,
|
|
"valid_targets_mean": 6386.6,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 4.197324414715719,
|
|
"grad_norm": 0.16730300258691497,
|
|
"learning_rate": 3.085962916629235e-06,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13740719854831696,
|
|
"step": 1255,
|
|
"valid_targets_mean": 6023.3,
|
|
"valid_targets_min": 2729
|
|
},
|
|
{
|
|
"epoch": 4.214046822742475,
|
|
"grad_norm": 0.15827200537686784,
|
|
"learning_rate": 2.962470247104057e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13070766627788544,
|
|
"step": 1260,
|
|
"valid_targets_mean": 6772.9,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 4.230769230769231,
|
|
"grad_norm": 0.1491184832085282,
|
|
"learning_rate": 2.8413013675676703e-06,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14747533202171326,
|
|
"step": 1265,
|
|
"valid_targets_mean": 6463.4,
|
|
"valid_targets_min": 2173
|
|
},
|
|
{
|
|
"epoch": 4.247491638795987,
|
|
"grad_norm": 0.15092183381928306,
|
|
"learning_rate": 2.722472804537324e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12587887048721313,
|
|
"step": 1270,
|
|
"valid_targets_mean": 6199.0,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 4.264214046822742,
|
|
"grad_norm": 0.1608093727346184,
|
|
"learning_rate": 2.6060007653288155e-06,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18034277856349945,
|
|
"step": 1275,
|
|
"valid_targets_mean": 6426.9,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 4.280936454849498,
|
|
"grad_norm": 0.18870342067871398,
|
|
"learning_rate": 2.4919011358459443e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28456005454063416,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6586.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 4.297658862876254,
|
|
"grad_norm": 0.16650802588613298,
|
|
"learning_rate": 2.380189478413799e-06,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753714323043823,
|
|
"step": 1285,
|
|
"valid_targets_mean": 6664.2,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 4.31438127090301,
|
|
"grad_norm": 0.15911538469277994,
|
|
"learning_rate": 2.2708810296561713e-06,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978549301624298,
|
|
"step": 1290,
|
|
"valid_targets_mean": 6947.3,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 4.331103678929766,
|
|
"grad_norm": 0.17444646580607717,
|
|
"learning_rate": 2.163990698417402e-06,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1320081502199173,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6716.0,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.15418199902523114,
|
|
"learning_rate": 2.0595330637289046e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12595362961292267,
|
|
"step": 1300,
|
|
"valid_targets_mean": 6080.9,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 4.364548494983278,
|
|
"grad_norm": 0.1445001546999268,
|
|
"learning_rate": 1.9575223728207217e-06,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298508644104004,
|
|
"step": 1305,
|
|
"valid_targets_mean": 6157.9,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 4.381270903010034,
|
|
"grad_norm": 0.13859121653874001,
|
|
"learning_rate": 1.857972539178301e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11383823305368423,
|
|
"step": 1310,
|
|
"valid_targets_mean": 6154.5,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 4.39799331103679,
|
|
"grad_norm": 0.13210345726724612,
|
|
"learning_rate": 1.7608971406448061e-06,
|
|
"loss": 0.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10649622231721878,
|
|
"step": 1315,
|
|
"valid_targets_mean": 6118.1,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 4.414715719063545,
|
|
"grad_norm": 0.13740873041385782,
|
|
"learning_rate": 1.666309417569194e-06,
|
|
"loss": 0.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10804883390665054,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5741.9,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 4.431438127090301,
|
|
"grad_norm": 0.1460663780198078,
|
|
"learning_rate": 1.5742222710003252e-06,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12147744745016098,
|
|
"step": 1325,
|
|
"valid_targets_mean": 6000.1,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 4.448160535117057,
|
|
"grad_norm": 0.1491078315390638,
|
|
"learning_rate": 1.4846482609273839e-06,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1106313169002533,
|
|
"step": 1330,
|
|
"valid_targets_mean": 6530.3,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 4.464882943143813,
|
|
"grad_norm": 0.13146477252411287,
|
|
"learning_rate": 1.3975996045667616e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10968706756830215,
|
|
"step": 1335,
|
|
"valid_targets_mean": 5982.4,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 4.481605351170568,
|
|
"grad_norm": 0.29637387004163185,
|
|
"learning_rate": 1.3130881746957447e-06,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656233608722687,
|
|
"step": 1340,
|
|
"valid_targets_mean": 5084.1,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 4.498327759197324,
|
|
"grad_norm": 0.2664430133539512,
|
|
"learning_rate": 1.2311254980331477e-06,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2514803409576416,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4601.8,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 4.51505016722408,
|
|
"grad_norm": 0.2148348860800221,
|
|
"learning_rate": 1.151722753667137e-06,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24929897487163544,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4559.6,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 4.531772575250836,
|
|
"grad_norm": 0.22288534309058536,
|
|
"learning_rate": 1.0748907715305301e-06,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22896242141723633,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4532.3,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 4.548494983277592,
|
|
"grad_norm": 0.2274829356019196,
|
|
"learning_rate": 1.0006400309236385e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21920983493328094,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4192.1,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 4.565217391304348,
|
|
"grad_norm": 0.20576644660868582,
|
|
"learning_rate": 9.289806590849904e-07,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21935880184173584,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4696.6,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 4.581939799331104,
|
|
"grad_norm": 0.19171424252028949,
|
|
"learning_rate": 8.599224298100428e-07,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20938502252101898,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4814.6,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 4.59866220735786,
|
|
"grad_norm": 0.1887310644022492,
|
|
"learning_rate": 7.934747621181049e-07,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2093948870897293,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4647.9,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 4.615384615384615,
|
|
"grad_norm": 0.2950282554295084,
|
|
"learning_rate": 7.296467189676737e-07,
|
|
"loss": 0.1993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19741912186145782,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4493.0,
|
|
"valid_targets_min": 2453
|
|
},
|
|
{
|
|
"epoch": 4.632107023411371,
|
|
"grad_norm": 0.2318488586611998,
|
|
"learning_rate": 6.684470060202986e-07,
|
|
"loss": 0.1985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19462911784648895,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4325.5,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 4.648829431438127,
|
|
"grad_norm": 0.1918353079745598,
|
|
"learning_rate": 6.098839704532045e-07,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18498313426971436,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4599.4,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 4.665551839464883,
|
|
"grad_norm": 0.20421997050212348,
|
|
"learning_rate": 5.539655998207872e-07,
|
|
"loss": 0.1812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16699044406414032,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4453.3,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 4.682274247491639,
|
|
"grad_norm": 0.19927447638970935,
|
|
"learning_rate": 5.006995209651866e-07,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17403970658779144,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4138.5,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 4.698996655518394,
|
|
"grad_norm": 0.18813292577091748,
|
|
"learning_rate": 4.500929989760372e-07,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19561976194381714,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4641.5,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 4.7157190635451505,
|
|
"grad_norm": 0.1945356363289933,
|
|
"learning_rate": 4.0215293619956466e-07,
|
|
"loss": 0.1811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19034479558467865,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4728.2,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.7324414715719065,
|
|
"grad_norm": 0.23787414563653314,
|
|
"learning_rate": 3.568858712971568e-07,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19106751680374146,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4865.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.749163879598662,
|
|
"grad_norm": 0.7015353232599782,
|
|
"learning_rate": 3.1429797835353935e-07,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25036635994911194,
|
|
"step": 1420,
|
|
"valid_targets_mean": 700.6,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 4.765886287625418,
|
|
"grad_norm": 0.23301838604492123,
|
|
"learning_rate": 2.7439506603468056e-07,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1926100105047226,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4673.6,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.5751763900806196,
|
|
"learning_rate": 2.3718257679553603e-07,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720150649547577,
|
|
"step": 1430,
|
|
"valid_targets_mean": 719.2,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 4.79933110367893,
|
|
"grad_norm": 0.18730383058307254,
|
|
"learning_rate": 2.026655861377269e-07,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18452896177768707,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5145.3,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 4.816053511705686,
|
|
"grad_norm": 0.5733176547263106,
|
|
"learning_rate": 1.7084880191729601e-07,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27038872241973877,
|
|
"step": 1440,
|
|
"valid_targets_mean": 698.6,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 4.832775919732441,
|
|
"grad_norm": 0.19122491136437014,
|
|
"learning_rate": 1.4173656370257693e-07,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20094060897827148,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4992.0,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 4.849498327759197,
|
|
"grad_norm": 0.6374646702282563,
|
|
"learning_rate": 1.1533284218231855e-07,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3297445774078369,
|
|
"step": 1450,
|
|
"valid_targets_mean": 670.7,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 4.866220735785953,
|
|
"grad_norm": 0.1946563231598942,
|
|
"learning_rate": 9.164123862411612e-08,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21031109988689423,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4702.2,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 4.882943143812709,
|
|
"grad_norm": 0.5877898109745995,
|
|
"learning_rate": 7.0664984383213e-08,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27888360619544983,
|
|
"step": 1460,
|
|
"valid_targets_mean": 683.8,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 4.8996655518394645,
|
|
"grad_norm": 0.19765013589459693,
|
|
"learning_rate": 5.240694046178219e-08,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19759269058704376,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4679.5,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 4.916387959866221,
|
|
"grad_norm": 0.6156529589125915,
|
|
"learning_rate": 3.6869597118698306e-08,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33030518889427185,
|
|
"step": 1470,
|
|
"valid_targets_mean": 672.6,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 4.933110367892977,
|
|
"grad_norm": 0.2203721452325052,
|
|
"learning_rate": 2.4055073529887228e-08,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2159907966852188,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4921.4,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.949832775919733,
|
|
"grad_norm": 0.17669342666025922,
|
|
"learning_rate": 1.3965117499292746e-08,
|
|
"loss": 0.194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800321489572525,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4283.2,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 4.966555183946488,
|
|
"grad_norm": 0.21424544221902356,
|
|
"learning_rate": 6.60110522047619e-09,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21270084381103516,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4852.2,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 4.983277591973244,
|
|
"grad_norm": 0.18095176673344857,
|
|
"learning_rate": 1.9640410889265606e-09,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18122506141662598,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4463.2,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.3562161320946657,
|
|
"learning_rate": 5.455756506345111e-11,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21731798350811005,
|
|
"step": 1495,
|
|
"valid_targets_mean": 1465.8,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21731798350811005,
|
|
"step": 1495,
|
|
"total_flos": 9850857283649536.0,
|
|
"train_loss": 0.03974963869139502,
|
|
"train_runtime": 9909.9567,
|
|
"train_samples_per_second": 14.475,
|
|
"train_steps_per_second": 0.151,
|
|
"valid_targets_mean": 1465.8,
|
|
"valid_targets_min": 325
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1495,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9850857283649536.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|