3674 lines
102 KiB
JSON
3674 lines
102 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 5.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 1650,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.015182186234817813,
|
||
|
|
"grad_norm": 4.577882285169533,
|
||
|
|
"learning_rate": 9.696969696969698e-07,
|
||
|
|
"loss": 0.3821,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14125539362430573,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 6349.3,
|
||
|
|
"valid_targets_min": 1205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.030364372469635626,
|
||
|
|
"grad_norm": 2.36333443068163,
|
||
|
|
"learning_rate": 2.181818181818182e-06,
|
||
|
|
"loss": 0.3419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10619394481182098,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 6624.3,
|
||
|
|
"valid_targets_min": 1531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04554655870445344,
|
||
|
|
"grad_norm": 1.4113487235292677,
|
||
|
|
"learning_rate": 3.3939393939393946e-06,
|
||
|
|
"loss": 0.3243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10431598126888275,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 5438.1,
|
||
|
|
"valid_targets_min": 1068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06072874493927125,
|
||
|
|
"grad_norm": 0.62553520757171,
|
||
|
|
"learning_rate": 4.606060606060606e-06,
|
||
|
|
"loss": 0.303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11968786269426346,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 5190.2,
|
||
|
|
"valid_targets_min": 1193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07591093117408906,
|
||
|
|
"grad_norm": 0.40619377975716625,
|
||
|
|
"learning_rate": 5.8181818181818185e-06,
|
||
|
|
"loss": 0.2779,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0885775089263916,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 6902.4,
|
||
|
|
"valid_targets_min": 903
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09109311740890688,
|
||
|
|
"grad_norm": 0.3671651963956234,
|
||
|
|
"learning_rate": 7.030303030303031e-06,
|
||
|
|
"loss": 0.2558,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08641765266656876,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 5683.3,
|
||
|
|
"valid_targets_min": 1196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1062753036437247,
|
||
|
|
"grad_norm": 0.24719995342571235,
|
||
|
|
"learning_rate": 8.242424242424243e-06,
|
||
|
|
"loss": 0.2341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08379081636667252,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 5579.6,
|
||
|
|
"valid_targets_min": 1261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1214574898785425,
|
||
|
|
"grad_norm": 0.18778985066745008,
|
||
|
|
"learning_rate": 9.454545454545456e-06,
|
||
|
|
"loss": 0.2168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05891978368163109,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 5071.2,
|
||
|
|
"valid_targets_min": 1382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13663967611336034,
|
||
|
|
"grad_norm": 0.18215489837899956,
|
||
|
|
"learning_rate": 1.0666666666666667e-05,
|
||
|
|
"loss": 0.1962,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06543523073196411,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 5722.6,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15182186234817813,
|
||
|
|
"grad_norm": 0.1994040406642919,
|
||
|
|
"learning_rate": 1.187878787878788e-05,
|
||
|
|
"loss": 0.1975,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06948289275169373,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 4391.1,
|
||
|
|
"valid_targets_min": 1226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16700404858299595,
|
||
|
|
"grad_norm": 0.1794623411727323,
|
||
|
|
"learning_rate": 1.3090909090909092e-05,
|
||
|
|
"loss": 0.1867,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0551609992980957,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 5057.4,
|
||
|
|
"valid_targets_min": 1219
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18218623481781376,
|
||
|
|
"grad_norm": 0.15110982315290433,
|
||
|
|
"learning_rate": 1.4303030303030305e-05,
|
||
|
|
"loss": 0.1829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06352324038743973,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 5477.7,
|
||
|
|
"valid_targets_min": 1130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19736842105263158,
|
||
|
|
"grad_norm": 0.17887737225464861,
|
||
|
|
"learning_rate": 1.5515151515151516e-05,
|
||
|
|
"loss": 0.1818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.061294250190258026,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 4873.1,
|
||
|
|
"valid_targets_min": 2187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2125506072874494,
|
||
|
|
"grad_norm": 0.14662492214397718,
|
||
|
|
"learning_rate": 1.672727272727273e-05,
|
||
|
|
"loss": 0.1732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.049816764891147614,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 5407.7,
|
||
|
|
"valid_targets_min": 1155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22773279352226722,
|
||
|
|
"grad_norm": 0.15555536697911404,
|
||
|
|
"learning_rate": 1.7939393939393942e-05,
|
||
|
|
"loss": 0.1752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05831480771303177,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 5577.9,
|
||
|
|
"valid_targets_min": 1172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.242914979757085,
|
||
|
|
"grad_norm": 0.15003625008902433,
|
||
|
|
"learning_rate": 1.9151515151515152e-05,
|
||
|
|
"loss": 0.169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052947502583265305,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 6273.1,
|
||
|
|
"valid_targets_min": 1978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25809716599190285,
|
||
|
|
"grad_norm": 0.1484191753674967,
|
||
|
|
"learning_rate": 2.0363636363636365e-05,
|
||
|
|
"loss": 0.1665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06921609491109848,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 6116.3,
|
||
|
|
"valid_targets_min": 1519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2732793522267207,
|
||
|
|
"grad_norm": 0.16725415591253295,
|
||
|
|
"learning_rate": 2.1575757575757578e-05,
|
||
|
|
"loss": 0.1606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05471062660217285,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 4370.1,
|
||
|
|
"valid_targets_min": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28846153846153844,
|
||
|
|
"grad_norm": 0.15463499334053316,
|
||
|
|
"learning_rate": 2.278787878787879e-05,
|
||
|
|
"loss": 0.1582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05240866169333458,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 5131.7,
|
||
|
|
"valid_targets_min": 982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30364372469635625,
|
||
|
|
"grad_norm": 0.14301548261073,
|
||
|
|
"learning_rate": 2.4e-05,
|
||
|
|
"loss": 0.1557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04320158809423447,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 5694.0,
|
||
|
|
"valid_targets_min": 1121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3188259109311741,
|
||
|
|
"grad_norm": 0.16164511595169545,
|
||
|
|
"learning_rate": 2.5212121212121214e-05,
|
||
|
|
"loss": 0.1562,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04968992620706558,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 5940.3,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3340080971659919,
|
||
|
|
"grad_norm": 0.25196399601323866,
|
||
|
|
"learning_rate": 2.6424242424242427e-05,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04176684468984604,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 6489.9,
|
||
|
|
"valid_targets_min": 1543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3491902834008097,
|
||
|
|
"grad_norm": 0.14923683707765953,
|
||
|
|
"learning_rate": 2.763636363636364e-05,
|
||
|
|
"loss": 0.145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04738590121269226,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 6781.3,
|
||
|
|
"valid_targets_min": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3643724696356275,
|
||
|
|
"grad_norm": 0.15818654819578504,
|
||
|
|
"learning_rate": 2.884848484848485e-05,
|
||
|
|
"loss": 0.1508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04790515452623367,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 5334.8,
|
||
|
|
"valid_targets_min": 372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37955465587044535,
|
||
|
|
"grad_norm": 0.19234264461233105,
|
||
|
|
"learning_rate": 3.0060606060606062e-05,
|
||
|
|
"loss": 0.152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05456281453371048,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 5340.9,
|
||
|
|
"valid_targets_min": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39473684210526316,
|
||
|
|
"grad_norm": 0.16265824731534112,
|
||
|
|
"learning_rate": 3.127272727272728e-05,
|
||
|
|
"loss": 0.143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04653193801641464,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 5583.2,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.409919028340081,
|
||
|
|
"grad_norm": 0.15571751458809774,
|
||
|
|
"learning_rate": 3.2484848484848485e-05,
|
||
|
|
"loss": 0.1476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04823809117078781,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 5145.2,
|
||
|
|
"valid_targets_min": 1274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4251012145748988,
|
||
|
|
"grad_norm": 0.16542547088134105,
|
||
|
|
"learning_rate": 3.36969696969697e-05,
|
||
|
|
"loss": 0.1407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.050901204347610474,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 5734.6,
|
||
|
|
"valid_targets_min": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4402834008097166,
|
||
|
|
"grad_norm": 0.1543727010347394,
|
||
|
|
"learning_rate": 3.490909090909091e-05,
|
||
|
|
"loss": 0.1345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.042660776525735855,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 6155.5,
|
||
|
|
"valid_targets_min": 1282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45546558704453444,
|
||
|
|
"grad_norm": 0.16917853442828049,
|
||
|
|
"learning_rate": 3.6121212121212124e-05,
|
||
|
|
"loss": 0.1378,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03548862040042877,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 5270.2,
|
||
|
|
"valid_targets_min": 1186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4706477732793522,
|
||
|
|
"grad_norm": 0.16323829901889883,
|
||
|
|
"learning_rate": 3.733333333333334e-05,
|
||
|
|
"loss": 0.1337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0435483492910862,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 5555.4,
|
||
|
|
"valid_targets_min": 1181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48582995951417,
|
||
|
|
"grad_norm": 0.1692106384315938,
|
||
|
|
"learning_rate": 3.854545454545455e-05,
|
||
|
|
"loss": 0.1324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04682815819978714,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 5212.5,
|
||
|
|
"valid_targets_min": 1057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5010121457489879,
|
||
|
|
"grad_norm": 0.3268896604465774,
|
||
|
|
"learning_rate": 3.9757575757575757e-05,
|
||
|
|
"loss": 0.1295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03665677458047867,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 5034.9,
|
||
|
|
"valid_targets_min": 1057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5161943319838057,
|
||
|
|
"grad_norm": 0.1782081910166358,
|
||
|
|
"learning_rate": 3.999928391557286e-05,
|
||
|
|
"loss": 0.1334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0440300777554512,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 6430.0,
|
||
|
|
"valid_targets_min": 1643
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5313765182186235,
|
||
|
|
"grad_norm": 0.14555082752297552,
|
||
|
|
"learning_rate": 3.999637491047052e-05,
|
||
|
|
"loss": 0.1246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03578227758407593,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 5345.4,
|
||
|
|
"valid_targets_min": 1567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5465587044534413,
|
||
|
|
"grad_norm": 0.18362203345864717,
|
||
|
|
"learning_rate": 3.999122855464813e-05,
|
||
|
|
"loss": 0.127,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03790612518787384,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 4368.3,
|
||
|
|
"valid_targets_min": 984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5617408906882592,
|
||
|
|
"grad_norm": 0.23418747992901073,
|
||
|
|
"learning_rate": 3.998384542392021e-05,
|
||
|
|
"loss": 0.1256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04290394484996796,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 5182.0,
|
||
|
|
"valid_targets_min": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5769230769230769,
|
||
|
|
"grad_norm": 0.20572571033751894,
|
||
|
|
"learning_rate": 3.9974226344369124e-05,
|
||
|
|
"loss": 0.1329,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04855868220329285,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 5835.6,
|
||
|
|
"valid_targets_min": 1171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5921052631578947,
|
||
|
|
"grad_norm": 0.18498650361194152,
|
||
|
|
"learning_rate": 3.996237239225268e-05,
|
||
|
|
"loss": 0.1138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0366818904876709,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 5377.1,
|
||
|
|
"valid_targets_min": 854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6072874493927125,
|
||
|
|
"grad_norm": 0.20044512387262414,
|
||
|
|
"learning_rate": 3.994828489388371e-05,
|
||
|
|
"loss": 0.1151,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.043819550424814224,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 6249.6,
|
||
|
|
"valid_targets_min": 1321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6224696356275303,
|
||
|
|
"grad_norm": 0.1784828455297008,
|
||
|
|
"learning_rate": 3.993196542548162e-05,
|
||
|
|
"loss": 0.1101,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04029553383588791,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 6224.9,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6376518218623481,
|
||
|
|
"grad_norm": 0.20703849729117568,
|
||
|
|
"learning_rate": 3.991341581299609e-05,
|
||
|
|
"loss": 0.1196,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0329214408993721,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 4804.6,
|
||
|
|
"valid_targets_min": 1043
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.652834008097166,
|
||
|
|
"grad_norm": 0.18364337011407494,
|
||
|
|
"learning_rate": 3.9892638131902765e-05,
|
||
|
|
"loss": 0.1176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04511767625808716,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 6160.4,
|
||
|
|
"valid_targets_min": 1384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6680161943319838,
|
||
|
|
"grad_norm": 0.1558024964622049,
|
||
|
|
"learning_rate": 3.9869634706971e-05,
|
||
|
|
"loss": 0.1198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03782845661044121,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 5498.9,
|
||
|
|
"valid_targets_min": 1544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6831983805668016,
|
||
|
|
"grad_norm": 0.20914383472969159,
|
||
|
|
"learning_rate": 3.984440811200379e-05,
|
||
|
|
"loss": 0.1078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03220248222351074,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 5320.8,
|
||
|
|
"valid_targets_min": 1325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6983805668016194,
|
||
|
|
"grad_norm": 0.19517043152035096,
|
||
|
|
"learning_rate": 3.981696116954973e-05,
|
||
|
|
"loss": 0.1062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04534193128347397,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 7207.3,
|
||
|
|
"valid_targets_min": 1196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7135627530364372,
|
||
|
|
"grad_norm": 0.17619073031217014,
|
||
|
|
"learning_rate": 3.978729695058729e-05,
|
||
|
|
"loss": 0.1046,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.031153306365013123,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 6348.5,
|
||
|
|
"valid_targets_min": 1173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.728744939271255,
|
||
|
|
"grad_norm": 0.18271054183558552,
|
||
|
|
"learning_rate": 3.9755418774181146e-05,
|
||
|
|
"loss": 0.1025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028445910662412643,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 4876.5,
|
||
|
|
"valid_targets_min": 412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7439271255060729,
|
||
|
|
"grad_norm": 0.16880517618762886,
|
||
|
|
"learning_rate": 3.9721330207110835e-05,
|
||
|
|
"loss": 0.1055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03436177596449852,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 6114.1,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7591093117408907,
|
||
|
|
"grad_norm": 0.19132484996627824,
|
||
|
|
"learning_rate": 3.9685035063471675e-05,
|
||
|
|
"loss": 0.1029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03607245534658432,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 6417.8,
|
||
|
|
"valid_targets_min": 701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7742914979757085,
|
||
|
|
"grad_norm": 0.15970529463142694,
|
||
|
|
"learning_rate": 3.964653740424804e-05,
|
||
|
|
"loss": 0.0989,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03516208380460739,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 5659.3,
|
||
|
|
"valid_targets_min": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7894736842105263,
|
||
|
|
"grad_norm": 0.18483145865781372,
|
||
|
|
"learning_rate": 3.960584153685895e-05,
|
||
|
|
"loss": 0.0966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02324419468641281,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 4546.3,
|
||
|
|
"valid_targets_min": 393
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8046558704453441,
|
||
|
|
"grad_norm": 0.17613276390833632,
|
||
|
|
"learning_rate": 3.9562952014676116e-05,
|
||
|
|
"loss": 0.0959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03462664783000946,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 7255.5,
|
||
|
|
"valid_targets_min": 1810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.819838056680162,
|
||
|
|
"grad_norm": 0.42619852991668516,
|
||
|
|
"learning_rate": 3.9517873636514525e-05,
|
||
|
|
"loss": 0.0898,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.027356699109077454,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 5930.0,
|
||
|
|
"valid_targets_min": 1600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8350202429149798,
|
||
|
|
"grad_norm": 0.21179870429303485,
|
||
|
|
"learning_rate": 3.947061144609546e-05,
|
||
|
|
"loss": 0.091,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0356098935008049,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 6128.2,
|
||
|
|
"valid_targets_min": 826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8502024291497976,
|
||
|
|
"grad_norm": 0.20519933925513575,
|
||
|
|
"learning_rate": 3.942117073148221e-05,
|
||
|
|
"loss": 0.0864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.027698498219251633,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 5906.8,
|
||
|
|
"valid_targets_min": 1226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8653846153846154,
|
||
|
|
"grad_norm": 0.20548545562980808,
|
||
|
|
"learning_rate": 3.9369557024488345e-05,
|
||
|
|
"loss": 0.0845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02698471024632454,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 6887.4,
|
||
|
|
"valid_targets_min": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8805668016194332,
|
||
|
|
"grad_norm": 0.18811028923789463,
|
||
|
|
"learning_rate": 3.931577610005883e-05,
|
||
|
|
"loss": 0.0868,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03699128329753876,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 5884.6,
|
||
|
|
"valid_targets_min": 1231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.895748987854251,
|
||
|
|
"grad_norm": 0.17799496962672587,
|
||
|
|
"learning_rate": 3.925983397562385e-05,
|
||
|
|
"loss": 0.0853,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.026425950229167938,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 5361.0,
|
||
|
|
"valid_targets_min": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9109311740890689,
|
||
|
|
"grad_norm": 0.18919585189082683,
|
||
|
|
"learning_rate": 3.920173691042554e-05,
|
||
|
|
"loss": 0.0879,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03504861146211624,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 6373.2,
|
||
|
|
"valid_targets_min": 1155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9261133603238867,
|
||
|
|
"grad_norm": 0.14742943229085356,
|
||
|
|
"learning_rate": 3.914149140481766e-05,
|
||
|
|
"loss": 0.0764,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02615347132086754,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 8020.1,
|
||
|
|
"valid_targets_min": 1064
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9412955465587044,
|
||
|
|
"grad_norm": 0.17127932498342496,
|
||
|
|
"learning_rate": 3.9079104199538256e-05,
|
||
|
|
"loss": 0.0792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.028958020731806755,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 6390.3,
|
||
|
|
"valid_targets_min": 1186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9564777327935222,
|
||
|
|
"grad_norm": 0.1611100779233749,
|
||
|
|
"learning_rate": 3.901458227495549e-05,
|
||
|
|
"loss": 0.0775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.018862418830394745,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 7854.0,
|
||
|
|
"valid_targets_min": 1626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.97165991902834,
|
||
|
|
"grad_norm": 0.1840495847276238,
|
||
|
|
"learning_rate": 3.8947932850286585e-05,
|
||
|
|
"loss": 0.0754,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02737005054950714,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 5870.0,
|
||
|
|
"valid_targets_min": 1117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9868421052631579,
|
||
|
|
"grad_norm": 0.1622918713412052,
|
||
|
|
"learning_rate": 3.887916338279014e-05,
|
||
|
|
"loss": 0.0771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02394448220729828,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 6428.9,
|
||
|
|
"valid_targets_min": 1226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.32449365450924933,
|
||
|
|
"learning_rate": 3.8808281566931675e-05,
|
||
|
|
"loss": 0.0774,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0782683789730072,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 4910.3,
|
||
|
|
"valid_targets_min": 796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0151821862348178,
|
||
|
|
"grad_norm": 0.20746893200474992,
|
||
|
|
"learning_rate": 3.873529533352277e-05,
|
||
|
|
"loss": 0.0718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02715224027633667,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 6245.9,
|
||
|
|
"valid_targets_min": 977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0303643724696356,
|
||
|
|
"grad_norm": 0.1943595223782352,
|
||
|
|
"learning_rate": 3.8660212848833705e-05,
|
||
|
|
"loss": 0.0586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01882760412991047,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 5001.6,
|
||
|
|
"valid_targets_min": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0455465587044535,
|
||
|
|
"grad_norm": 0.1742400328237816,
|
||
|
|
"learning_rate": 3.858304251367972e-05,
|
||
|
|
"loss": 0.0606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.021374892443418503,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 7840.0,
|
||
|
|
"valid_targets_min": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0607287449392713,
|
||
|
|
"grad_norm": 0.17556822665242897,
|
||
|
|
"learning_rate": 3.850379296248107e-05,
|
||
|
|
"loss": 0.0631,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.019903969019651413,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 5103.5,
|
||
|
|
"valid_targets_min": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.075910931174089,
|
||
|
|
"grad_norm": 0.16742225924074075,
|
||
|
|
"learning_rate": 3.8422473062297e-05,
|
||
|
|
"loss": 0.0563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014354637823998928,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 6132.1,
|
||
|
|
"valid_targets_min": 1181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.091093117408907,
|
||
|
|
"grad_norm": 0.20021656050383596,
|
||
|
|
"learning_rate": 3.8339091911833545e-05,
|
||
|
|
"loss": 0.0616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01479431428015232,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 5134.5,
|
||
|
|
"valid_targets_min": 437
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1062753036437247,
|
||
|
|
"grad_norm": 0.19054606128340812,
|
||
|
|
"learning_rate": 3.825365884042553e-05,
|
||
|
|
"loss": 0.0553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017302554100751877,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 5901.2,
|
||
|
|
"valid_targets_min": 961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1214574898785425,
|
||
|
|
"grad_norm": 0.17630366336918724,
|
||
|
|
"learning_rate": 3.8166183406992745e-05,
|
||
|
|
"loss": 0.0509,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017461497336626053,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 5865.8,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1366396761133604,
|
||
|
|
"grad_norm": 0.17393703573797326,
|
||
|
|
"learning_rate": 3.807667539897041e-05,
|
||
|
|
"loss": 0.0556,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.016034454107284546,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 5455.8,
|
||
|
|
"valid_targets_min": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1518218623481782,
|
||
|
|
"grad_norm": 0.18171916946177608,
|
||
|
|
"learning_rate": 3.798514483121408e-05,
|
||
|
|
"loss": 0.0542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.017630070447921753,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 4784.2,
|
||
|
|
"valid_targets_min": 1020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.167004048582996,
|
||
|
|
"grad_norm": 0.17545029371192425,
|
||
|
|
"learning_rate": 3.789160194487908e-05,
|
||
|
|
"loss": 0.0512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015380222350358963,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 6277.5,
|
||
|
|
"valid_targets_min": 382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1821862348178138,
|
||
|
|
"grad_norm": 0.20217196034333956,
|
||
|
|
"learning_rate": 3.7796057206274686e-05,
|
||
|
|
"loss": 0.0508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.021449625492095947,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 6695.3,
|
||
|
|
"valid_targets_min": 1158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1973684210526316,
|
||
|
|
"grad_norm": 0.18513715753260984,
|
||
|
|
"learning_rate": 3.769852130569304e-05,
|
||
|
|
"loss": 0.0517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020637229084968567,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 5614.1,
|
||
|
|
"valid_targets_min": 1155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2125506072874495,
|
||
|
|
"grad_norm": 0.17569296856429573,
|
||
|
|
"learning_rate": 3.7599005156213066e-05,
|
||
|
|
"loss": 0.0455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013816449791193008,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 5578.5,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2277327935222673,
|
||
|
|
"grad_norm": 0.1819271647947525,
|
||
|
|
"learning_rate": 3.74975198924794e-05,
|
||
|
|
"loss": 0.0483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013525603339076042,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 6242.0,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.242914979757085,
|
||
|
|
"grad_norm": 0.1797887261745102,
|
||
|
|
"learning_rate": 3.739407686945658e-05,
|
||
|
|
"loss": 0.051,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011500559747219086,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 4694.7,
|
||
|
|
"valid_targets_min": 1226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.258097165991903,
|
||
|
|
"grad_norm": 0.1741276215859531,
|
||
|
|
"learning_rate": 3.728868766115854e-05,
|
||
|
|
"loss": 0.046,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.020068110898137093,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 6145.7,
|
||
|
|
"valid_targets_min": 1516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2732793522267207,
|
||
|
|
"grad_norm": 0.18084874852713817,
|
||
|
|
"learning_rate": 3.718136405935365e-05,
|
||
|
|
"loss": 0.0486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01688222959637642,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 5231.5,
|
||
|
|
"valid_targets_min": 1161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2884615384615383,
|
||
|
|
"grad_norm": 0.16889751016736837,
|
||
|
|
"learning_rate": 3.707211807224534e-05,
|
||
|
|
"loss": 0.0438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.014528230763971806,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 6355.8,
|
||
|
|
"valid_targets_min": 1046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3036437246963564,
|
||
|
|
"grad_norm": 0.1903393788443133,
|
||
|
|
"learning_rate": 3.696096192312852e-05,
|
||
|
|
"loss": 0.0454,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.018855996429920197,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 6559.8,
|
||
|
|
"valid_targets_min": 832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.318825910931174,
|
||
|
|
"grad_norm": 0.19972898046944965,
|
||
|
|
"learning_rate": 3.684790804902199e-05,
|
||
|
|
"loss": 0.0442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01779274269938469,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 5856.2,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.334008097165992,
|
||
|
|
"grad_norm": 0.17018811021407995,
|
||
|
|
"learning_rate": 3.673296909927682e-05,
|
||
|
|
"loss": 0.0466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.021237604320049286,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 5553.2,
|
||
|
|
"valid_targets_min": 1358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3491902834008096,
|
||
|
|
"grad_norm": 0.1951555980157815,
|
||
|
|
"learning_rate": 3.661615793416109e-05,
|
||
|
|
"loss": 0.0485,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01593620888888836,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 5629.2,
|
||
|
|
"valid_targets_min": 942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3643724696356276,
|
||
|
|
"grad_norm": 0.180373652890474,
|
||
|
|
"learning_rate": 3.649748762342098e-05,
|
||
|
|
"loss": 0.0419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01810559071600437,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 5972.8,
|
||
|
|
"valid_targets_min": 1388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3795546558704452,
|
||
|
|
"grad_norm": 0.16479184696255464,
|
||
|
|
"learning_rate": 3.637697144481839e-05,
|
||
|
|
"loss": 0.0424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008586437441408634,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 5653.8,
|
||
|
|
"valid_targets_min": 1347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3947368421052633,
|
||
|
|
"grad_norm": 0.16853080157313605,
|
||
|
|
"learning_rate": 3.625462288264536e-05,
|
||
|
|
"loss": 0.0418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01979016326367855,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 7305.0,
|
||
|
|
"valid_targets_min": 1092
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4099190283400809,
|
||
|
|
"grad_norm": 0.1714769531014035,
|
||
|
|
"learning_rate": 3.613045562621533e-05,
|
||
|
|
"loss": 0.052,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02096070721745491,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 6730.9,
|
||
|
|
"valid_targets_min": 1059
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.425101214574899,
|
||
|
|
"grad_norm": 0.17480947019323384,
|
||
|
|
"learning_rate": 3.600448356833146e-05,
|
||
|
|
"loss": 0.0421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013837619684636593,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 7274.6,
|
||
|
|
"valid_targets_min": 1288
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4402834008097165,
|
||
|
|
"grad_norm": 0.1588859981479368,
|
||
|
|
"learning_rate": 3.587672080373219e-05,
|
||
|
|
"loss": 0.0433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009698700159788132,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 5479.3,
|
||
|
|
"valid_targets_min": 1114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4554655870445345,
|
||
|
|
"grad_norm": 0.161639453514387,
|
||
|
|
"learning_rate": 3.574718162751426e-05,
|
||
|
|
"loss": 0.0375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.010496283881366253,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 5544.4,
|
||
|
|
"valid_targets_min": 916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4706477732793521,
|
||
|
|
"grad_norm": 0.15503382075391162,
|
||
|
|
"learning_rate": 3.561588053353319e-05,
|
||
|
|
"loss": 0.0379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009971126914024353,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 7162.0,
|
||
|
|
"valid_targets_min": 399
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.48582995951417,
|
||
|
|
"grad_norm": 0.1725961466937425,
|
||
|
|
"learning_rate": 3.5482832212781655e-05,
|
||
|
|
"loss": 0.0402,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009653078392148018,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 6713.2,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5010121457489878,
|
||
|
|
"grad_norm": 0.19680739325496185,
|
||
|
|
"learning_rate": 3.53480515517457e-05,
|
||
|
|
"loss": 0.0379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0158684104681015,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 6155.9,
|
||
|
|
"valid_targets_min": 888
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5161943319838058,
|
||
|
|
"grad_norm": 0.17098248453785886,
|
||
|
|
"learning_rate": 3.5211553630739166e-05,
|
||
|
|
"loss": 0.0358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012387385591864586,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 6459.1,
|
||
|
|
"valid_targets_min": 1212
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5313765182186234,
|
||
|
|
"grad_norm": 0.17212144323378897,
|
||
|
|
"learning_rate": 3.5073353722216334e-05,
|
||
|
|
"loss": 0.0367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.015653623268008232,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 5349.3,
|
||
|
|
"valid_targets_min": 1249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5465587044534415,
|
||
|
|
"grad_norm": 0.15613275126727846,
|
||
|
|
"learning_rate": 3.4933467289063156e-05,
|
||
|
|
"loss": 0.0334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.010056696832180023,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 5716.3,
|
||
|
|
"valid_targets_min": 1089
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.561740890688259,
|
||
|
|
"grad_norm": 0.17516245854055706,
|
||
|
|
"learning_rate": 3.4791909982867175e-05,
|
||
|
|
"loss": 0.0339,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01214912161231041,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 5831.1,
|
||
|
|
"valid_targets_min": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5769230769230769,
|
||
|
|
"grad_norm": 0.19105559378665027,
|
||
|
|
"learning_rate": 3.464869764216622e-05,
|
||
|
|
"loss": 0.0327,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012154040858149529,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 5496.7,
|
||
|
|
"valid_targets_min": 1195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5921052631578947,
|
||
|
|
"grad_norm": 0.17250800846906586,
|
||
|
|
"learning_rate": 3.450384629067635e-05,
|
||
|
|
"loss": 0.031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008524593897163868,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 5553.9,
|
||
|
|
"valid_targets_min": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6072874493927125,
|
||
|
|
"grad_norm": 0.1717299949148825,
|
||
|
|
"learning_rate": 3.435737213549896e-05,
|
||
|
|
"loss": 0.0415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.031826041638851166,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 5484.1,
|
||
|
|
"valid_targets_min": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6224696356275303,
|
||
|
|
"grad_norm": 0.179799500085348,
|
||
|
|
"learning_rate": 3.420929156530738e-05,
|
||
|
|
"loss": 0.0319,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009917274117469788,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 7064.2,
|
||
|
|
"valid_targets_min": 923
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6376518218623481,
|
||
|
|
"grad_norm": 0.15871774599737085,
|
||
|
|
"learning_rate": 3.405962114851324e-05,
|
||
|
|
"loss": 0.035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.010038254782557487,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 7041.0,
|
||
|
|
"valid_targets_min": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.652834008097166,
|
||
|
|
"grad_norm": 0.1794875978731234,
|
||
|
|
"learning_rate": 3.390837763141261e-05,
|
||
|
|
"loss": 0.0342,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00696304626762867,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 6311.3,
|
||
|
|
"valid_targets_min": 1669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6680161943319838,
|
||
|
|
"grad_norm": 0.1550391073555312,
|
||
|
|
"learning_rate": 3.3755577936312344e-05,
|
||
|
|
"loss": 0.0346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00631769048050046,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 4612.6,
|
||
|
|
"valid_targets_min": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6831983805668016,
|
||
|
|
"grad_norm": 0.16850242447990393,
|
||
|
|
"learning_rate": 3.360123915963662e-05,
|
||
|
|
"loss": 0.035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00742865027859807,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 6497.4,
|
||
|
|
"valid_targets_min": 1414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6983805668016194,
|
||
|
|
"grad_norm": 0.16905370140192488,
|
||
|
|
"learning_rate": 3.3445378570014125e-05,
|
||
|
|
"loss": 0.0314,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011094050481915474,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 5412.4,
|
||
|
|
"valid_targets_min": 1495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7135627530364372,
|
||
|
|
"grad_norm": 0.15956039481269715,
|
||
|
|
"learning_rate": 3.328801360634585e-05,
|
||
|
|
"loss": 0.0334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005547087173908949,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 4378.1,
|
||
|
|
"valid_targets_min": 1302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.728744939271255,
|
||
|
|
"grad_norm": 0.16589513640263132,
|
||
|
|
"learning_rate": 3.312916187585392e-05,
|
||
|
|
"loss": 0.0328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.010751340538263321,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 5506.6,
|
||
|
|
"valid_targets_min": 1233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7439271255060729,
|
||
|
|
"grad_norm": 0.1744055632321872,
|
||
|
|
"learning_rate": 3.296884115211157e-05,
|
||
|
|
"loss": 0.0299,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006707729306071997,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 5298.9,
|
||
|
|
"valid_targets_min": 1420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7591093117408907,
|
||
|
|
"grad_norm": 0.17696528943812867,
|
||
|
|
"learning_rate": 3.280706937305445e-05,
|
||
|
|
"loss": 0.0265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009326794184744358,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 5770.2,
|
||
|
|
"valid_targets_min": 1414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7742914979757085,
|
||
|
|
"grad_norm": 0.15958457014375885,
|
||
|
|
"learning_rate": 3.2643864638973645e-05,
|
||
|
|
"loss": 0.0371,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0119598638266325,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 6624.9,
|
||
|
|
"valid_targets_min": 1202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7894736842105263,
|
||
|
|
"grad_norm": 0.1769988360539718,
|
||
|
|
"learning_rate": 3.2479245210490434e-05,
|
||
|
|
"loss": 0.0281,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011356104165315628,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 6446.4,
|
||
|
|
"valid_targets_min": 1407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8046558704453441,
|
||
|
|
"grad_norm": 0.16194592137927077,
|
||
|
|
"learning_rate": 3.2313229506513167e-05,
|
||
|
|
"loss": 0.0285,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00683578522875905,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 5801.3,
|
||
|
|
"valid_targets_min": 1246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.819838056680162,
|
||
|
|
"grad_norm": 0.17822073447265535,
|
||
|
|
"learning_rate": 3.2145836102176424e-05,
|
||
|
|
"loss": 0.0279,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007592367008328438,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 6117.9,
|
||
|
|
"valid_targets_min": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8350202429149798,
|
||
|
|
"grad_norm": 0.17577937924822495,
|
||
|
|
"learning_rate": 3.197708372676265e-05,
|
||
|
|
"loss": 0.0307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.013171358034014702,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 6056.2,
|
||
|
|
"valid_targets_min": 1345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8502024291497976,
|
||
|
|
"grad_norm": 0.189351919140906,
|
||
|
|
"learning_rate": 3.1806991261606604e-05,
|
||
|
|
"loss": 0.0291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008558647707104683,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 6072.7,
|
||
|
|
"valid_targets_min": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8653846153846154,
|
||
|
|
"grad_norm": 0.1765130899494387,
|
||
|
|
"learning_rate": 3.163557773798276e-05,
|
||
|
|
"loss": 0.0304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.01081151608377695,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 6111.1,
|
||
|
|
"valid_targets_min": 1064
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8805668016194332,
|
||
|
|
"grad_norm": 0.1887454294616824,
|
||
|
|
"learning_rate": 3.146286233497593e-05,
|
||
|
|
"loss": 0.0276,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009745375253260136,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 4363.2,
|
||
|
|
"valid_targets_min": 387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.895748987854251,
|
||
|
|
"grad_norm": 0.17589978066244452,
|
||
|
|
"learning_rate": 3.128886437733539e-05,
|
||
|
|
"loss": 0.0249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007151344791054726,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 5897.3,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9109311740890689,
|
||
|
|
"grad_norm": 0.16909428003654398,
|
||
|
|
"learning_rate": 3.111360333331263e-05,
|
||
|
|
"loss": 0.0237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0048427945002913475,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 5007.2,
|
||
|
|
"valid_targets_min": 1116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9261133603238867,
|
||
|
|
"grad_norm": 0.16402341969524917,
|
||
|
|
"learning_rate": 3.093709881248312e-05,
|
||
|
|
"loss": 0.0261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.010756228119134903,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 6274.2,
|
||
|
|
"valid_targets_min": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9412955465587043,
|
||
|
|
"grad_norm": 0.16227058756454754,
|
||
|
|
"learning_rate": 3.075937056355225e-05,
|
||
|
|
"loss": 0.0236,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011032762937247753,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 5846.4,
|
||
|
|
"valid_targets_min": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9564777327935223,
|
||
|
|
"grad_norm": 0.17153068653715034,
|
||
|
|
"learning_rate": 3.0580438472145665e-05,
|
||
|
|
"loss": 0.0217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006100079044699669,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 5265.3,
|
||
|
|
"valid_targets_min": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.97165991902834,
|
||
|
|
"grad_norm": 0.15694835838400872,
|
||
|
|
"learning_rate": 3.0400322558584308e-05,
|
||
|
|
"loss": 0.0254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004396181087940931,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 5833.3,
|
||
|
|
"valid_targets_min": 921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.986842105263158,
|
||
|
|
"grad_norm": 0.15980764582016754,
|
||
|
|
"learning_rate": 3.0219042975644415e-05,
|
||
|
|
"loss": 0.0226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006331868004053831,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 6124.8,
|
||
|
|
"valid_targets_min": 884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.27431045014629224,
|
||
|
|
"learning_rate": 3.0036620006302624e-05,
|
||
|
|
"loss": 0.0216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.02224479243159294,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 5601.6,
|
||
|
|
"valid_targets_min": 1076
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0151821862348176,
|
||
|
|
"grad_norm": 0.18008079720274842,
|
||
|
|
"learning_rate": 2.9853074061466602e-05,
|
||
|
|
"loss": 0.0148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003647482953965664,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 4510.8,
|
||
|
|
"valid_targets_min": 372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0303643724696356,
|
||
|
|
"grad_norm": 0.16950595950561143,
|
||
|
|
"learning_rate": 2.9668425677691278e-05,
|
||
|
|
"loss": 0.0186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007231104653328657,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 6048.3,
|
||
|
|
"valid_targets_min": 1468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0455465587044532,
|
||
|
|
"grad_norm": 0.16358525483049902,
|
||
|
|
"learning_rate": 2.948269551488108e-05,
|
||
|
|
"loss": 0.0183,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004514344036579132,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 5153.2,
|
||
|
|
"valid_targets_min": 1176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0607287449392713,
|
||
|
|
"grad_norm": 0.1593999835366038,
|
||
|
|
"learning_rate": 2.929590435397832e-05,
|
||
|
|
"loss": 0.0128,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003227936802431941,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 4672.3,
|
||
|
|
"valid_targets_min": 1158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.075910931174089,
|
||
|
|
"grad_norm": 0.1380695703151424,
|
||
|
|
"learning_rate": 2.9108073094638066e-05,
|
||
|
|
"loss": 0.0124,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007050475105643272,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 6611.9,
|
||
|
|
"valid_targets_min": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.091093117408907,
|
||
|
|
"grad_norm": 0.15120020386940933,
|
||
|
|
"learning_rate": 2.8919222752889727e-05,
|
||
|
|
"loss": 0.0155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00420386390760541,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 5157.1,
|
||
|
|
"valid_targets_min": 1092
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1062753036437245,
|
||
|
|
"grad_norm": 0.142832386456704,
|
||
|
|
"learning_rate": 2.8729374458785647e-05,
|
||
|
|
"loss": 0.0212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0038718217983841896,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 6239.8,
|
||
|
|
"valid_targets_min": 947
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1214574898785425,
|
||
|
|
"grad_norm": 0.1377475247478908,
|
||
|
|
"learning_rate": 2.8538549454036838e-05,
|
||
|
|
"loss": 0.0146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005288434214890003,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 6355.2,
|
||
|
|
"valid_targets_min": 1231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.13663967611336,
|
||
|
|
"grad_norm": 0.15104543128314743,
|
||
|
|
"learning_rate": 2.834676908963636e-05,
|
||
|
|
"loss": 0.0131,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0031184745021164417,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 4491.0,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.151821862348178,
|
||
|
|
"grad_norm": 0.14038989161016746,
|
||
|
|
"learning_rate": 2.815405482347037e-05,
|
||
|
|
"loss": 0.0131,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.008421801030635834,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 6889.0,
|
||
|
|
"valid_targets_min": 2031
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.167004048582996,
|
||
|
|
"grad_norm": 0.15798640698682528,
|
||
|
|
"learning_rate": 2.796042821791725e-05,
|
||
|
|
"loss": 0.0131,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005701111629605293,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 8192.0,
|
||
|
|
"valid_targets_min": 1439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.182186234817814,
|
||
|
|
"grad_norm": 0.13009340565031757,
|
||
|
|
"learning_rate": 2.776591093743505e-05,
|
||
|
|
"loss": 0.0192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002326598856598139,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 6049.4,
|
||
|
|
"valid_targets_min": 412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1973684210526314,
|
||
|
|
"grad_norm": 0.14208658643553557,
|
||
|
|
"learning_rate": 2.7570524746137485e-05,
|
||
|
|
"loss": 0.0114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0028066979721188545,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 4216.1,
|
||
|
|
"valid_targets_min": 983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2125506072874495,
|
||
|
|
"grad_norm": 0.14003401479048846,
|
||
|
|
"learning_rate": 2.7374291505358818e-05,
|
||
|
|
"loss": 0.017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0040283650159835815,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 6767.3,
|
||
|
|
"valid_targets_min": 1798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.227732793522267,
|
||
|
|
"grad_norm": 0.1404844207738646,
|
||
|
|
"learning_rate": 2.7177233171207817e-05,
|
||
|
|
"loss": 0.0129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004482706543058157,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 5788.1,
|
||
|
|
"valid_targets_min": 1172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.242914979757085,
|
||
|
|
"grad_norm": 0.14543072160206105,
|
||
|
|
"learning_rate": 2.6979371792111147e-05,
|
||
|
|
"loss": 0.013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0030938778072595596,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 5783.0,
|
||
|
|
"valid_targets_min": 998
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2580971659919027,
|
||
|
|
"grad_norm": 0.15031997200997996,
|
||
|
|
"learning_rate": 2.678072950634641e-05,
|
||
|
|
"loss": 0.0155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004818389657884836,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 5858.6,
|
||
|
|
"valid_targets_min": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2732793522267207,
|
||
|
|
"grad_norm": 0.1474155672088557,
|
||
|
|
"learning_rate": 2.6581328539565184e-05,
|
||
|
|
"loss": 0.0105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002948472509160638,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 5001.7,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2884615384615383,
|
||
|
|
"grad_norm": 0.15651905545877035,
|
||
|
|
"learning_rate": 2.638119120230616e-05,
|
||
|
|
"loss": 0.0134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003472162876278162,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 4897.8,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3036437246963564,
|
||
|
|
"grad_norm": 0.1489685695433526,
|
||
|
|
"learning_rate": 2.618033988749895e-05,
|
||
|
|
"loss": 0.013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005241721868515015,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 5184.1,
|
||
|
|
"valid_targets_min": 1225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.318825910931174,
|
||
|
|
"grad_norm": 0.14394874153264994,
|
||
|
|
"learning_rate": 2.5978797067958542e-05,
|
||
|
|
"loss": 0.0112,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005601848475635052,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 7270.8,
|
||
|
|
"valid_targets_min": 1516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.334008097165992,
|
||
|
|
"grad_norm": 0.14641494561745882,
|
||
|
|
"learning_rate": 2.5776585293870877e-05,
|
||
|
|
"loss": 0.0107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003249663859605789,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 6307.8,
|
||
|
|
"valid_targets_min": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3491902834008096,
|
||
|
|
"grad_norm": 0.13606809371049533,
|
||
|
|
"learning_rate": 2.557372719026976e-05,
|
||
|
|
"loss": 0.0127,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001904345117509365,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 5415.7,
|
||
|
|
"valid_targets_min": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3643724696356276,
|
||
|
|
"grad_norm": 0.14688794912709033,
|
||
|
|
"learning_rate": 2.537024545450539e-05,
|
||
|
|
"loss": 0.0133,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006051709875464439,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 6052.3,
|
||
|
|
"valid_targets_min": 1193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3795546558704452,
|
||
|
|
"grad_norm": 0.1410564249211947,
|
||
|
|
"learning_rate": 2.5166162853704825e-05,
|
||
|
|
"loss": 0.0133,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.007399308495223522,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 6521.0,
|
||
|
|
"valid_targets_min": 1629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3947368421052633,
|
||
|
|
"grad_norm": 0.13759684286966475,
|
||
|
|
"learning_rate": 2.496150222222458e-05,
|
||
|
|
"loss": 0.0119,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0028168156277388334,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 5283.4,
|
||
|
|
"valid_targets_min": 1152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.409919028340081,
|
||
|
|
"grad_norm": 0.146148305747386,
|
||
|
|
"learning_rate": 2.475628645909576e-05,
|
||
|
|
"loss": 0.0141,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00658248458057642,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 6796.8,
|
||
|
|
"valid_targets_min": 1208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.425101214574899,
|
||
|
|
"grad_norm": 0.1276669258915863,
|
||
|
|
"learning_rate": 2.4550538525461963e-05,
|
||
|
|
"loss": 0.0123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005004022270441055,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 5959.2,
|
||
|
|
"valid_targets_min": 1689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4402834008097165,
|
||
|
|
"grad_norm": 0.15639817230583197,
|
||
|
|
"learning_rate": 2.434428144201016e-05,
|
||
|
|
"loss": 0.012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0051465933211147785,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 5432.3,
|
||
|
|
"valid_targets_min": 1064
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4554655870445345,
|
||
|
|
"grad_norm": 0.12723540480056106,
|
||
|
|
"learning_rate": 2.4137538286394976e-05,
|
||
|
|
"loss": 0.0118,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0057717300951480865,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 8181.6,
|
||
|
|
"valid_targets_min": 1615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.470647773279352,
|
||
|
|
"grad_norm": 0.11404355239410903,
|
||
|
|
"learning_rate": 2.3930332190656604e-05,
|
||
|
|
"loss": 0.0129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.012553590349853039,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 8294.0,
|
||
|
|
"valid_targets_min": 2134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.48582995951417,
|
||
|
|
"grad_norm": 0.12081352714937799,
|
||
|
|
"learning_rate": 2.3722686338632602e-05,
|
||
|
|
"loss": 0.012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0045616645365953445,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 6857.9,
|
||
|
|
"valid_targets_min": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.501012145748988,
|
||
|
|
"grad_norm": 0.12829629760213093,
|
||
|
|
"learning_rate": 2.3514623963363886e-05,
|
||
|
|
"loss": 0.0138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0032292886171489954,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 6975.8,
|
||
|
|
"valid_targets_min": 1095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.516194331983806,
|
||
|
|
"grad_norm": 0.13744925270446964,
|
||
|
|
"learning_rate": 2.330616834449525e-05,
|
||
|
|
"loss": 0.0123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0020485175773501396,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 5187.1,
|
||
|
|
"valid_targets_min": 916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5313765182186234,
|
||
|
|
"grad_norm": 0.12416127230021508,
|
||
|
|
"learning_rate": 2.309734280567065e-05,
|
||
|
|
"loss": 0.0114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0024628550745546818,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 5624.4,
|
||
|
|
"valid_targets_min": 412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5465587044534415,
|
||
|
|
"grad_norm": 0.12170092359902011,
|
||
|
|
"learning_rate": 2.28881707119236e-05,
|
||
|
|
"loss": 0.0099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0037577205803245306,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 6274.0,
|
||
|
|
"valid_targets_min": 846
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.561740890688259,
|
||
|
|
"grad_norm": 0.1263438950688655,
|
||
|
|
"learning_rate": 2.267867546706287e-05,
|
||
|
|
"loss": 0.0114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004231428727507591,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 6037.4,
|
||
|
|
"valid_targets_min": 1107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5769230769230766,
|
||
|
|
"grad_norm": 0.1245576799383021,
|
||
|
|
"learning_rate": 2.2468880511053896e-05,
|
||
|
|
"loss": 0.0087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0017101940466091037,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 4867.1,
|
||
|
|
"valid_targets_min": 1523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5921052631578947,
|
||
|
|
"grad_norm": 0.12519745938970292,
|
||
|
|
"learning_rate": 2.2258809317396163e-05,
|
||
|
|
"loss": 0.0115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.005295773968100548,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 7008.8,
|
||
|
|
"valid_targets_min": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6072874493927127,
|
||
|
|
"grad_norm": 0.14296631760190703,
|
||
|
|
"learning_rate": 2.2048485390496757e-05,
|
||
|
|
"loss": 0.0126,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004541776143014431,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 5919.8,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6224696356275303,
|
||
|
|
"grad_norm": 0.13592520238173653,
|
||
|
|
"learning_rate": 2.1837932263040553e-05,
|
||
|
|
"loss": 0.009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0023296151775866747,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 4947.3,
|
||
|
|
"valid_targets_min": 1150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.637651821862348,
|
||
|
|
"grad_norm": 0.1183802153348962,
|
||
|
|
"learning_rate": 2.1627173493357167e-05,
|
||
|
|
"loss": 0.0089,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001233824877999723,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 7681.5,
|
||
|
|
"valid_targets_min": 1849
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.652834008097166,
|
||
|
|
"grad_norm": 0.146011801149195,
|
||
|
|
"learning_rate": 2.1416232662785084e-05,
|
||
|
|
"loss": 0.0093,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002828218974173069,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 5370.2,
|
||
|
|
"valid_targets_min": 1319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.668016194331984,
|
||
|
|
"grad_norm": 0.12616997162436008,
|
||
|
|
"learning_rate": 2.1205133373033173e-05,
|
||
|
|
"loss": 0.0087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0021544168703258038,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 5350.7,
|
||
|
|
"valid_targets_min": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6831983805668016,
|
||
|
|
"grad_norm": 0.12828277280789904,
|
||
|
|
"learning_rate": 2.0993899243539953e-05,
|
||
|
|
"loss": 0.0095,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0030619329772889614,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 5140.3,
|
||
|
|
"valid_targets_min": 1205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.698380566801619,
|
||
|
|
"grad_norm": 0.12448517529183224,
|
||
|
|
"learning_rate": 2.0782553908830887e-05,
|
||
|
|
"loss": 0.0101,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00264772679656744,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 7220.8,
|
||
|
|
"valid_targets_min": 726
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7135627530364372,
|
||
|
|
"grad_norm": 0.13018465983618735,
|
||
|
|
"learning_rate": 2.0571121015873924e-05,
|
||
|
|
"loss": 0.0096,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001603921758942306,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 5471.2,
|
||
|
|
"valid_targets_min": 1121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7287449392712553,
|
||
|
|
"grad_norm": 0.12403998240327094,
|
||
|
|
"learning_rate": 2.0359624221433728e-05,
|
||
|
|
"loss": 0.0087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.004858669824898243,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 7486.9,
|
||
|
|
"valid_targets_min": 1486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.743927125506073,
|
||
|
|
"grad_norm": 0.12878060438832242,
|
||
|
|
"learning_rate": 2.014808718942476e-05,
|
||
|
|
"loss": 0.0097,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0026276602875441313,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 5077.5,
|
||
|
|
"valid_targets_min": 1271
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7591093117408905,
|
||
|
|
"grad_norm": 0.1318362046529001,
|
||
|
|
"learning_rate": 1.9936533588263557e-05,
|
||
|
|
"loss": 0.0076,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0019232281483709812,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 4782.8,
|
||
|
|
"valid_targets_min": 1046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7742914979757085,
|
||
|
|
"grad_norm": 0.13400217889661628,
|
||
|
|
"learning_rate": 1.9724987088220565e-05,
|
||
|
|
"loss": 0.0088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0034655483905225992,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 5774.3,
|
||
|
|
"valid_targets_min": 1068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7894736842105265,
|
||
|
|
"grad_norm": 0.14052177532469134,
|
||
|
|
"learning_rate": 1.951347135877169e-05,
|
||
|
|
"loss": 0.0134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0036318274214863777,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 5540.0,
|
||
|
|
"valid_targets_min": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.804655870445344,
|
||
|
|
"grad_norm": 0.12406832197918945,
|
||
|
|
"learning_rate": 1.930201006594999e-05,
|
||
|
|
"loss": 0.008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00445646233856678,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 4831.8,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8198380566801617,
|
||
|
|
"grad_norm": 0.12308808187111273,
|
||
|
|
"learning_rate": 1.9090626869697714e-05,
|
||
|
|
"loss": 0.0086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002711558947339654,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 5135.3,
|
||
|
|
"valid_targets_min": 1158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.83502024291498,
|
||
|
|
"grad_norm": 0.12076313343088467,
|
||
|
|
"learning_rate": 1.8879345421219063e-05,
|
||
|
|
"loss": 0.0068,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002742330078035593,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 5565.3,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.850202429149798,
|
||
|
|
"grad_norm": 0.11363956368138457,
|
||
|
|
"learning_rate": 1.8668189360333923e-05,
|
||
|
|
"loss": 0.0082,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0011241225292906165,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 4582.2,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8653846153846154,
|
||
|
|
"grad_norm": 0.1408683722620157,
|
||
|
|
"learning_rate": 1.845718231283281e-05,
|
||
|
|
"loss": 0.0116,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0036686803214251995,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 5548.2,
|
||
|
|
"valid_targets_min": 1495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.880566801619433,
|
||
|
|
"grad_norm": 0.11406554429413167,
|
||
|
|
"learning_rate": 1.8246347887833457e-05,
|
||
|
|
"loss": 0.0061,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0016177756479009986,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 5573.5,
|
||
|
|
"valid_targets_min": 1325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.895748987854251,
|
||
|
|
"grad_norm": 0.10960387099674025,
|
||
|
|
"learning_rate": 1.8035709675139258e-05,
|
||
|
|
"loss": 0.0071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0030507217161357403,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 5650.0,
|
||
|
|
"valid_targets_min": 1500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.910931174089069,
|
||
|
|
"grad_norm": 0.1313062947338427,
|
||
|
|
"learning_rate": 1.7825291242599837e-05,
|
||
|
|
"loss": 0.0102,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.003934426233172417,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 6043.7,
|
||
|
|
"valid_targets_min": 1059
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9261133603238867,
|
||
|
|
"grad_norm": 0.11005279182882197,
|
||
|
|
"learning_rate": 1.7615116133474084e-05,
|
||
|
|
"loss": 0.0078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0014721793122589588,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 6052.4,
|
||
|
|
"valid_targets_min": 909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9412955465587043,
|
||
|
|
"grad_norm": 0.11020025200867281,
|
||
|
|
"learning_rate": 1.7405207863795966e-05,
|
||
|
|
"loss": 0.0058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0014757635071873665,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 4909.2,
|
||
|
|
"valid_targets_min": 1062
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9564777327935223,
|
||
|
|
"grad_norm": 0.10805582468251514,
|
||
|
|
"learning_rate": 1.719558991974339e-05,
|
||
|
|
"loss": 0.0099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002586258575320244,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 6932.2,
|
||
|
|
"valid_targets_min": 1186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.97165991902834,
|
||
|
|
"grad_norm": 0.1265774000941838,
|
||
|
|
"learning_rate": 1.698628575501034e-05,
|
||
|
|
"loss": 0.008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0020268792286515236,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 5083.2,
|
||
|
|
"valid_targets_min": 961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.986842105263158,
|
||
|
|
"grad_norm": 0.1111504130944611,
|
||
|
|
"learning_rate": 1.6777318788182723e-05,
|
||
|
|
"loss": 0.0067,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.006772821769118309,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 6128.4,
|
||
|
|
"valid_targets_min": 1046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.167104684966192,
|
||
|
|
"learning_rate": 1.6568712400118102e-05,
|
||
|
|
"loss": 0.0058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002753224689513445,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 5531.0,
|
||
|
|
"valid_targets_min": 733
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0151821862348176,
|
||
|
|
"grad_norm": 0.09553905678445854,
|
||
|
|
"learning_rate": 1.636048993132969e-05,
|
||
|
|
"loss": 0.0073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000724322278983891,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 5769.0,
|
||
|
|
"valid_targets_min": 2204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0303643724696356,
|
||
|
|
"grad_norm": 0.10240447291611517,
|
||
|
|
"learning_rate": 1.615267467937479e-05,
|
||
|
|
"loss": 0.0046,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0008562590228393674,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 6404.2,
|
||
|
|
"valid_targets_min": 906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0455465587044532,
|
||
|
|
"grad_norm": 0.09468266846983169,
|
||
|
|
"learning_rate": 1.59452898962481e-05,
|
||
|
|
"loss": 0.0062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001599764684215188,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 5183.8,
|
||
|
|
"valid_targets_min": 1185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0607287449392713,
|
||
|
|
"grad_norm": 0.09739535591833388,
|
||
|
|
"learning_rate": 1.573835878578013e-05,
|
||
|
|
"loss": 0.0033,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000953693815972656,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 6161.1,
|
||
|
|
"valid_targets_min": 961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.075910931174089,
|
||
|
|
"grad_norm": 0.11180915656594645,
|
||
|
|
"learning_rate": 1.5531904501040917e-05,
|
||
|
|
"loss": 0.0037,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0008014809573069215,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 5151.8,
|
||
|
|
"valid_targets_min": 993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.091093117408907,
|
||
|
|
"grad_norm": 0.09727687969118004,
|
||
|
|
"learning_rate": 1.5325950141749522e-05,
|
||
|
|
"loss": 0.0042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001928561832755804,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 5867.9,
|
||
|
|
"valid_targets_min": 1222
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1062753036437245,
|
||
|
|
"grad_norm": 0.09494041364817436,
|
||
|
|
"learning_rate": 1.5120518751689438e-05,
|
||
|
|
"loss": 0.0049,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0030567715875804424,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 5811.8,
|
||
|
|
"valid_targets_min": 1531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1214574898785425,
|
||
|
|
"grad_norm": 0.11261942531123377,
|
||
|
|
"learning_rate": 1.4915633316130267e-05,
|
||
|
|
"loss": 0.0056,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.011703863739967346,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 6078.9,
|
||
|
|
"valid_targets_min": 961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.13663967611336,
|
||
|
|
"grad_norm": 0.1005072175870218,
|
||
|
|
"learning_rate": 1.4711316759255963e-05,
|
||
|
|
"loss": 0.0036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007407825905829668,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 5989.4,
|
||
|
|
"valid_targets_min": 372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.151821862348178,
|
||
|
|
"grad_norm": 0.0897070631791768,
|
||
|
|
"learning_rate": 1.450759194159987e-05,
|
||
|
|
"loss": 0.0038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001739290077239275,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 5473.2,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.167004048582996,
|
||
|
|
"grad_norm": 0.09128564039769516,
|
||
|
|
"learning_rate": 1.4304481657486955e-05,
|
||
|
|
"loss": 0.0036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0015778269153088331,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 5053.5,
|
||
|
|
"valid_targets_min": 916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.182186234817814,
|
||
|
|
"grad_norm": 0.10717444490017926,
|
||
|
|
"learning_rate": 1.4102008632483344e-05,
|
||
|
|
"loss": 0.0035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007246138993650675,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 4916.7,
|
||
|
|
"valid_targets_min": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1973684210526314,
|
||
|
|
"grad_norm": 0.10375123766311543,
|
||
|
|
"learning_rate": 1.3900195520853628e-05,
|
||
|
|
"loss": 0.0035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013751237420365214,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 6373.6,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2125506072874495,
|
||
|
|
"grad_norm": 0.09149527268094115,
|
||
|
|
"learning_rate": 1.3699064903026149e-05,
|
||
|
|
"loss": 0.0069,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0019966168329119682,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 6287.4,
|
||
|
|
"valid_targets_min": 1089
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.227732793522267,
|
||
|
|
"grad_norm": 0.11863359183645682,
|
||
|
|
"learning_rate": 1.34986392830665e-05,
|
||
|
|
"loss": 0.0071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0016580638475716114,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 5563.2,
|
||
|
|
"valid_targets_min": 854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.242914979757085,
|
||
|
|
"grad_norm": 0.0855112212625228,
|
||
|
|
"learning_rate": 1.3298941086159598e-05,
|
||
|
|
"loss": 0.0069,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004933560267090797,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 6606.3,
|
||
|
|
"valid_targets_min": 1186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2580971659919027,
|
||
|
|
"grad_norm": 0.09570561492253427,
|
||
|
|
"learning_rate": 1.3099992656100592e-05,
|
||
|
|
"loss": 0.0032,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00117517972830683,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 5255.0,
|
||
|
|
"valid_targets_min": 1325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2732793522267207,
|
||
|
|
"grad_norm": 0.0909562430234719,
|
||
|
|
"learning_rate": 1.2901816252794848e-05,
|
||
|
|
"loss": 0.0037,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0012113149277865887,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 5675.1,
|
||
|
|
"valid_targets_min": 1231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2884615384615383,
|
||
|
|
"grad_norm": 0.08189530486902238,
|
||
|
|
"learning_rate": 1.2704434049767356e-05,
|
||
|
|
"loss": 0.0035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010620395187288523,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 5676.3,
|
||
|
|
"valid_targets_min": 1152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3036437246963564,
|
||
|
|
"grad_norm": 0.08948614953890663,
|
||
|
|
"learning_rate": 1.250786813168176e-05,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001292562810704112,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 6668.9,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.318825910931174,
|
||
|
|
"grad_norm": 0.09299479244515868,
|
||
|
|
"learning_rate": 1.2312140491869369e-05,
|
||
|
|
"loss": 0.0035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010404742788523436,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 5615.0,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.334008097165992,
|
||
|
|
"grad_norm": 0.08869720551088744,
|
||
|
|
"learning_rate": 1.2117273029868362e-05,
|
||
|
|
"loss": 0.004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00047623467980884016,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 6315.7,
|
||
|
|
"valid_targets_min": 1252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3491902834008096,
|
||
|
|
"grad_norm": 0.08966722709716235,
|
||
|
|
"learning_rate": 1.1923287548973508e-05,
|
||
|
|
"loss": 0.0031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0015258332714438438,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 4745.8,
|
||
|
|
"valid_targets_min": 1366
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3643724696356276,
|
||
|
|
"grad_norm": 0.0914871818454804,
|
||
|
|
"learning_rate": 1.1730205753796631e-05,
|
||
|
|
"loss": 0.0042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005572251975536346,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 5411.1,
|
||
|
|
"valid_targets_min": 1057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3795546558704452,
|
||
|
|
"grad_norm": 0.0991268632009275,
|
||
|
|
"learning_rate": 1.1538049247838128e-05,
|
||
|
|
"loss": 0.0034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009232150041498244,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 7529.5,
|
||
|
|
"valid_targets_min": 1068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3947368421052633,
|
||
|
|
"grad_norm": 0.06942077265753631,
|
||
|
|
"learning_rate": 1.134683953106983e-05,
|
||
|
|
"loss": 0.0026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00040978423203341663,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 5348.5,
|
||
|
|
"valid_targets_min": 1222
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.409919028340081,
|
||
|
|
"grad_norm": 0.08938090828584724,
|
||
|
|
"learning_rate": 1.115659799752938e-05,
|
||
|
|
"loss": 0.0034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0008477874216623604,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 4953.0,
|
||
|
|
"valid_targets_min": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.425101214574899,
|
||
|
|
"grad_norm": 0.09026804310673464,
|
||
|
|
"learning_rate": 1.096734593292649e-05,
|
||
|
|
"loss": 0.0028,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010314269457012415,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 5280.3,
|
||
|
|
"valid_targets_min": 1506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4402834008097165,
|
||
|
|
"grad_norm": 0.06885287233281213,
|
||
|
|
"learning_rate": 1.077910451226138e-05,
|
||
|
|
"loss": 0.0028,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003990654367953539,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 4768.4,
|
||
|
|
"valid_targets_min": 1602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4554655870445345,
|
||
|
|
"grad_norm": 0.07923523998517162,
|
||
|
|
"learning_rate": 1.0591894797455526e-05,
|
||
|
|
"loss": 0.003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0008414952899329364,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 5667.3,
|
||
|
|
"valid_targets_min": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.470647773279352,
|
||
|
|
"grad_norm": 0.07839620139640652,
|
||
|
|
"learning_rate": 1.0405737734995083e-05,
|
||
|
|
"loss": 0.0059,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009308420703746378,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 5440.8,
|
||
|
|
"valid_targets_min": 983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.48582995951417,
|
||
|
|
"grad_norm": 0.08143106991037652,
|
||
|
|
"learning_rate": 1.0220654153587225e-05,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000549286836758256,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 5549.2,
|
||
|
|
"valid_targets_min": 916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.501012145748988,
|
||
|
|
"grad_norm": 0.09148397869628853,
|
||
|
|
"learning_rate": 1.00366647618297e-05,
|
||
|
|
"loss": 0.0031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000851424178108573,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 5381.9,
|
||
|
|
"valid_targets_min": 1428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.516194331983806,
|
||
|
|
"grad_norm": 0.08536407748387596,
|
||
|
|
"learning_rate": 9.853790145893742e-06,
|
||
|
|
"loss": 0.003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007849158719182014,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 5692.0,
|
||
|
|
"valid_targets_min": 1534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5313765182186234,
|
||
|
|
"grad_norm": 0.07500738769923868,
|
||
|
|
"learning_rate": 9.672050767220765e-06,
|
||
|
|
"loss": 0.0038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0018731937743723392,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 4630.3,
|
||
|
|
"valid_targets_min": 1650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5465587044534415,
|
||
|
|
"grad_norm": 0.08635271921250572,
|
||
|
|
"learning_rate": 9.491466960232955e-06,
|
||
|
|
"loss": 0.0027,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0018844190053641796,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 6699.6,
|
||
|
|
"valid_targets_min": 1281
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.561740890688259,
|
||
|
|
"grad_norm": 0.07132259294794761,
|
||
|
|
"learning_rate": 9.312058930058114e-06,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00045610909000970423,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 5466.4,
|
||
|
|
"valid_targets_min": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5769230769230766,
|
||
|
|
"grad_norm": 0.08076636933899621,
|
||
|
|
"learning_rate": 9.133846750268945e-06,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009316107607446611,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 7522.7,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5921052631578947,
|
||
|
|
"grad_norm": 0.07356467744946356,
|
||
|
|
"learning_rate": 8.956850360637046e-06,
|
||
|
|
"loss": 0.008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007976359920576215,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 7516.5,
|
||
|
|
"valid_targets_min": 2623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6072874493927127,
|
||
|
|
"grad_norm": 0.07756733565954317,
|
||
|
|
"learning_rate": 8.78108956490194e-06,
|
||
|
|
"loss": 0.003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013237085659056902,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 6886.8,
|
||
|
|
"valid_targets_min": 2053
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6224696356275303,
|
||
|
|
"grad_norm": 0.07105607166251748,
|
||
|
|
"learning_rate": 8.606584028555225e-06,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004069001879543066,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 4807.0,
|
||
|
|
"valid_targets_min": 1153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.637651821862348,
|
||
|
|
"grad_norm": 0.07399494788941444,
|
||
|
|
"learning_rate": 8.43335327664027e-06,
|
||
|
|
"loss": 0.003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0014096340164542198,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 6198.2,
|
||
|
|
"valid_targets_min": 2352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.652834008097166,
|
||
|
|
"grad_norm": 0.08708798278496013,
|
||
|
|
"learning_rate": 8.261416691567601e-06,
|
||
|
|
"loss": 0.0036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00120199890807271,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 6160.4,
|
||
|
|
"valid_targets_min": 1384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.668016194331984,
|
||
|
|
"grad_norm": 0.06850782245649316,
|
||
|
|
"learning_rate": 8.090793510946242e-06,
|
||
|
|
"loss": 0.007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0011346151586622,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 5498.9,
|
||
|
|
"valid_targets_min": 1544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6831983805668016,
|
||
|
|
"grad_norm": 0.0665721537533129,
|
||
|
|
"learning_rate": 7.921502825431258e-06,
|
||
|
|
"loss": 0.0022,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009298848453909159,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 5320.8,
|
||
|
|
"valid_targets_min": 1325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.698380566801619,
|
||
|
|
"grad_norm": 0.05817280875168348,
|
||
|
|
"learning_rate": 7.753563576587753e-06,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00031473039416596293,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 7207.3,
|
||
|
|
"valid_targets_min": 1196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7135627530364372,
|
||
|
|
"grad_norm": 0.07585282212242273,
|
||
|
|
"learning_rate": 7.5869945547715275e-06,
|
||
|
|
"loss": 0.0035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001625682198209688,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 6348.5,
|
||
|
|
"valid_targets_min": 1173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7287449392712553,
|
||
|
|
"grad_norm": 0.05928061653856549,
|
||
|
|
"learning_rate": 7.421814397026674e-06,
|
||
|
|
"loss": 0.0055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002983218291774392,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 4876.5,
|
||
|
|
"valid_targets_min": 412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.743927125506073,
|
||
|
|
"grad_norm": 0.05167166612364566,
|
||
|
|
"learning_rate": 7.258041585000317e-06,
|
||
|
|
"loss": 0.0021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002519974368624389,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 6114.1,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7591093117408905,
|
||
|
|
"grad_norm": 0.06755208306247126,
|
||
|
|
"learning_rate": 7.095694442874743e-06,
|
||
|
|
"loss": 0.0023,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0007116460474207997,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 6417.8,
|
||
|
|
"valid_targets_min": 701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7742914979757085,
|
||
|
|
"grad_norm": 0.051297695584593836,
|
||
|
|
"learning_rate": 6.934791135317147e-06,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005321088247001171,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 5659.3,
|
||
|
|
"valid_targets_min": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7894736842105265,
|
||
|
|
"grad_norm": 0.05752662490981521,
|
||
|
|
"learning_rate": 6.775349665447222e-06,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005416910862550139,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 4546.3,
|
||
|
|
"valid_targets_min": 393
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.804655870445344,
|
||
|
|
"grad_norm": 0.052946443641855044,
|
||
|
|
"learning_rate": 6.617387872822842e-06,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010024468647316098,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 7255.5,
|
||
|
|
"valid_targets_min": 1810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8198380566801617,
|
||
|
|
"grad_norm": 0.05880925202734049,
|
||
|
|
"learning_rate": 6.460923431444015e-06,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004680416896007955,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 5930.0,
|
||
|
|
"valid_targets_min": 1600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.83502024291498,
|
||
|
|
"grad_norm": 0.06372750335866821,
|
||
|
|
"learning_rate": 6.305973847775406e-06,
|
||
|
|
"loss": 0.0017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013748719356954098,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 6128.2,
|
||
|
|
"valid_targets_min": 826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.850202429149798,
|
||
|
|
"grad_norm": 0.05722799203054713,
|
||
|
|
"learning_rate": 6.152556458787546e-06,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002156760892830789,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 5906.8,
|
||
|
|
"valid_targets_min": 1226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8653846153846154,
|
||
|
|
"grad_norm": 0.07081813428284034,
|
||
|
|
"learning_rate": 6.000688430017048e-06,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004200451367069036,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 6887.4,
|
||
|
|
"valid_targets_min": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.880566801619433,
|
||
|
|
"grad_norm": 0.06866565362661654,
|
||
|
|
"learning_rate": 5.850386753645998e-06,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00035961525281891227,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 5884.6,
|
||
|
|
"valid_targets_min": 1231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.895748987854251,
|
||
|
|
"grad_norm": 0.07473837087777652,
|
||
|
|
"learning_rate": 5.701668246600731e-06,
|
||
|
|
"loss": 0.0037,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006638249033130705,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 5361.0,
|
||
|
|
"valid_targets_min": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.910931174089069,
|
||
|
|
"grad_norm": 0.06332394817627471,
|
||
|
|
"learning_rate": 5.554549548670227e-06,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00031463263439945877,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 6373.2,
|
||
|
|
"valid_targets_min": 1155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9261133603238867,
|
||
|
|
"grad_norm": 0.04379006866304081,
|
||
|
|
"learning_rate": 5.409047120644307e-06,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002834107435774058,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 8020.1,
|
||
|
|
"valid_targets_min": 1064
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9412955465587043,
|
||
|
|
"grad_norm": 0.0587907679513708,
|
||
|
|
"learning_rate": 5.265177242471899e-06,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000306104077026248,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 6390.3,
|
||
|
|
"valid_targets_min": 1186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9564777327935223,
|
||
|
|
"grad_norm": 0.051201748061711794,
|
||
|
|
"learning_rate": 5.122956011439486e-06,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006984270294196904,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 7854.0,
|
||
|
|
"valid_targets_min": 1626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.97165991902834,
|
||
|
|
"grad_norm": 0.04389042358717779,
|
||
|
|
"learning_rate": 4.982399340370017e-06,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000228608405450359,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 5870.0,
|
||
|
|
"valid_targets_min": 1117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.986842105263158,
|
||
|
|
"grad_norm": 0.04144493373117889,
|
||
|
|
"learning_rate": 4.843522955842464e-06,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00036165837082080543,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 6428.9,
|
||
|
|
"valid_targets_min": 1226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0,
|
||
|
|
"grad_norm": 0.10432443407266767,
|
||
|
|
"learning_rate": 4.706342396432213e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0010806015925481915,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 4910.3,
|
||
|
|
"valid_targets_min": 796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.015182186234818,
|
||
|
|
"grad_norm": 0.04575394395571745,
|
||
|
|
"learning_rate": 4.570873010972477e-06,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00019171061285305768,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 4514.5,
|
||
|
|
"valid_targets_min": 942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.030364372469635,
|
||
|
|
"grad_norm": 0.06136036341219001,
|
||
|
|
"learning_rate": 4.43712995683695e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00025325745809823275,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 5904.1,
|
||
|
|
"valid_targets_min": 1319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.045546558704453,
|
||
|
|
"grad_norm": 0.04678980368302935,
|
||
|
|
"learning_rate": 4.305128198243888e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00023867773415986449,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 5641.8,
|
||
|
|
"valid_targets_min": 1051
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.060728744939271,
|
||
|
|
"grad_norm": 0.044273575964736044,
|
||
|
|
"learning_rate": 4.174882504581794e-06,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00012092696852050722,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 5501.6,
|
||
|
|
"valid_targets_min": 983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.075910931174089,
|
||
|
|
"grad_norm": 0.06616327486605052,
|
||
|
|
"learning_rate": 4.046407448756895e-06,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005744840018451214,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 6480.2,
|
||
|
|
"valid_targets_min": 1409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0910931174089065,
|
||
|
|
"grad_norm": 0.03504675811498584,
|
||
|
|
"learning_rate": 3.91971740556262e-06,
|
||
|
|
"loss": 0.0045,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00013153860345482826,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 6125.2,
|
||
|
|
"valid_targets_min": 1260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1062753036437245,
|
||
|
|
"grad_norm": 0.05094012282416404,
|
||
|
|
"learning_rate": 3.7948265500712313e-06,
|
||
|
|
"loss": 0.0042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002957929973490536,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 4896.0,
|
||
|
|
"valid_targets_min": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1214574898785425,
|
||
|
|
"grad_norm": 0.03832020075332006,
|
||
|
|
"learning_rate": 3.6717488560478096e-06,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017256051069125533,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 5400.4,
|
||
|
|
"valid_targets_min": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.136639676113361,
|
||
|
|
"grad_norm": 0.06499952485602367,
|
||
|
|
"learning_rate": 3.5504980943867538e-06,
|
||
|
|
"loss": 0.0053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00017852283781394362,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 6389.8,
|
||
|
|
"valid_targets_min": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.151821862348178,
|
||
|
|
"grad_norm": 0.06164804945094931,
|
||
|
|
"learning_rate": 3.4310878315710074e-06,
|
||
|
|
"loss": 0.0025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0013586217537522316,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 7214.0,
|
||
|
|
"valid_targets_min": 906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.167004048582996,
|
||
|
|
"grad_norm": 0.04024799044304015,
|
||
|
|
"learning_rate": 3.3135314281540954e-06,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003238618082832545,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 4662.5,
|
||
|
|
"valid_targets_min": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.182186234817814,
|
||
|
|
"grad_norm": 0.04299991800079604,
|
||
|
|
"learning_rate": 3.1978420372652776e-06,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00016643814160488546,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 5508.5,
|
||
|
|
"valid_targets_min": 1759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.197368421052632,
|
||
|
|
"grad_norm": 0.03981366931525965,
|
||
|
|
"learning_rate": 3.084032603137852e-06,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00024849892361089587,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 5699.5,
|
||
|
|
"valid_targets_min": 909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.212550607287449,
|
||
|
|
"grad_norm": 0.06374131446855894,
|
||
|
|
"learning_rate": 2.9721158596608622e-06,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0016528195701539516,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 5386.7,
|
||
|
|
"valid_targets_min": 1449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.227732793522267,
|
||
|
|
"grad_norm": 0.0488344552225502,
|
||
|
|
"learning_rate": 2.8621043289543314e-06,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003213430754840374,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 5162.6,
|
||
|
|
"valid_targets_min": 884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.242914979757085,
|
||
|
|
"grad_norm": 0.040266253339609565,
|
||
|
|
"learning_rate": 2.754010319968181e-06,
|
||
|
|
"loss": 0.0044,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00016541770310141146,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 7292.2,
|
||
|
|
"valid_targets_min": 1282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.258097165991903,
|
||
|
|
"grad_norm": 0.05618171654900691,
|
||
|
|
"learning_rate": 2.647845927105015e-06,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000637515913695097,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 5535.6,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.27327935222672,
|
||
|
|
"grad_norm": 0.044241199347689504,
|
||
|
|
"learning_rate": 2.543623028866915e-06,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001080352594726719,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 5776.3,
|
||
|
|
"valid_targets_min": 1448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.288461538461538,
|
||
|
|
"grad_norm": 0.04240919318151957,
|
||
|
|
"learning_rate": 2.4413532865263533e-06,
|
||
|
|
"loss": 0.0048,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002839623484760523,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 5609.8,
|
||
|
|
"valid_targets_min": 978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.303643724696356,
|
||
|
|
"grad_norm": 0.04717996482372255,
|
||
|
|
"learning_rate": 2.3410481428214602e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0003408491029404104,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 4040.8,
|
||
|
|
"valid_targets_min": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.318825910931174,
|
||
|
|
"grad_norm": 0.06239339252341931,
|
||
|
|
"learning_rate": 2.242718820675718e-06,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001997358922380954,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 6168.2,
|
||
|
|
"valid_targets_min": 1162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.334008097165992,
|
||
|
|
"grad_norm": 0.0625616384807205,
|
||
|
|
"learning_rate": 2.1463763219422495e-06,
|
||
|
|
"loss": 0.004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0009303807746618986,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 5197.7,
|
||
|
|
"valid_targets_min": 1391
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.34919028340081,
|
||
|
|
"grad_norm": 0.049057182007342326,
|
||
|
|
"learning_rate": 2.0520314261728357e-06,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00032955483766272664,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 5227.9,
|
||
|
|
"valid_targets_min": 1292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.364372469635628,
|
||
|
|
"grad_norm": 0.02755211450191434,
|
||
|
|
"learning_rate": 1.9596946894118306e-06,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00012536742724478245,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 6774.8,
|
||
|
|
"valid_targets_min": 399
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.379554655870446,
|
||
|
|
"grad_norm": 0.059198175261942494,
|
||
|
|
"learning_rate": 1.8693764430150696e-06,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006651548319496214,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 5754.2,
|
||
|
|
"valid_targets_min": 1175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.394736842105263,
|
||
|
|
"grad_norm": 0.06672168346536564,
|
||
|
|
"learning_rate": 1.7810867924938978e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005800529033876956,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 6373.2,
|
||
|
|
"valid_targets_min": 1172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.409919028340081,
|
||
|
|
"grad_norm": 0.04103210163550645,
|
||
|
|
"learning_rate": 1.6948356163845048e-06,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002091139613185078,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 4446.1,
|
||
|
|
"valid_targets_min": 1155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.425101214574899,
|
||
|
|
"grad_norm": 0.0531210573922736,
|
||
|
|
"learning_rate": 1.610632565142627e-06,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00031264807330444455,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 5350.0,
|
||
|
|
"valid_targets_min": 1583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.440283400809717,
|
||
|
|
"grad_norm": 0.06214094975132685,
|
||
|
|
"learning_rate": 1.5284870600637813e-06,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005728084361180663,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 6931.2,
|
||
|
|
"valid_targets_min": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.455465587044534,
|
||
|
|
"grad_norm": 0.025470297264450825,
|
||
|
|
"learning_rate": 1.4484082922291376e-06,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001540776138426736,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 6458.8,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.470647773279352,
|
||
|
|
"grad_norm": 0.0371089157160395,
|
||
|
|
"learning_rate": 1.3704052214771513e-06,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005277476739138365,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 5589.3,
|
||
|
|
"valid_targets_min": 1255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.48582995951417,
|
||
|
|
"grad_norm": 0.03597473308278048,
|
||
|
|
"learning_rate": 1.2944865754010682e-06,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00027125386986881495,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 5186.9,
|
||
|
|
"valid_targets_min": 1046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.501012145748988,
|
||
|
|
"grad_norm": 0.05603090651957245,
|
||
|
|
"learning_rate": 1.2206608483724013e-06,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0006555815925821662,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 4437.2,
|
||
|
|
"valid_targets_min": 1281
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.516194331983805,
|
||
|
|
"grad_norm": 0.040660615586738724,
|
||
|
|
"learning_rate": 1.1489363005905241e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005926231970079243,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 5373.7,
|
||
|
|
"valid_targets_min": 1246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.531376518218623,
|
||
|
|
"grad_norm": 0.039681408267201096,
|
||
|
|
"learning_rate": 1.0793209571584562e-06,
|
||
|
|
"loss": 0.0016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00044325290946289897,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 5612.2,
|
||
|
|
"valid_targets_min": 975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5465587044534415,
|
||
|
|
"grad_norm": 0.03482398576019173,
|
||
|
|
"learning_rate": 1.0118226071849424e-06,
|
||
|
|
"loss": 0.0015,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00012415298260748386,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 5565.6,
|
||
|
|
"valid_targets_min": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5617408906882595,
|
||
|
|
"grad_norm": 0.03861786834216097,
|
||
|
|
"learning_rate": 9.464488029129581e-07,
|
||
|
|
"loss": 0.0038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00042347534326836467,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 6770.4,
|
||
|
|
"valid_targets_min": 2383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.576923076923077,
|
||
|
|
"grad_norm": 0.03782526929710735,
|
||
|
|
"learning_rate": 8.832068588746945e-07,
|
||
|
|
"loss": 0.0053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00042459441465325654,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 6201.0,
|
||
|
|
"valid_targets_min": 748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.592105263157895,
|
||
|
|
"grad_norm": 0.06581264347607255,
|
||
|
|
"learning_rate": 8.221038510731704e-07,
|
||
|
|
"loss": 0.0026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.009063578210771084,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 6460.5,
|
||
|
|
"valid_targets_min": 1058
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.607287449392713,
|
||
|
|
"grad_norm": 0.05187430297042141,
|
||
|
|
"learning_rate": 7.631466161904821e-07,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.647851114394143e-05,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 5950.4,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.62246963562753,
|
||
|
|
"grad_norm": 0.03418084442890125,
|
||
|
|
"learning_rate": 7.063417508228876e-07,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001711228978820145,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 5680.2,
|
||
|
|
"valid_targets_min": 976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.637651821862348,
|
||
|
|
"grad_norm": 0.045181614299724095,
|
||
|
|
"learning_rate": 6.516956107427241e-07,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002168192877434194,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 3741.5,
|
||
|
|
"valid_targets_min": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.652834008097166,
|
||
|
|
"grad_norm": 0.028822520990575768,
|
||
|
|
"learning_rate": 5.992143101872638e-07,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 6.904886686243117e-05,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 5398.7,
|
||
|
|
"valid_targets_min": 978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.668016194331984,
|
||
|
|
"grad_norm": 0.04528903775134061,
|
||
|
|
"learning_rate": 5.489037211746184e-07,
|
||
|
|
"loss": 0.002,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002343966334592551,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 6025.8,
|
||
|
|
"valid_targets_min": 1408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.683198380566802,
|
||
|
|
"grad_norm": 0.017318450062876578,
|
||
|
|
"learning_rate": 5.007694728467228e-07,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.243115346180275e-05,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 5785.7,
|
||
|
|
"valid_targets_min": 1152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.698380566801619,
|
||
|
|
"grad_norm": 0.045176485557852156,
|
||
|
|
"learning_rate": 4.548169508395028e-07,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00011724950309144333,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 5474.6,
|
||
|
|
"valid_targets_min": 2293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.713562753036437,
|
||
|
|
"grad_norm": 0.04717179450293906,
|
||
|
|
"learning_rate": 4.1105129668029595e-07,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00032075931085273623,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 5440.1,
|
||
|
|
"valid_targets_min": 1977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.728744939271255,
|
||
|
|
"grad_norm": 0.02883542239202345,
|
||
|
|
"learning_rate": 3.6947740721257066e-07,
|
||
|
|
"loss": 0.0036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002757580659817904,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 5387.8,
|
||
|
|
"valid_targets_min": 1569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.743927125506072,
|
||
|
|
"grad_norm": 0.04649446397617834,
|
||
|
|
"learning_rate": 3.3009993404802486e-07,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00040277530206367373,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 5777.2,
|
||
|
|
"valid_targets_min": 1465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7591093117408905,
|
||
|
|
"grad_norm": 0.05003669025654074,
|
||
|
|
"learning_rate": 2.929232830461404e-07,
|
||
|
|
"loss": 0.0025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00040337140671908855,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 5920.8,
|
||
|
|
"valid_targets_min": 1195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7742914979757085,
|
||
|
|
"grad_norm": 0.026085597840677918,
|
||
|
|
"learning_rate": 2.579516138212101e-07,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 9.011440852191299e-05,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 4939.5,
|
||
|
|
"valid_targets_min": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7894736842105265,
|
||
|
|
"grad_norm": 0.03964386050033589,
|
||
|
|
"learning_rate": 2.2518883927692857e-07,
|
||
|
|
"loss": 0.0011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005810962175019085,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 6490.4,
|
||
|
|
"valid_targets_min": 1347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.804655870445345,
|
||
|
|
"grad_norm": 0.034600096719414955,
|
||
|
|
"learning_rate": 1.9463862516859277e-07,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0001201618870254606,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 7714.5,
|
||
|
|
"valid_targets_min": 2128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.819838056680162,
|
||
|
|
"grad_norm": 0.04077060684331417,
|
||
|
|
"learning_rate": 1.6630438969294615e-07,
|
||
|
|
"loss": 0.0017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000434387126006186,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 5693.0,
|
||
|
|
"valid_targets_min": 773
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.83502024291498,
|
||
|
|
"grad_norm": 0.03559592574542344,
|
||
|
|
"learning_rate": 1.4018930310571553e-07,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0002756250905804336,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 5118.2,
|
||
|
|
"valid_targets_min": 1126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.850202429149798,
|
||
|
|
"grad_norm": 0.04102607474420591,
|
||
|
|
"learning_rate": 1.1629628736690824e-07,
|
||
|
|
"loss": 0.0013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.002798682078719139,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 5115.5,
|
||
|
|
"valid_targets_min": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.865384615384615,
|
||
|
|
"grad_norm": 0.03665735826038814,
|
||
|
|
"learning_rate": 9.46280158138757e-08,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0012289484729990363,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 5086.9,
|
||
|
|
"valid_targets_min": 1019
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.880566801619433,
|
||
|
|
"grad_norm": 0.035401557000787016,
|
||
|
|
"learning_rate": 7.518691286220625e-08,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005569718778133392,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 6123.9,
|
||
|
|
"valid_targets_min": 1519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.895748987854251,
|
||
|
|
"grad_norm": 0.022697139243698643,
|
||
|
|
"learning_rate": 5.797515373445084e-08,
|
||
|
|
"loss": 0.0008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00019126076949760318,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 5520.2,
|
||
|
|
"valid_targets_min": 1126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.910931174089069,
|
||
|
|
"grad_norm": 0.05200869813147198,
|
||
|
|
"learning_rate": 4.299466421675113e-08,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00018992288096342236,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 6700.1,
|
||
|
|
"valid_targets_min": 1817
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.926113360323887,
|
||
|
|
"grad_norm": 0.03273174844805149,
|
||
|
|
"learning_rate": 3.0247120443362976e-08,
|
||
|
|
"loss": 0.0044,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0004165939171798527,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 5461.3,
|
||
|
|
"valid_targets_min": 1151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.941295546558704,
|
||
|
|
"grad_norm": 0.043664082408331556,
|
||
|
|
"learning_rate": 1.973394870912193e-08,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.00014396056940313429,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 5573.7,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.956477732793522,
|
||
|
|
"grad_norm": 0.04647220242878979,
|
||
|
|
"learning_rate": 1.145632530985541e-08,
|
||
|
|
"loss": 0.0021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.001477197976782918,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 6098.6,
|
||
|
|
"valid_targets_min": 1186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.97165991902834,
|
||
|
|
"grad_norm": 0.03741551615684759,
|
||
|
|
"learning_rate": 5.415176410765721e-09,
|
||
|
|
"loss": 0.0019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.000190427279449068,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 7667.1,
|
||
|
|
"valid_targets_min": 1201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.9868421052631575,
|
||
|
|
"grad_norm": 0.034908554086182576,
|
||
|
|
"learning_rate": 1.611177942812958e-09,
|
||
|
|
"loss": 0.0006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0005074947839602828,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 4861.1,
|
||
|
|
"valid_targets_min": 1478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.06119276217350039,
|
||
|
|
"learning_rate": 4.475552707772224e-11,
|
||
|
|
"loss": 0.001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0014345531817525625,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 5408.7,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"step": 1650,
|
||
|
|
"total_flos": 4.6024941377264026e+18,
|
||
|
|
"train_loss": 0.0,
|
||
|
|
"train_runtime": 1.9709,
|
||
|
|
"train_samples_per_second": 80167.399,
|
||
|
|
"train_steps_per_second": 837.191
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 1650,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 5,
|
||
|
|
"save_steps": 300,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 4.6024941377264026e+18,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|