2629 lines
73 KiB
JSON
2629 lines
73 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1176,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.02982107355864811,
|
|
"grad_norm": 13.09333147683712,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 0.9066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3346264064311981,
|
|
"step": 5,
|
|
"valid_targets_mean": 9695.2,
|
|
"valid_targets_min": 3418
|
|
},
|
|
{
|
|
"epoch": 0.05964214711729622,
|
|
"grad_norm": 9.149937267728706,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 0.8761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28128373622894287,
|
|
"step": 10,
|
|
"valid_targets_mean": 9758.7,
|
|
"valid_targets_min": 2911
|
|
},
|
|
{
|
|
"epoch": 0.08946322067594434,
|
|
"grad_norm": 4.520961825372613,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 0.8155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23452389240264893,
|
|
"step": 15,
|
|
"valid_targets_mean": 8859.2,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 0.11928429423459244,
|
|
"grad_norm": 2.114054619234936,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 0.7578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24326761066913605,
|
|
"step": 20,
|
|
"valid_targets_mean": 8662.6,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 0.14910536779324055,
|
|
"grad_norm": 1.4393876248426487,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 0.7069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.260776162147522,
|
|
"step": 25,
|
|
"valid_targets_mean": 10508.2,
|
|
"valid_targets_min": 3664
|
|
},
|
|
{
|
|
"epoch": 0.17892644135188868,
|
|
"grad_norm": 1.1642740315365356,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 0.6928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2416732907295227,
|
|
"step": 30,
|
|
"valid_targets_mean": 9538.0,
|
|
"valid_targets_min": 2536
|
|
},
|
|
{
|
|
"epoch": 0.20874751491053678,
|
|
"grad_norm": 0.7766319665933883,
|
|
"learning_rate": 1.1525423728813561e-05,
|
|
"loss": 0.6532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21398067474365234,
|
|
"step": 35,
|
|
"valid_targets_mean": 9878.9,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 0.23856858846918488,
|
|
"grad_norm": 0.5838308603893411,
|
|
"learning_rate": 1.3220338983050848e-05,
|
|
"loss": 0.6283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20412425696849823,
|
|
"step": 40,
|
|
"valid_targets_mean": 9810.7,
|
|
"valid_targets_min": 2899
|
|
},
|
|
{
|
|
"epoch": 0.268389662027833,
|
|
"grad_norm": 0.5037663875608299,
|
|
"learning_rate": 1.4915254237288137e-05,
|
|
"loss": 0.605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1884448379278183,
|
|
"step": 45,
|
|
"valid_targets_mean": 9185.9,
|
|
"valid_targets_min": 2971
|
|
},
|
|
{
|
|
"epoch": 0.2982107355864811,
|
|
"grad_norm": 0.39709002181472497,
|
|
"learning_rate": 1.6610169491525424e-05,
|
|
"loss": 0.5845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17413000762462616,
|
|
"step": 50,
|
|
"valid_targets_mean": 8571.4,
|
|
"valid_targets_min": 2940
|
|
},
|
|
{
|
|
"epoch": 0.32803180914512925,
|
|
"grad_norm": 0.33127605925110176,
|
|
"learning_rate": 1.8305084745762713e-05,
|
|
"loss": 0.5575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16696935892105103,
|
|
"step": 55,
|
|
"valid_targets_mean": 9093.4,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 0.35785288270377735,
|
|
"grad_norm": 0.3215099174730303,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.5544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17364796996116638,
|
|
"step": 60,
|
|
"valid_targets_mean": 8888.1,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 0.38767395626242546,
|
|
"grad_norm": 0.2821181675694596,
|
|
"learning_rate": 2.169491525423729e-05,
|
|
"loss": 0.5342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18019835650920868,
|
|
"step": 65,
|
|
"valid_targets_mean": 9505.2,
|
|
"valid_targets_min": 3936
|
|
},
|
|
{
|
|
"epoch": 0.41749502982107356,
|
|
"grad_norm": 0.2414187942326976,
|
|
"learning_rate": 2.338983050847458e-05,
|
|
"loss": 0.5251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18193358182907104,
|
|
"step": 70,
|
|
"valid_targets_mean": 9565.2,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 0.44731610337972166,
|
|
"grad_norm": 0.22420801544120914,
|
|
"learning_rate": 2.5084745762711865e-05,
|
|
"loss": 0.512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14556047320365906,
|
|
"step": 75,
|
|
"valid_targets_mean": 9219.1,
|
|
"valid_targets_min": 3220
|
|
},
|
|
{
|
|
"epoch": 0.47713717693836977,
|
|
"grad_norm": 0.2412726568511184,
|
|
"learning_rate": 2.6779661016949153e-05,
|
|
"loss": 0.5028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16368263959884644,
|
|
"step": 80,
|
|
"valid_targets_mean": 8981.2,
|
|
"valid_targets_min": 3194
|
|
},
|
|
{
|
|
"epoch": 0.5069582504970179,
|
|
"grad_norm": 0.23318641532107723,
|
|
"learning_rate": 2.8474576271186442e-05,
|
|
"loss": 0.5007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16197912395000458,
|
|
"step": 85,
|
|
"valid_targets_mean": 8739.4,
|
|
"valid_targets_min": 2389
|
|
},
|
|
{
|
|
"epoch": 0.536779324055666,
|
|
"grad_norm": 0.23052715262359902,
|
|
"learning_rate": 3.016949152542373e-05,
|
|
"loss": 0.4809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1737665832042694,
|
|
"step": 90,
|
|
"valid_targets_mean": 10664.4,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 0.5666003976143141,
|
|
"grad_norm": 0.22528756572218692,
|
|
"learning_rate": 3.186440677966102e-05,
|
|
"loss": 0.4788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1719246357679367,
|
|
"step": 95,
|
|
"valid_targets_mean": 10288.0,
|
|
"valid_targets_min": 2629
|
|
},
|
|
{
|
|
"epoch": 0.5964214711729622,
|
|
"grad_norm": 0.2601607078156621,
|
|
"learning_rate": 3.355932203389831e-05,
|
|
"loss": 0.4687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1524590253829956,
|
|
"step": 100,
|
|
"valid_targets_mean": 9881.0,
|
|
"valid_targets_min": 2893
|
|
},
|
|
{
|
|
"epoch": 0.6262425447316103,
|
|
"grad_norm": 0.22675199525629694,
|
|
"learning_rate": 3.52542372881356e-05,
|
|
"loss": 0.4565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15666724741458893,
|
|
"step": 105,
|
|
"valid_targets_mean": 10118.0,
|
|
"valid_targets_min": 3280
|
|
},
|
|
{
|
|
"epoch": 0.6560636182902585,
|
|
"grad_norm": 0.23545944466896276,
|
|
"learning_rate": 3.6949152542372886e-05,
|
|
"loss": 0.4635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15132704377174377,
|
|
"step": 110,
|
|
"valid_targets_mean": 9088.4,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 0.6858846918489065,
|
|
"grad_norm": 0.2552945140336154,
|
|
"learning_rate": 3.8644067796610175e-05,
|
|
"loss": 0.4624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15736186504364014,
|
|
"step": 115,
|
|
"valid_targets_mean": 9703.8,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 0.7157057654075547,
|
|
"grad_norm": 0.2661696526978554,
|
|
"learning_rate": 3.999991182852808e-05,
|
|
"loss": 0.4518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14347058534622192,
|
|
"step": 120,
|
|
"valid_targets_mean": 8487.7,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 0.7455268389662028,
|
|
"grad_norm": 0.24265446044028421,
|
|
"learning_rate": 3.999682590863935e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15881270170211792,
|
|
"step": 125,
|
|
"valid_targets_mean": 10650.6,
|
|
"valid_targets_min": 4557
|
|
},
|
|
{
|
|
"epoch": 0.7753479125248509,
|
|
"grad_norm": 0.3477779737132978,
|
|
"learning_rate": 3.9989332192544725e-05,
|
|
"loss": 0.4454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1669636368751526,
|
|
"step": 130,
|
|
"valid_targets_mean": 10482.2,
|
|
"valid_targets_min": 3995
|
|
},
|
|
{
|
|
"epoch": 0.805168986083499,
|
|
"grad_norm": 0.2688791723605399,
|
|
"learning_rate": 3.997743233204502e-05,
|
|
"loss": 0.4463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437249481678009,
|
|
"step": 135,
|
|
"valid_targets_mean": 9449.9,
|
|
"valid_targets_min": 2652
|
|
},
|
|
{
|
|
"epoch": 0.8349900596421471,
|
|
"grad_norm": 0.27803453281851986,
|
|
"learning_rate": 3.996112895016452e-05,
|
|
"loss": 0.4377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13932648301124573,
|
|
"step": 140,
|
|
"valid_targets_mean": 9696.8,
|
|
"valid_targets_min": 3542
|
|
},
|
|
{
|
|
"epoch": 0.8648111332007953,
|
|
"grad_norm": 0.2616493730811242,
|
|
"learning_rate": 3.994042564057279e-05,
|
|
"loss": 0.4405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14841462671756744,
|
|
"step": 145,
|
|
"valid_targets_mean": 9823.4,
|
|
"valid_targets_min": 3035
|
|
},
|
|
{
|
|
"epoch": 0.8946322067594433,
|
|
"grad_norm": 0.2763712349418631,
|
|
"learning_rate": 3.9915326966792555e-05,
|
|
"loss": 0.4393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14706194400787354,
|
|
"step": 150,
|
|
"valid_targets_mean": 9098.7,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 0.9244532803180915,
|
|
"grad_norm": 0.29517511116566714,
|
|
"learning_rate": 3.9885838461193794e-05,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425623744726181,
|
|
"step": 155,
|
|
"valid_targets_mean": 9000.5,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 0.9542743538767395,
|
|
"grad_norm": 0.304747766618288,
|
|
"learning_rate": 3.985196662377424e-05,
|
|
"loss": 0.4372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514747142791748,
|
|
"step": 160,
|
|
"valid_targets_mean": 9558.1,
|
|
"valid_targets_min": 3439
|
|
},
|
|
{
|
|
"epoch": 0.9840954274353877,
|
|
"grad_norm": 0.25971095278000167,
|
|
"learning_rate": 3.981371892072661e-05,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14394722878932953,
|
|
"step": 165,
|
|
"valid_targets_mean": 9277.1,
|
|
"valid_targets_min": 3851
|
|
},
|
|
{
|
|
"epoch": 1.0119284294234592,
|
|
"grad_norm": 0.2900070928223118,
|
|
"learning_rate": 3.9771103782792956e-05,
|
|
"loss": 0.428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467021405696869,
|
|
"step": 170,
|
|
"valid_targets_mean": 9804.5,
|
|
"valid_targets_min": 3955
|
|
},
|
|
{
|
|
"epoch": 1.0417495029821073,
|
|
"grad_norm": 0.2562110209083835,
|
|
"learning_rate": 3.9724130603406204e-05,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15313297510147095,
|
|
"step": 175,
|
|
"valid_targets_mean": 10062.1,
|
|
"valid_targets_min": 2682
|
|
},
|
|
{
|
|
"epoch": 1.0715705765407555,
|
|
"grad_norm": 0.26570170838169876,
|
|
"learning_rate": 3.9672809736619684e-05,
|
|
"loss": 0.426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1393103003501892,
|
|
"step": 180,
|
|
"valid_targets_mean": 9271.5,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 1.1013916500994037,
|
|
"grad_norm": 0.2757881608821532,
|
|
"learning_rate": 3.961715249482482e-05,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14483334124088287,
|
|
"step": 185,
|
|
"valid_targets_mean": 9806.2,
|
|
"valid_targets_min": 2567
|
|
},
|
|
{
|
|
"epoch": 1.1312127236580518,
|
|
"grad_norm": 0.2593086099940217,
|
|
"learning_rate": 3.95571711462576e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15444408357143402,
|
|
"step": 190,
|
|
"valid_targets_mean": 10145.6,
|
|
"valid_targets_min": 3190
|
|
},
|
|
{
|
|
"epoch": 1.1610337972166997,
|
|
"grad_norm": 0.3074440806567765,
|
|
"learning_rate": 3.9492878912294345e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13667774200439453,
|
|
"step": 195,
|
|
"valid_targets_mean": 8876.5,
|
|
"valid_targets_min": 2465
|
|
},
|
|
{
|
|
"epoch": 1.190854870775348,
|
|
"grad_norm": 0.26202363702381226,
|
|
"learning_rate": 3.942428996453741e-05,
|
|
"loss": 0.4141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1346900910139084,
|
|
"step": 200,
|
|
"valid_targets_mean": 9183.1,
|
|
"valid_targets_min": 3115
|
|
},
|
|
{
|
|
"epoch": 1.220675944333996,
|
|
"grad_norm": 0.23482546143374838,
|
|
"learning_rate": 3.935141942169138e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14526072144508362,
|
|
"step": 205,
|
|
"valid_targets_mean": 10055.5,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.250497017892644,
|
|
"grad_norm": 0.24783213003782373,
|
|
"learning_rate": 3.927428334623054e-05,
|
|
"loss": 0.4251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13255912065505981,
|
|
"step": 210,
|
|
"valid_targets_mean": 10057.3,
|
|
"valid_targets_min": 4200
|
|
},
|
|
{
|
|
"epoch": 1.2803180914512922,
|
|
"grad_norm": 0.3356314182794496,
|
|
"learning_rate": 3.919289874085837e-05,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14556168019771576,
|
|
"step": 215,
|
|
"valid_targets_mean": 9935.3,
|
|
"valid_targets_min": 4601
|
|
},
|
|
{
|
|
"epoch": 1.3101391650099403,
|
|
"grad_norm": 0.3205331125262116,
|
|
"learning_rate": 3.910728354475961e-05,
|
|
"loss": 0.4156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14186373353004456,
|
|
"step": 220,
|
|
"valid_targets_mean": 10047.3,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 1.3399602385685885,
|
|
"grad_norm": 0.24942595965959885,
|
|
"learning_rate": 3.9017456629646126e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405375897884369,
|
|
"step": 225,
|
|
"valid_targets_mean": 10060.8,
|
|
"valid_targets_min": 3242
|
|
},
|
|
{
|
|
"epoch": 1.3697813121272366,
|
|
"grad_norm": 0.2584612306483155,
|
|
"learning_rate": 3.8923437795597056e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11859870702028275,
|
|
"step": 230,
|
|
"valid_targets_mean": 8309.6,
|
|
"valid_targets_min": 3444
|
|
},
|
|
{
|
|
"epoch": 1.3996023856858848,
|
|
"grad_norm": 0.25784491517610686,
|
|
"learning_rate": 3.882524776669442e-05,
|
|
"loss": 0.415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13979320228099823,
|
|
"step": 235,
|
|
"valid_targets_mean": 9624.1,
|
|
"valid_targets_min": 2944
|
|
},
|
|
{
|
|
"epoch": 1.4294234592445327,
|
|
"grad_norm": 0.261391607331903,
|
|
"learning_rate": 3.872290818645497e-05,
|
|
"loss": 0.4121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375332772731781,
|
|
"step": 240,
|
|
"valid_targets_mean": 9274.7,
|
|
"valid_targets_min": 2578
|
|
},
|
|
{
|
|
"epoch": 1.459244532803181,
|
|
"grad_norm": 0.26508343196177364,
|
|
"learning_rate": 3.861644161305948e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376476287841797,
|
|
"step": 245,
|
|
"valid_targets_mean": 9600.3,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 1.489065606361829,
|
|
"grad_norm": 0.25905924292751825,
|
|
"learning_rate": 3.850587151438031e-05,
|
|
"loss": 0.4075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12836024165153503,
|
|
"step": 250,
|
|
"valid_targets_mean": 8532.4,
|
|
"valid_targets_min": 2314
|
|
},
|
|
{
|
|
"epoch": 1.518886679920477,
|
|
"grad_norm": 0.24769087290084157,
|
|
"learning_rate": 3.839122226280854e-05,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1362186223268509,
|
|
"step": 255,
|
|
"valid_targets_mean": 9983.8,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 1.5487077534791251,
|
|
"grad_norm": 0.24871240968489136,
|
|
"learning_rate": 3.8272519129881696e-05,
|
|
"loss": 0.4046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329842209815979,
|
|
"step": 260,
|
|
"valid_targets_mean": 9754.4,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 1.5785288270377733,
|
|
"grad_norm": 0.2574497272355019,
|
|
"learning_rate": 3.814978828071325e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14275474846363068,
|
|
"step": 265,
|
|
"valid_targets_mean": 9707.1,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 1.6083499005964215,
|
|
"grad_norm": 0.26745189858092433,
|
|
"learning_rate": 3.802305676822517e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13096818327903748,
|
|
"step": 270,
|
|
"valid_targets_mean": 9074.9,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 1.6381709741550696,
|
|
"grad_norm": 0.2801974592140141,
|
|
"learning_rate": 3.789235252718484e-05,
|
|
"loss": 0.4099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14643187820911407,
|
|
"step": 275,
|
|
"valid_targets_mean": 9043.5,
|
|
"valid_targets_min": 3736
|
|
},
|
|
{
|
|
"epoch": 1.6679920477137178,
|
|
"grad_norm": 0.24615159428763864,
|
|
"learning_rate": 3.775770436804751e-05,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417016237974167,
|
|
"step": 280,
|
|
"valid_targets_mean": 10256.2,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 1.697813121272366,
|
|
"grad_norm": 0.255233286604699,
|
|
"learning_rate": 3.761914197060573e-05,
|
|
"loss": 0.4005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12331944704055786,
|
|
"step": 285,
|
|
"valid_targets_mean": 9173.6,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 1.7276341948310139,
|
|
"grad_norm": 0.2779216978110516,
|
|
"learning_rate": 3.747669587744723e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13669756054878235,
|
|
"step": 290,
|
|
"valid_targets_mean": 9882.7,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 1.757455268389662,
|
|
"grad_norm": 0.26422478087504886,
|
|
"learning_rate": 3.733039748722258e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14321698248386383,
|
|
"step": 295,
|
|
"valid_targets_mean": 9899.4,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 1.78727634194831,
|
|
"grad_norm": 0.3959250627738573,
|
|
"learning_rate": 3.718027904772412e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15091611444950104,
|
|
"step": 300,
|
|
"valid_targets_mean": 9824.9,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 1.8170974155069581,
|
|
"grad_norm": 0.28379938678387157,
|
|
"learning_rate": 3.702637364877776e-05,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386471688747406,
|
|
"step": 305,
|
|
"valid_targets_mean": 9847.7,
|
|
"valid_targets_min": 3711
|
|
},
|
|
{
|
|
"epoch": 1.8469184890656063,
|
|
"grad_norm": 0.2889207311661245,
|
|
"learning_rate": 3.686871521494915e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14630842208862305,
|
|
"step": 310,
|
|
"valid_targets_mean": 9958.7,
|
|
"valid_targets_min": 2990
|
|
},
|
|
{
|
|
"epoch": 1.8767395626242545,
|
|
"grad_norm": 0.2561333348175739,
|
|
"learning_rate": 3.67073384980659e-05,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12660804390907288,
|
|
"step": 315,
|
|
"valid_targets_mean": 9167.2,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 1.9065606361829026,
|
|
"grad_norm": 0.2914965872986488,
|
|
"learning_rate": 3.654227906955737e-05,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14069530367851257,
|
|
"step": 320,
|
|
"valid_targets_mean": 9996.4,
|
|
"valid_targets_min": 2710
|
|
},
|
|
{
|
|
"epoch": 1.9363817097415508,
|
|
"grad_norm": 0.2543075725463023,
|
|
"learning_rate": 3.6373573312613874e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340501606464386,
|
|
"step": 325,
|
|
"valid_targets_mean": 9937.8,
|
|
"valid_targets_min": 3155
|
|
},
|
|
{
|
|
"epoch": 1.966202783300199,
|
|
"grad_norm": 0.2646619167637183,
|
|
"learning_rate": 3.620125841416692e-05,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13946759700775146,
|
|
"step": 330,
|
|
"valid_targets_mean": 9728.8,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 1.9960238568588469,
|
|
"grad_norm": 0.294581414903317,
|
|
"learning_rate": 3.602537235669228e-05,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12473136186599731,
|
|
"step": 335,
|
|
"valid_targets_mean": 9149.7,
|
|
"valid_targets_min": 3235
|
|
},
|
|
{
|
|
"epoch": 2.0238568588469183,
|
|
"grad_norm": 0.27850055295898757,
|
|
"learning_rate": 3.5845953909837716e-05,
|
|
"loss": 0.393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12054517865180969,
|
|
"step": 340,
|
|
"valid_targets_mean": 8854.7,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 2.0536779324055665,
|
|
"grad_norm": 0.26244144180768225,
|
|
"learning_rate": 3.566304262187718e-05,
|
|
"loss": 0.3938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12201672792434692,
|
|
"step": 345,
|
|
"valid_targets_mean": 8900.0,
|
|
"valid_targets_min": 2330
|
|
},
|
|
{
|
|
"epoch": 2.0834990059642147,
|
|
"grad_norm": 0.2675354283095,
|
|
"learning_rate": 3.547667881099341e-05,
|
|
"loss": 0.3931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1361660212278366,
|
|
"step": 350,
|
|
"valid_targets_mean": 10345.9,
|
|
"valid_targets_min": 5477
|
|
},
|
|
{
|
|
"epoch": 2.113320079522863,
|
|
"grad_norm": 0.27333374666170746,
|
|
"learning_rate": 3.528690355639079e-05,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383921355009079,
|
|
"step": 355,
|
|
"valid_targets_mean": 10431.5,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 2.143141153081511,
|
|
"grad_norm": 0.2515801080189176,
|
|
"learning_rate": 3.509375868924048e-05,
|
|
"loss": 0.3857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11818955838680267,
|
|
"step": 360,
|
|
"valid_targets_mean": 8793.7,
|
|
"valid_targets_min": 3129
|
|
},
|
|
{
|
|
"epoch": 2.172962226640159,
|
|
"grad_norm": 0.2625179819877921,
|
|
"learning_rate": 3.489728678345978e-05,
|
|
"loss": 0.3938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13788491487503052,
|
|
"step": 365,
|
|
"valid_targets_mean": 10044.4,
|
|
"valid_targets_min": 4449
|
|
},
|
|
{
|
|
"epoch": 2.2027833001988073,
|
|
"grad_norm": 0.2964649252713314,
|
|
"learning_rate": 3.46975311463278e-05,
|
|
"loss": 0.3843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13454961776733398,
|
|
"step": 370,
|
|
"valid_targets_mean": 9236.4,
|
|
"valid_targets_min": 2752
|
|
},
|
|
{
|
|
"epoch": 2.2326043737574555,
|
|
"grad_norm": 0.2710375300470546,
|
|
"learning_rate": 3.449453580893945e-05,
|
|
"loss": 0.3968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12816333770751953,
|
|
"step": 375,
|
|
"valid_targets_mean": 9797.3,
|
|
"valid_targets_min": 3511
|
|
},
|
|
{
|
|
"epoch": 2.2624254473161036,
|
|
"grad_norm": 0.25606918527553785,
|
|
"learning_rate": 3.428834551649989e-05,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13535423576831818,
|
|
"step": 380,
|
|
"valid_targets_mean": 9259.8,
|
|
"valid_targets_min": 1987
|
|
},
|
|
{
|
|
"epoch": 2.2922465208747513,
|
|
"grad_norm": 0.25758666339894315,
|
|
"learning_rate": 3.4079005718461596e-05,
|
|
"loss": 0.3935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293325573205948,
|
|
"step": 385,
|
|
"valid_targets_mean": 8947.5,
|
|
"valid_targets_min": 2799
|
|
},
|
|
{
|
|
"epoch": 2.3220675944333995,
|
|
"grad_norm": 0.24234110541082965,
|
|
"learning_rate": 3.386656255850617e-05,
|
|
"loss": 0.3904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1355832815170288,
|
|
"step": 390,
|
|
"valid_targets_mean": 10094.5,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 2.3518886679920477,
|
|
"grad_norm": 0.25695722708253604,
|
|
"learning_rate": 3.365106286437309e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12625740468502045,
|
|
"step": 395,
|
|
"valid_targets_mean": 9291.4,
|
|
"valid_targets_min": 3611
|
|
},
|
|
{
|
|
"epoch": 2.381709741550696,
|
|
"grad_norm": 0.2563116812129519,
|
|
"learning_rate": 3.3432554137537764e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11663530766963959,
|
|
"step": 400,
|
|
"valid_targets_mean": 9260.6,
|
|
"valid_targets_min": 2691
|
|
},
|
|
{
|
|
"epoch": 2.411530815109344,
|
|
"grad_norm": 0.2572513815596599,
|
|
"learning_rate": 3.321108454274103e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12803307175636292,
|
|
"step": 405,
|
|
"valid_targets_mean": 9468.8,
|
|
"valid_targets_min": 3154
|
|
},
|
|
{
|
|
"epoch": 2.441351888667992,
|
|
"grad_norm": 0.2607707850998344,
|
|
"learning_rate": 3.29867028973724e-05,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312410533428192,
|
|
"step": 410,
|
|
"valid_targets_mean": 8711.6,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 2.4711729622266403,
|
|
"grad_norm": 0.23731121331277513,
|
|
"learning_rate": 3.275945866070955e-05,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14013691246509552,
|
|
"step": 415,
|
|
"valid_targets_mean": 10685.4,
|
|
"valid_targets_min": 2421
|
|
},
|
|
{
|
|
"epoch": 2.500994035785288,
|
|
"grad_norm": 0.24444398626626032,
|
|
"learning_rate": 3.252940192301624e-05,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12714995443820953,
|
|
"step": 420,
|
|
"valid_targets_mean": 10293.2,
|
|
"valid_targets_min": 3706
|
|
},
|
|
{
|
|
"epoch": 2.530815109343936,
|
|
"grad_norm": 0.2405776417574965,
|
|
"learning_rate": 3.229658339450119e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13615553081035614,
|
|
"step": 425,
|
|
"valid_targets_mean": 10000.6,
|
|
"valid_targets_min": 3407
|
|
},
|
|
{
|
|
"epoch": 2.5606361829025843,
|
|
"grad_norm": 0.24845003130000037,
|
|
"learning_rate": 3.2061054394140285e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352383941411972,
|
|
"step": 430,
|
|
"valid_targets_mean": 10112.7,
|
|
"valid_targets_min": 3079
|
|
},
|
|
{
|
|
"epoch": 2.5904572564612325,
|
|
"grad_norm": 0.2562994770469392,
|
|
"learning_rate": 3.182286683836461e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12299023568630219,
|
|
"step": 435,
|
|
"valid_targets_mean": 8783.9,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 2.6202783300198806,
|
|
"grad_norm": 0.2591727396961362,
|
|
"learning_rate": 3.158207322961678e-05,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12027916312217712,
|
|
"step": 440,
|
|
"valid_targets_mean": 8550.8,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 2.650099403578529,
|
|
"grad_norm": 0.23343608939765326,
|
|
"learning_rate": 3.1338726644778084e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12307856976985931,
|
|
"step": 445,
|
|
"valid_targets_mean": 9197.8,
|
|
"valid_targets_min": 3880
|
|
},
|
|
{
|
|
"epoch": 2.679920477137177,
|
|
"grad_norm": 0.2608548923226282,
|
|
"learning_rate": 3.109288072346904e-05,
|
|
"loss": 0.3846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13536040484905243,
|
|
"step": 450,
|
|
"valid_targets_mean": 9562.6,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 2.709741550695825,
|
|
"grad_norm": 0.2674341383904537,
|
|
"learning_rate": 3.084458965622591e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1220935583114624,
|
|
"step": 455,
|
|
"valid_targets_mean": 9237.2,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 2.7395626242544733,
|
|
"grad_norm": 0.24113612336638285,
|
|
"learning_rate": 3.0593908172555696e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12890732288360596,
|
|
"step": 460,
|
|
"valid_targets_mean": 9338.5,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 2.7693836978131214,
|
|
"grad_norm": 0.23504577264959453,
|
|
"learning_rate": 3.0340891528872503e-05,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365663856267929,
|
|
"step": 465,
|
|
"valid_targets_mean": 10284.9,
|
|
"valid_targets_min": 3324
|
|
},
|
|
{
|
|
"epoch": 2.7992047713717696,
|
|
"grad_norm": 0.23542940739467466,
|
|
"learning_rate": 3.0085595496317558e-05,
|
|
"loss": 0.386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12416007369756699,
|
|
"step": 470,
|
|
"valid_targets_mean": 9443.1,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 2.8290258449304178,
|
|
"grad_norm": 0.2520271616790775,
|
|
"learning_rate": 2.9828076348465913e-05,
|
|
"loss": 0.3887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12400713562965393,
|
|
"step": 475,
|
|
"valid_targets_mean": 8372.7,
|
|
"valid_targets_min": 3158
|
|
},
|
|
{
|
|
"epoch": 2.8588469184890655,
|
|
"grad_norm": 0.2617214720802655,
|
|
"learning_rate": 2.956839084892235e-05,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13921988010406494,
|
|
"step": 480,
|
|
"valid_targets_mean": 10316.4,
|
|
"valid_targets_min": 2117
|
|
},
|
|
{
|
|
"epoch": 2.8886679920477136,
|
|
"grad_norm": 0.23989440057199513,
|
|
"learning_rate": 2.9306596238809292e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12483088672161102,
|
|
"step": 485,
|
|
"valid_targets_mean": 9510.9,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 2.918489065606362,
|
|
"grad_norm": 0.2619100232438116,
|
|
"learning_rate": 2.9042750224149396e-05,
|
|
"loss": 0.384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1344929337501526,
|
|
"step": 490,
|
|
"valid_targets_mean": 10448.7,
|
|
"valid_targets_min": 4344
|
|
},
|
|
{
|
|
"epoch": 2.94831013916501,
|
|
"grad_norm": 0.24604640536880984,
|
|
"learning_rate": 2.877691096314576e-05,
|
|
"loss": 0.3878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13883237540721893,
|
|
"step": 495,
|
|
"valid_targets_mean": 10006.5,
|
|
"valid_targets_min": 3781
|
|
},
|
|
{
|
|
"epoch": 2.978131212723658,
|
|
"grad_norm": 0.24449981627934345,
|
|
"learning_rate": 2.850913705336238e-05,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12503878772258759,
|
|
"step": 500,
|
|
"valid_targets_mean": 9295.5,
|
|
"valid_targets_min": 2761
|
|
},
|
|
{
|
|
"epoch": 3.00596421471173,
|
|
"grad_norm": 0.25973568404266545,
|
|
"learning_rate": 2.8239487518807816e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11682064831256866,
|
|
"step": 505,
|
|
"valid_targets_mean": 8969.0,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 3.0357852882703775,
|
|
"grad_norm": 0.22127882526750678,
|
|
"learning_rate": 2.7968021796924834e-05,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10894481837749481,
|
|
"step": 510,
|
|
"valid_targets_mean": 8102.7,
|
|
"valid_targets_min": 4194
|
|
},
|
|
{
|
|
"epoch": 3.0656063618290257,
|
|
"grad_norm": 0.2568154982874112,
|
|
"learning_rate": 2.76947997254889e-05,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269792914390564,
|
|
"step": 515,
|
|
"valid_targets_mean": 9722.5,
|
|
"valid_targets_min": 3461
|
|
},
|
|
{
|
|
"epoch": 3.095427435387674,
|
|
"grad_norm": 0.2418191676681759,
|
|
"learning_rate": 2.741988152941849e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313607394695282,
|
|
"step": 520,
|
|
"valid_targets_mean": 10292.7,
|
|
"valid_targets_min": 2701
|
|
},
|
|
{
|
|
"epoch": 3.125248508946322,
|
|
"grad_norm": 0.26137529392846076,
|
|
"learning_rate": 2.714332780749997e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12172074615955353,
|
|
"step": 525,
|
|
"valid_targets_mean": 9764.4,
|
|
"valid_targets_min": 2498
|
|
},
|
|
{
|
|
"epoch": 3.15506958250497,
|
|
"grad_norm": 0.24623727496182754,
|
|
"learning_rate": 2.6865199519030178e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12355610728263855,
|
|
"step": 530,
|
|
"valid_targets_mean": 9638.3,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 3.1848906560636183,
|
|
"grad_norm": 0.24954885423234177,
|
|
"learning_rate": 2.658555797037945e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12461437284946442,
|
|
"step": 535,
|
|
"valid_targets_mean": 8588.2,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 3.2147117296222665,
|
|
"grad_norm": 0.21483912308106684,
|
|
"learning_rate": 2.6304464801478177e-05,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340673416852951,
|
|
"step": 540,
|
|
"valid_targets_mean": 10699.2,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 3.2445328031809146,
|
|
"grad_norm": 0.253090712355003,
|
|
"learning_rate": 2.6021981972229852e-05,
|
|
"loss": 0.3796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394907385110855,
|
|
"step": 545,
|
|
"valid_targets_mean": 9860.4,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 3.274353876739563,
|
|
"grad_norm": 0.22081826679193847,
|
|
"learning_rate": 2.5738171748853552e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1250452846288681,
|
|
"step": 550,
|
|
"valid_targets_mean": 10036.3,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 3.3041749502982105,
|
|
"grad_norm": 0.25504537067123745,
|
|
"learning_rate": 2.545309669015895e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12748511135578156,
|
|
"step": 555,
|
|
"valid_targets_mean": 10318.6,
|
|
"valid_targets_min": 4635
|
|
},
|
|
{
|
|
"epoch": 3.3339960238568587,
|
|
"grad_norm": 0.23504480529865782,
|
|
"learning_rate": 2.5166819633756746e-05,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12984226644039154,
|
|
"step": 560,
|
|
"valid_targets_mean": 9720.7,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 3.363817097415507,
|
|
"grad_norm": 0.22141055145170896,
|
|
"learning_rate": 2.4879403682207775e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256609559059143,
|
|
"step": 565,
|
|
"valid_targets_mean": 9638.0,
|
|
"valid_targets_min": 2492
|
|
},
|
|
{
|
|
"epoch": 3.393638170974155,
|
|
"grad_norm": 0.24107550315856965,
|
|
"learning_rate": 2.4590912189113575e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13189947605133057,
|
|
"step": 570,
|
|
"valid_targets_mean": 9626.9,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 3.423459244532803,
|
|
"grad_norm": 0.23868274137862344,
|
|
"learning_rate": 2.430140874515171e-05,
|
|
"loss": 0.3742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12700285017490387,
|
|
"step": 575,
|
|
"valid_targets_mean": 9807.8,
|
|
"valid_targets_min": 3978
|
|
},
|
|
{
|
|
"epoch": 3.4532803180914513,
|
|
"grad_norm": 0.22377331028220349,
|
|
"learning_rate": 2.4010957164058803e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289224922657013,
|
|
"step": 580,
|
|
"valid_targets_mean": 9460.9,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 3.4831013916500995,
|
|
"grad_norm": 0.24019351233889105,
|
|
"learning_rate": 2.3719621468564416e-05,
|
|
"loss": 0.3763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12393779307603836,
|
|
"step": 585,
|
|
"valid_targets_mean": 9742.5,
|
|
"valid_targets_min": 2693
|
|
},
|
|
{
|
|
"epoch": 3.5129224652087476,
|
|
"grad_norm": 0.24701426386806288,
|
|
"learning_rate": 2.3427465876278843e-05,
|
|
"loss": 0.3697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12272713333368301,
|
|
"step": 590,
|
|
"valid_targets_mean": 10039.6,
|
|
"valid_targets_min": 2944
|
|
},
|
|
{
|
|
"epoch": 3.542743538767396,
|
|
"grad_norm": 0.25447639049380394,
|
|
"learning_rate": 2.3134554785537943e-05,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1295909881591797,
|
|
"step": 595,
|
|
"valid_targets_mean": 9521.8,
|
|
"valid_targets_min": 2744
|
|
},
|
|
{
|
|
"epoch": 3.572564612326044,
|
|
"grad_norm": 0.21876380474118456,
|
|
"learning_rate": 2.284095276120818e-05,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11200149357318878,
|
|
"step": 600,
|
|
"valid_targets_mean": 9297.1,
|
|
"valid_targets_min": 4191
|
|
},
|
|
{
|
|
"epoch": 3.602385685884692,
|
|
"grad_norm": 0.2876203923550237,
|
|
"learning_rate": 2.2546724520454916e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13492657244205475,
|
|
"step": 605,
|
|
"valid_targets_mean": 9474.2,
|
|
"valid_targets_min": 2691
|
|
},
|
|
{
|
|
"epoch": 3.63220675944334,
|
|
"grad_norm": 0.3109728940450909,
|
|
"learning_rate": 2.2251934918477126e-05,
|
|
"loss": 0.3764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1116630882024765,
|
|
"step": 610,
|
|
"valid_targets_mean": 8435.2,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 3.662027833001988,
|
|
"grad_norm": 0.23811609842596262,
|
|
"learning_rate": 2.1956648934211717e-05,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11188781261444092,
|
|
"step": 615,
|
|
"valid_targets_mean": 9120.6,
|
|
"valid_targets_min": 2188
|
|
},
|
|
{
|
|
"epoch": 3.691848906560636,
|
|
"grad_norm": 0.22411995085803113,
|
|
"learning_rate": 2.1660931656010568e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11489223688840866,
|
|
"step": 620,
|
|
"valid_targets_mean": 8820.9,
|
|
"valid_targets_min": 3292
|
|
},
|
|
{
|
|
"epoch": 3.7216699801192843,
|
|
"grad_norm": 0.22528057105116367,
|
|
"learning_rate": 2.1364848267293424e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11487048119306564,
|
|
"step": 625,
|
|
"valid_targets_mean": 8887.9,
|
|
"valid_targets_min": 2649
|
|
},
|
|
{
|
|
"epoch": 3.7514910536779325,
|
|
"grad_norm": 0.23210531928235775,
|
|
"learning_rate": 2.106846403217987e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12289533764123917,
|
|
"step": 630,
|
|
"valid_targets_mean": 9040.5,
|
|
"valid_targets_min": 2927
|
|
},
|
|
{
|
|
"epoch": 3.7813121272365806,
|
|
"grad_norm": 0.22420265923284372,
|
|
"learning_rate": 2.0771844281103503e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1272115558385849,
|
|
"step": 635,
|
|
"valid_targets_mean": 9784.5,
|
|
"valid_targets_min": 3904
|
|
},
|
|
{
|
|
"epoch": 3.8111332007952288,
|
|
"grad_norm": 0.2463614011353854,
|
|
"learning_rate": 2.0475054396411464e-05,
|
|
"loss": 0.3737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13237622380256653,
|
|
"step": 640,
|
|
"valid_targets_mean": 10127.7,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 3.8409542743538765,
|
|
"grad_norm": 0.23705284154886855,
|
|
"learning_rate": 2.017815979795257e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1224151998758316,
|
|
"step": 645,
|
|
"valid_targets_mean": 8962.2,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 3.8707753479125246,
|
|
"grad_norm": 0.27393497557546315,
|
|
"learning_rate": 1.9881225928657132e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11714585870504379,
|
|
"step": 650,
|
|
"valid_targets_mean": 9084.2,
|
|
"valid_targets_min": 3314
|
|
},
|
|
{
|
|
"epoch": 3.900596421471173,
|
|
"grad_norm": 0.24064359347617442,
|
|
"learning_rate": 1.958431824011176e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11896838247776031,
|
|
"step": 655,
|
|
"valid_targets_mean": 9061.0,
|
|
"valid_targets_min": 3267
|
|
},
|
|
{
|
|
"epoch": 3.930417495029821,
|
|
"grad_norm": 0.23848724567684518,
|
|
"learning_rate": 1.928750217813214e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12432240694761276,
|
|
"step": 660,
|
|
"valid_targets_mean": 9445.7,
|
|
"valid_targets_min": 3656
|
|
},
|
|
{
|
|
"epoch": 3.960238568588469,
|
|
"grad_norm": 0.23633084097635995,
|
|
"learning_rate": 1.899084316833722e-05,
|
|
"loss": 0.3726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12451080232858658,
|
|
"step": 665,
|
|
"valid_targets_mean": 9713.7,
|
|
"valid_targets_min": 3239
|
|
},
|
|
{
|
|
"epoch": 3.9900596421471173,
|
|
"grad_norm": 0.24802678596076022,
|
|
"learning_rate": 1.869440660172774e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269044578075409,
|
|
"step": 670,
|
|
"valid_targets_mean": 9837.6,
|
|
"valid_targets_min": 2370
|
|
},
|
|
{
|
|
"epoch": 4.0178926441351885,
|
|
"grad_norm": 0.23067166950312643,
|
|
"learning_rate": 1.8398257820272438e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13049767911434174,
|
|
"step": 675,
|
|
"valid_targets_mean": 9200.5,
|
|
"valid_targets_min": 3730
|
|
},
|
|
{
|
|
"epoch": 4.047713717693837,
|
|
"grad_norm": 0.22687025778414033,
|
|
"learning_rate": 1.8102462102505096e-05,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11435438692569733,
|
|
"step": 680,
|
|
"valid_targets_mean": 9556.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 4.077534791252485,
|
|
"grad_norm": 0.23492036510934328,
|
|
"learning_rate": 1.7807084649135473e-05,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11235421150922775,
|
|
"step": 685,
|
|
"valid_targets_mean": 8562.3,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 4.107355864811133,
|
|
"grad_norm": 0.22232507076273023,
|
|
"learning_rate": 1.751219056867751e-05,
|
|
"loss": 0.3649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12534525990486145,
|
|
"step": 690,
|
|
"valid_targets_mean": 9685.4,
|
|
"valid_targets_min": 3439
|
|
},
|
|
{
|
|
"epoch": 4.137176938369781,
|
|
"grad_norm": 0.2160815037269669,
|
|
"learning_rate": 1.7217844863097774e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12021255493164062,
|
|
"step": 695,
|
|
"valid_targets_mean": 9724.7,
|
|
"valid_targets_min": 4045
|
|
},
|
|
{
|
|
"epoch": 4.166998011928429,
|
|
"grad_norm": 0.22504176113051863,
|
|
"learning_rate": 1.6924112413487382e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12961643934249878,
|
|
"step": 700,
|
|
"valid_targets_mean": 10400.0,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 4.1968190854870775,
|
|
"grad_norm": 0.2239146324338242,
|
|
"learning_rate": 1.6631057965760674e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256789118051529,
|
|
"step": 705,
|
|
"valid_targets_mean": 10124.2,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 4.226640159045726,
|
|
"grad_norm": 0.24637631808051266,
|
|
"learning_rate": 1.633874611638353e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11531466245651245,
|
|
"step": 710,
|
|
"valid_targets_mean": 9248.9,
|
|
"valid_targets_min": 3223
|
|
},
|
|
{
|
|
"epoch": 4.256461232604374,
|
|
"grad_norm": 0.2436999109413977,
|
|
"learning_rate": 1.6047241298134767e-05,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12499426305294037,
|
|
"step": 715,
|
|
"valid_targets_mean": 8669.4,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 4.286282306163022,
|
|
"grad_norm": 0.23915655247210107,
|
|
"learning_rate": 1.5756607765903525e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13046535849571228,
|
|
"step": 720,
|
|
"valid_targets_mean": 9301.4,
|
|
"valid_targets_min": 3807
|
|
},
|
|
{
|
|
"epoch": 4.31610337972167,
|
|
"grad_norm": 0.27408527428068585,
|
|
"learning_rate": 1.5466909582525893e-05,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10745998471975327,
|
|
"step": 725,
|
|
"valid_targets_mean": 8807.9,
|
|
"valid_targets_min": 2492
|
|
},
|
|
{
|
|
"epoch": 4.345924453280318,
|
|
"grad_norm": 0.22513801096410269,
|
|
"learning_rate": 1.51782106046639e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12577703595161438,
|
|
"step": 730,
|
|
"valid_targets_mean": 9537.7,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 4.3757455268389664,
|
|
"grad_norm": 0.22189885599414932,
|
|
"learning_rate": 1.4890574468729893e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11032432317733765,
|
|
"step": 735,
|
|
"valid_targets_mean": 8715.6,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 4.405566600397615,
|
|
"grad_norm": 0.2262677009022974,
|
|
"learning_rate": 1.4604064576859513e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12237927317619324,
|
|
"step": 740,
|
|
"valid_targets_mean": 8992.1,
|
|
"valid_targets_min": 4707
|
|
},
|
|
{
|
|
"epoch": 4.435387673956263,
|
|
"grad_norm": 0.21753810116362765,
|
|
"learning_rate": 1.43187440829363e-05,
|
|
"loss": 0.3681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13011434674263,
|
|
"step": 745,
|
|
"valid_targets_mean": 9628.3,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 4.465208747514911,
|
|
"grad_norm": 0.23166763660646592,
|
|
"learning_rate": 1.4034675878670964e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11755146086215973,
|
|
"step": 750,
|
|
"valid_targets_mean": 8975.1,
|
|
"valid_targets_min": 3581
|
|
},
|
|
{
|
|
"epoch": 4.495029821073558,
|
|
"grad_norm": 0.24948530398746774,
|
|
"learning_rate": 1.3751922579738566e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294461041688919,
|
|
"step": 755,
|
|
"valid_targets_mean": 9031.1,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 4.524850894632207,
|
|
"grad_norm": 0.22634991751036648,
|
|
"learning_rate": 1.3470546511976395e-05,
|
|
"loss": 0.3653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13205093145370483,
|
|
"step": 760,
|
|
"valid_targets_mean": 10131.7,
|
|
"valid_targets_min": 3011
|
|
},
|
|
{
|
|
"epoch": 4.5546719681908545,
|
|
"grad_norm": 0.2407875833801288,
|
|
"learning_rate": 1.3190609697645882e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11704811453819275,
|
|
"step": 765,
|
|
"valid_targets_mean": 8992.2,
|
|
"valid_targets_min": 2746
|
|
},
|
|
{
|
|
"epoch": 4.584493041749503,
|
|
"grad_norm": 0.2232911058573211,
|
|
"learning_rate": 1.2912173841761288e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12442412227392197,
|
|
"step": 770,
|
|
"valid_targets_mean": 10025.8,
|
|
"valid_targets_min": 4098
|
|
},
|
|
{
|
|
"epoch": 4.614314115308151,
|
|
"grad_norm": 0.21682935538971534,
|
|
"learning_rate": 1.2635300318488426e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12467293441295624,
|
|
"step": 775,
|
|
"valid_targets_mean": 9998.5,
|
|
"valid_targets_min": 3071
|
|
},
|
|
{
|
|
"epoch": 4.644135188866799,
|
|
"grad_norm": 0.22390557119853882,
|
|
"learning_rate": 1.236005015761629e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269168108701706,
|
|
"step": 780,
|
|
"valid_targets_mean": 10062.8,
|
|
"valid_targets_min": 2395
|
|
},
|
|
{
|
|
"epoch": 4.673956262425447,
|
|
"grad_norm": 0.24000520155840155,
|
|
"learning_rate": 1.2086484031104515e-05,
|
|
"loss": 0.3719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11352889239788055,
|
|
"step": 785,
|
|
"valid_targets_mean": 8199.0,
|
|
"valid_targets_min": 3394
|
|
},
|
|
{
|
|
"epoch": 4.703777335984095,
|
|
"grad_norm": 0.21139692598361529,
|
|
"learning_rate": 1.1814662239709851e-05,
|
|
"loss": 0.3745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12804041802883148,
|
|
"step": 790,
|
|
"valid_targets_mean": 10180.8,
|
|
"valid_targets_min": 3169
|
|
},
|
|
{
|
|
"epoch": 4.7335984095427435,
|
|
"grad_norm": 0.23419559497822282,
|
|
"learning_rate": 1.1544644699694307e-05,
|
|
"loss": 0.3631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1193591058254242,
|
|
"step": 795,
|
|
"valid_targets_mean": 9056.3,
|
|
"valid_targets_min": 3674
|
|
},
|
|
{
|
|
"epoch": 4.763419483101392,
|
|
"grad_norm": 0.24646021525100137,
|
|
"learning_rate": 1.1276490929618177e-05,
|
|
"loss": 0.3653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11579230427742004,
|
|
"step": 800,
|
|
"valid_targets_mean": 9170.7,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 4.79324055666004,
|
|
"grad_norm": 0.22174275700505441,
|
|
"learning_rate": 1.1010260037220643e-05,
|
|
"loss": 0.3688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12448688596487045,
|
|
"step": 805,
|
|
"valid_targets_mean": 9580.0,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 4.823061630218688,
|
|
"grad_norm": 0.22272875692529412,
|
|
"learning_rate": 1.0746010706390981e-05,
|
|
"loss": 0.3717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12802085280418396,
|
|
"step": 810,
|
|
"valid_targets_mean": 9987.5,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 4.852882703777336,
|
|
"grad_norm": 0.2138007940278265,
|
|
"learning_rate": 1.048380118423316e-05,
|
|
"loss": 0.3742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12118125706911087,
|
|
"step": 815,
|
|
"valid_targets_mean": 9264.9,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 4.882703777335984,
|
|
"grad_norm": 0.22079166609200498,
|
|
"learning_rate": 1.0223689268226754e-05,
|
|
"loss": 0.3699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12725740671157837,
|
|
"step": 820,
|
|
"valid_targets_mean": 10555.1,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 4.912524850894632,
|
|
"grad_norm": 0.21264177919293614,
|
|
"learning_rate": 9.965732293486929e-06,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13087576627731323,
|
|
"step": 825,
|
|
"valid_targets_mean": 10235.8,
|
|
"valid_targets_min": 2786
|
|
},
|
|
{
|
|
"epoch": 4.942345924453281,
|
|
"grad_norm": 0.24182464066253673,
|
|
"learning_rate": 9.709987120126371e-06,
|
|
"loss": 0.3657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12286906689405441,
|
|
"step": 830,
|
|
"valid_targets_mean": 8107.4,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 4.972166998011929,
|
|
"grad_norm": 0.2328727528842246,
|
|
"learning_rate": 9.456510120721911e-06,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11418526619672775,
|
|
"step": 835,
|
|
"valid_targets_mean": 8587.7,
|
|
"valid_targets_min": 2879
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.28105201212007624,
|
|
"learning_rate": 9.205357167888595e-06,
|
|
"loss": 0.3688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17316186428070068,
|
|
"step": 840,
|
|
"valid_targets_mean": 9005.3,
|
|
"valid_targets_min": 3954
|
|
},
|
|
{
|
|
"epoch": 5.029821073558648,
|
|
"grad_norm": 0.30217719030448426,
|
|
"learning_rate": 8.956583621963996e-06,
|
|
"loss": 0.3667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11961351335048676,
|
|
"step": 845,
|
|
"valid_targets_mean": 9328.7,
|
|
"valid_targets_min": 2441
|
|
},
|
|
{
|
|
"epoch": 5.059642147117296,
|
|
"grad_norm": 0.2636234100864308,
|
|
"learning_rate": 8.710244318805406e-06,
|
|
"loss": 0.3641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11540426313877106,
|
|
"step": 850,
|
|
"valid_targets_mean": 9178.5,
|
|
"valid_targets_min": 2654
|
|
},
|
|
{
|
|
"epoch": 5.0894632206759445,
|
|
"grad_norm": 0.2243454259706472,
|
|
"learning_rate": 8.466393557702659e-06,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11149504780769348,
|
|
"step": 855,
|
|
"valid_targets_mean": 9037.4,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 5.119284294234593,
|
|
"grad_norm": 0.2149403201957784,
|
|
"learning_rate": 8.225085089409231e-06,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426597237586975,
|
|
"step": 860,
|
|
"valid_targets_mean": 10396.4,
|
|
"valid_targets_min": 3269
|
|
},
|
|
{
|
|
"epoch": 5.149105367793241,
|
|
"grad_norm": 0.22198093615836403,
|
|
"learning_rate": 7.98637210429422e-06,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12441132217645645,
|
|
"step": 865,
|
|
"valid_targets_mean": 10199.0,
|
|
"valid_targets_min": 2953
|
|
},
|
|
{
|
|
"epoch": 5.178926441351889,
|
|
"grad_norm": 0.21985018694532593,
|
|
"learning_rate": 7.750307220617892e-06,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12127161026000977,
|
|
"step": 870,
|
|
"valid_targets_mean": 9964.5,
|
|
"valid_targets_min": 5004
|
|
},
|
|
{
|
|
"epoch": 5.208747514910537,
|
|
"grad_norm": 0.21440275235872072,
|
|
"learning_rate": 7.5169424729333e-06,
|
|
"loss": 0.3626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12604345381259918,
|
|
"step": 875,
|
|
"valid_targets_mean": 9950.9,
|
|
"valid_targets_min": 3104
|
|
},
|
|
{
|
|
"epoch": 5.238568588469185,
|
|
"grad_norm": 0.22878390162081763,
|
|
"learning_rate": 7.286329300616575e-06,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13689112663269043,
|
|
"step": 880,
|
|
"valid_targets_mean": 10335.2,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 5.2683896620278325,
|
|
"grad_norm": 0.2218645567270629,
|
|
"learning_rate": 7.058518536528427e-06,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12371527403593063,
|
|
"step": 885,
|
|
"valid_targets_mean": 9315.7,
|
|
"valid_targets_min": 3386
|
|
},
|
|
{
|
|
"epoch": 5.298210735586481,
|
|
"grad_norm": 0.20525056421525675,
|
|
"learning_rate": 6.833560395809307e-06,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12798547744750977,
|
|
"step": 890,
|
|
"valid_targets_mean": 9637.3,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 5.328031809145129,
|
|
"grad_norm": 0.22438108847516308,
|
|
"learning_rate": 6.611504464810754e-06,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12381379306316376,
|
|
"step": 895,
|
|
"valid_targets_mean": 9930.8,
|
|
"valid_targets_min": 3306
|
|
},
|
|
{
|
|
"epoch": 5.357852882703777,
|
|
"grad_norm": 0.2014227570589018,
|
|
"learning_rate": 6.392399690165328e-06,
|
|
"loss": 0.3586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10803110897541046,
|
|
"step": 900,
|
|
"valid_targets_mean": 8890.2,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 5.387673956262425,
|
|
"grad_norm": 0.20843428620879534,
|
|
"learning_rate": 6.176294367997564e-06,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383419781923294,
|
|
"step": 905,
|
|
"valid_targets_mean": 10496.9,
|
|
"valid_targets_min": 2517
|
|
},
|
|
{
|
|
"epoch": 5.417495029821073,
|
|
"grad_norm": 0.2279795640183649,
|
|
"learning_rate": 5.9632361332783075e-06,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12038109451532364,
|
|
"step": 910,
|
|
"valid_targets_mean": 9726.5,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 5.4473161033797215,
|
|
"grad_norm": 0.21438762423589458,
|
|
"learning_rate": 5.753271949324779e-06,
|
|
"loss": 0.3678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11860641837120056,
|
|
"step": 915,
|
|
"valid_targets_mean": 8619.2,
|
|
"valid_targets_min": 3587
|
|
},
|
|
{
|
|
"epoch": 5.47713717693837,
|
|
"grad_norm": 0.2042751340473231,
|
|
"learning_rate": 5.546448097448709e-06,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12780383229255676,
|
|
"step": 920,
|
|
"valid_targets_mean": 10348.6,
|
|
"valid_targets_min": 3762
|
|
},
|
|
{
|
|
"epoch": 5.506958250497018,
|
|
"grad_norm": 0.20548277545029267,
|
|
"learning_rate": 5.342810166754773e-06,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10209870338439941,
|
|
"step": 925,
|
|
"valid_targets_mean": 8156.8,
|
|
"valid_targets_min": 2395
|
|
},
|
|
{
|
|
"epoch": 5.536779324055666,
|
|
"grad_norm": 0.21298707779747966,
|
|
"learning_rate": 5.142403044091635e-06,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12702594697475433,
|
|
"step": 930,
|
|
"valid_targets_mean": 10764.0,
|
|
"valid_targets_min": 4226
|
|
},
|
|
{
|
|
"epoch": 5.566600397614314,
|
|
"grad_norm": 0.21096182884472378,
|
|
"learning_rate": 4.945270904157766e-06,
|
|
"loss": 0.3693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1142885610461235,
|
|
"step": 935,
|
|
"valid_targets_mean": 8712.3,
|
|
"valid_targets_min": 3123
|
|
},
|
|
{
|
|
"epoch": 5.596421471172962,
|
|
"grad_norm": 0.2137029350386158,
|
|
"learning_rate": 4.751457199764249e-06,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12835736572742462,
|
|
"step": 940,
|
|
"valid_targets_mean": 9765.0,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 5.6262425447316105,
|
|
"grad_norm": 0.22355702289752202,
|
|
"learning_rate": 4.5610046522567e-06,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12691718339920044,
|
|
"step": 945,
|
|
"valid_targets_mean": 9729.5,
|
|
"valid_targets_min": 3585
|
|
},
|
|
{
|
|
"epoch": 5.656063618290259,
|
|
"grad_norm": 0.21492851974551722,
|
|
"learning_rate": 4.373955242098427e-06,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11885521560907364,
|
|
"step": 950,
|
|
"valid_targets_mean": 9381.0,
|
|
"valid_targets_min": 4095
|
|
},
|
|
{
|
|
"epoch": 5.685884691848907,
|
|
"grad_norm": 0.20233385939777984,
|
|
"learning_rate": 4.190350199616888e-06,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12096765637397766,
|
|
"step": 955,
|
|
"valid_targets_mean": 10414.7,
|
|
"valid_targets_min": 3631
|
|
},
|
|
{
|
|
"epoch": 5.715705765407555,
|
|
"grad_norm": 0.22049322690289747,
|
|
"learning_rate": 4.01022999591552e-06,
|
|
"loss": 0.3625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13061478734016418,
|
|
"step": 960,
|
|
"valid_targets_mean": 9200.2,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 5.745526838966203,
|
|
"grad_norm": 0.20901029363729942,
|
|
"learning_rate": 3.833634333952882e-06,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1363891214132309,
|
|
"step": 965,
|
|
"valid_targets_mean": 9776.2,
|
|
"valid_targets_min": 3927
|
|
},
|
|
{
|
|
"epoch": 5.775347912524851,
|
|
"grad_norm": 0.26991051093015467,
|
|
"learning_rate": 3.6606021397911605e-06,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12989675998687744,
|
|
"step": 970,
|
|
"valid_targets_mean": 10042.0,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 5.805168986083499,
|
|
"grad_norm": 0.19237954867107937,
|
|
"learning_rate": 3.491171554015886e-06,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10952335596084595,
|
|
"step": 975,
|
|
"valid_targets_mean": 9541.4,
|
|
"valid_targets_min": 2110
|
|
},
|
|
{
|
|
"epoch": 5.834990059642147,
|
|
"grad_norm": 0.2239480154769859,
|
|
"learning_rate": 3.3253799233288064e-06,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12480639666318893,
|
|
"step": 980,
|
|
"valid_targets_mean": 9702.5,
|
|
"valid_targets_min": 3851
|
|
},
|
|
{
|
|
"epoch": 5.864811133200796,
|
|
"grad_norm": 0.19907388113430682,
|
|
"learning_rate": 3.1632637923157517e-06,
|
|
"loss": 0.3629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11497035622596741,
|
|
"step": 985,
|
|
"valid_targets_mean": 9566.5,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 5.894632206759443,
|
|
"grad_norm": 0.20959374164502262,
|
|
"learning_rate": 3.004858895391294e-06,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11463524401187897,
|
|
"step": 990,
|
|
"valid_targets_mean": 8378.8,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 5.924453280318091,
|
|
"grad_norm": 0.1941612766228534,
|
|
"learning_rate": 2.8502001489220067e-06,
|
|
"loss": 0.3623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11086033284664154,
|
|
"step": 995,
|
|
"valid_targets_mean": 8698.3,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 5.954274353876739,
|
|
"grad_norm": 0.19209598607356373,
|
|
"learning_rate": 2.6993216435300194e-06,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1245051771402359,
|
|
"step": 1000,
|
|
"valid_targets_mean": 9976.9,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 5.9840954274353875,
|
|
"grad_norm": 0.1970281152353345,
|
|
"learning_rate": 2.5522566365786094e-06,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12308112531900406,
|
|
"step": 1005,
|
|
"valid_targets_mean": 10133.8,
|
|
"valid_targets_min": 4146
|
|
},
|
|
{
|
|
"epoch": 6.01192842942346,
|
|
"grad_norm": 0.20447659959488607,
|
|
"learning_rate": 2.4090375448414505e-06,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12563642859458923,
|
|
"step": 1010,
|
|
"valid_targets_mean": 9465.2,
|
|
"valid_targets_min": 2441
|
|
},
|
|
{
|
|
"epoch": 6.041749502982108,
|
|
"grad_norm": 0.20437038455108572,
|
|
"learning_rate": 2.26969593735715e-06,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11522137373685837,
|
|
"step": 1015,
|
|
"valid_targets_mean": 9140.0,
|
|
"valid_targets_min": 2534
|
|
},
|
|
{
|
|
"epoch": 6.071570576540755,
|
|
"grad_norm": 0.22257426260166877,
|
|
"learning_rate": 2.1342625284706565e-06,
|
|
"loss": 0.364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390356570482254,
|
|
"step": 1020,
|
|
"valid_targets_mean": 10661.7,
|
|
"valid_targets_min": 4246
|
|
},
|
|
{
|
|
"epoch": 6.101391650099403,
|
|
"grad_norm": 0.19365813495938394,
|
|
"learning_rate": 2.002767171063047e-06,
|
|
"loss": 0.3585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11969684064388275,
|
|
"step": 1025,
|
|
"valid_targets_mean": 9768.3,
|
|
"valid_targets_min": 5211
|
|
},
|
|
{
|
|
"epoch": 6.131212723658051,
|
|
"grad_norm": 0.20378104515618026,
|
|
"learning_rate": 1.875238849971226e-06,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1135728657245636,
|
|
"step": 1030,
|
|
"valid_targets_mean": 9446.8,
|
|
"valid_targets_min": 3712
|
|
},
|
|
{
|
|
"epoch": 6.1610337972166995,
|
|
"grad_norm": 0.239141322851671,
|
|
"learning_rate": 1.7517056755989336e-06,
|
|
"loss": 0.3607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12127785384654999,
|
|
"step": 1035,
|
|
"valid_targets_mean": 8748.2,
|
|
"valid_targets_min": 2842
|
|
},
|
|
{
|
|
"epoch": 6.190854870775348,
|
|
"grad_norm": 0.19410931594859926,
|
|
"learning_rate": 1.6321948777205232e-06,
|
|
"loss": 0.3594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11646410822868347,
|
|
"step": 1040,
|
|
"valid_targets_mean": 8943.3,
|
|
"valid_targets_min": 2508
|
|
},
|
|
{
|
|
"epoch": 6.220675944333996,
|
|
"grad_norm": 0.19578149244049262,
|
|
"learning_rate": 1.5167327994788484e-06,
|
|
"loss": 0.3653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13031047582626343,
|
|
"step": 1045,
|
|
"valid_targets_mean": 11259.7,
|
|
"valid_targets_min": 4629
|
|
},
|
|
{
|
|
"epoch": 6.250497017892644,
|
|
"grad_norm": 0.1934858659957491,
|
|
"learning_rate": 1.405344891578566e-06,
|
|
"loss": 0.3558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11782944202423096,
|
|
"step": 1050,
|
|
"valid_targets_mean": 10102.9,
|
|
"valid_targets_min": 5316
|
|
},
|
|
{
|
|
"epoch": 6.280318091451292,
|
|
"grad_norm": 0.19646734609934835,
|
|
"learning_rate": 1.2980557066761912e-06,
|
|
"loss": 0.364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1253916323184967,
|
|
"step": 1055,
|
|
"valid_targets_mean": 10088.6,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 6.31013916500994,
|
|
"grad_norm": 0.20590729415474335,
|
|
"learning_rate": 1.1948888939680647e-06,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12973612546920776,
|
|
"step": 1060,
|
|
"valid_targets_mean": 10507.1,
|
|
"valid_targets_min": 2498
|
|
},
|
|
{
|
|
"epoch": 6.3399602385685885,
|
|
"grad_norm": 0.194011111497666,
|
|
"learning_rate": 1.0958671939774935e-06,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12194149196147919,
|
|
"step": 1065,
|
|
"valid_targets_mean": 9570.9,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 6.369781312127237,
|
|
"grad_norm": 0.20139731344154319,
|
|
"learning_rate": 1.0010124335421722e-06,
|
|
"loss": 0.3626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11091062426567078,
|
|
"step": 1070,
|
|
"valid_targets_mean": 9067.2,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 6.399602385685885,
|
|
"grad_norm": 0.19867046434302968,
|
|
"learning_rate": 9.103455210030066e-07,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11365881562232971,
|
|
"step": 1075,
|
|
"valid_targets_mean": 8556.3,
|
|
"valid_targets_min": 2374
|
|
},
|
|
{
|
|
"epoch": 6.429423459244533,
|
|
"grad_norm": 0.1853582529262351,
|
|
"learning_rate": 8.238864415954029e-07,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1302383840084076,
|
|
"step": 1080,
|
|
"valid_targets_mean": 9907.8,
|
|
"valid_targets_min": 2467
|
|
},
|
|
{
|
|
"epoch": 6.459244532803181,
|
|
"grad_norm": 0.20238865724155874,
|
|
"learning_rate": 7.416542530440174e-07,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1176309734582901,
|
|
"step": 1085,
|
|
"valid_targets_mean": 9380.4,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 6.489065606361829,
|
|
"grad_norm": 0.18362044771096164,
|
|
"learning_rate": 6.636670813619584e-07,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11380837857723236,
|
|
"step": 1090,
|
|
"valid_targets_mean": 10026.3,
|
|
"valid_targets_min": 4177
|
|
},
|
|
{
|
|
"epoch": 6.518886679920477,
|
|
"grad_norm": 0.20523149374890035,
|
|
"learning_rate": 5.899421168553887e-07,
|
|
"loss": 0.3669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12352012097835541,
|
|
"step": 1095,
|
|
"valid_targets_mean": 9891.7,
|
|
"valid_targets_min": 2916
|
|
},
|
|
{
|
|
"epoch": 6.548707753479126,
|
|
"grad_norm": 0.20207344241537156,
|
|
"learning_rate": 5.204956103343217e-07,
|
|
"loss": 0.3696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12412673979997635,
|
|
"step": 1100,
|
|
"valid_targets_mean": 9182.8,
|
|
"valid_targets_min": 2008
|
|
},
|
|
{
|
|
"epoch": 6.578528827037774,
|
|
"grad_norm": 0.2681278098515819,
|
|
"learning_rate": 4.5534286953056617e-07,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12740249931812286,
|
|
"step": 1105,
|
|
"valid_targets_mean": 10408.3,
|
|
"valid_targets_min": 3345
|
|
},
|
|
{
|
|
"epoch": 6.608349900596421,
|
|
"grad_norm": 0.2219640884288412,
|
|
"learning_rate": 3.9449825572350777e-07,
|
|
"loss": 0.3632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13584564626216888,
|
|
"step": 1110,
|
|
"valid_targets_mean": 10947.2,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 6.63817097415507,
|
|
"grad_norm": 0.20125432861092377,
|
|
"learning_rate": 3.379751805745257e-07,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12210185825824738,
|
|
"step": 1115,
|
|
"valid_targets_mean": 9717.5,
|
|
"valid_targets_min": 3124
|
|
},
|
|
{
|
|
"epoch": 6.667992047713717,
|
|
"grad_norm": 0.1994889274316608,
|
|
"learning_rate": 2.857861031707532e-07,
|
|
"loss": 0.3618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11578874289989471,
|
|
"step": 1120,
|
|
"valid_targets_mean": 8496.7,
|
|
"valid_targets_min": 2884
|
|
},
|
|
{
|
|
"epoch": 6.6978131212723655,
|
|
"grad_norm": 0.19330788740500873,
|
|
"learning_rate": 2.3794252727875611e-07,
|
|
"loss": 0.3562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11735907196998596,
|
|
"step": 1125,
|
|
"valid_targets_mean": 9695.8,
|
|
"valid_targets_min": 2649
|
|
},
|
|
{
|
|
"epoch": 6.727634194831014,
|
|
"grad_norm": 0.21484500199290843,
|
|
"learning_rate": 1.9445499880883067e-07,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12110140174627304,
|
|
"step": 1130,
|
|
"valid_targets_mean": 9903.2,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 6.757455268389662,
|
|
"grad_norm": 0.1965552602542897,
|
|
"learning_rate": 1.553331034904293e-07,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12987342476844788,
|
|
"step": 1135,
|
|
"valid_targets_mean": 10510.9,
|
|
"valid_targets_min": 4514
|
|
},
|
|
{
|
|
"epoch": 6.78727634194831,
|
|
"grad_norm": 0.19572567045219616,
|
|
"learning_rate": 1.2058546475921305e-07,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12360957264900208,
|
|
"step": 1140,
|
|
"valid_targets_mean": 9654.7,
|
|
"valid_targets_min": 2551
|
|
},
|
|
{
|
|
"epoch": 6.817097415506958,
|
|
"grad_norm": 0.20329684782457239,
|
|
"learning_rate": 9.021974185625004e-08,
|
|
"loss": 0.3644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12088020890951157,
|
|
"step": 1145,
|
|
"valid_targets_mean": 9744.9,
|
|
"valid_targets_min": 2669
|
|
},
|
|
{
|
|
"epoch": 6.846918489065606,
|
|
"grad_norm": 0.19353349526148012,
|
|
"learning_rate": 6.424262813971904e-08,
|
|
"loss": 0.3656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11435583233833313,
|
|
"step": 1150,
|
|
"valid_targets_mean": 9121.9,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 6.8767395626242545,
|
|
"grad_norm": 0.1849991984465579,
|
|
"learning_rate": 4.2659849609520966e-08,
|
|
"loss": 0.3632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12063400447368622,
|
|
"step": 1155,
|
|
"valid_targets_mean": 10022.1,
|
|
"valid_targets_min": 2233
|
|
},
|
|
{
|
|
"epoch": 6.906560636182903,
|
|
"grad_norm": 0.19007497403166831,
|
|
"learning_rate": 2.5476163645143936e-08,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11399763822555542,
|
|
"step": 1160,
|
|
"valid_targets_mean": 9254.8,
|
|
"valid_targets_min": 3517
|
|
},
|
|
{
|
|
"epoch": 6.936381709741551,
|
|
"grad_norm": 0.1948261449647107,
|
|
"learning_rate": 1.2695357957002163e-08,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09775931388139725,
|
|
"step": 1165,
|
|
"valid_targets_mean": 8721.5,
|
|
"valid_targets_min": 2682
|
|
},
|
|
{
|
|
"epoch": 6.966202783300199,
|
|
"grad_norm": 0.21107993966883115,
|
|
"learning_rate": 4.32024975154155e-09,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11073368787765503,
|
|
"step": 1170,
|
|
"valid_targets_mean": 8784.6,
|
|
"valid_targets_min": 2911
|
|
},
|
|
{
|
|
"epoch": 6.996023856858847,
|
|
"grad_norm": 0.19162092330786054,
|
|
"learning_rate": 3.5268511025421393e-10,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11796814203262329,
|
|
"step": 1175,
|
|
"valid_targets_mean": 9922.6,
|
|
"valid_targets_min": 3370
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 1176,
|
|
"total_flos": 4.68228205146905e+18,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 1.3887,
|
|
"train_samples_per_second": 81133.704,
|
|
"train_steps_per_second": 846.823
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1176,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.68228205146905e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|