3007 lines
83 KiB
JSON
3007 lines
83 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1345,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0186219739292365,
|
|
"grad_norm": 21.642117448143836,
|
|
"learning_rate": 1.1851851851851854e-06,
|
|
"loss": 0.8194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38135290145874023,
|
|
"step": 5,
|
|
"valid_targets_mean": 6579.2,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 0.037243947858473,
|
|
"grad_norm": 4.087277364340966,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.7007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29667899012565613,
|
|
"step": 10,
|
|
"valid_targets_mean": 6844.8,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 0.055865921787709494,
|
|
"grad_norm": 1.3577392450412775,
|
|
"learning_rate": 4.1481481481481485e-06,
|
|
"loss": 0.5605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29086098074913025,
|
|
"step": 15,
|
|
"valid_targets_mean": 6970.2,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 0.074487895716946,
|
|
"grad_norm": 0.7405497195075723,
|
|
"learning_rate": 5.62962962962963e-06,
|
|
"loss": 0.5243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757507264614105,
|
|
"step": 20,
|
|
"valid_targets_mean": 7589.0,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 0.0931098696461825,
|
|
"grad_norm": 0.5342294038072763,
|
|
"learning_rate": 7.111111111111112e-06,
|
|
"loss": 0.4723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23901812732219696,
|
|
"step": 25,
|
|
"valid_targets_mean": 7366.7,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 0.11173184357541899,
|
|
"grad_norm": 0.3675025735899647,
|
|
"learning_rate": 8.592592592592593e-06,
|
|
"loss": 0.4134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20101700723171234,
|
|
"step": 30,
|
|
"valid_targets_mean": 6763.6,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 0.1303538175046555,
|
|
"grad_norm": 0.28406425939888624,
|
|
"learning_rate": 1.0074074074074074e-05,
|
|
"loss": 0.4061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2010914832353592,
|
|
"step": 35,
|
|
"valid_targets_mean": 6678.9,
|
|
"valid_targets_min": 2465
|
|
},
|
|
{
|
|
"epoch": 0.148975791433892,
|
|
"grad_norm": 0.24209059303362065,
|
|
"learning_rate": 1.1555555555555556e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803324669599533,
|
|
"step": 40,
|
|
"valid_targets_mean": 7128.3,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 0.16759776536312848,
|
|
"grad_norm": 0.2198398559021447,
|
|
"learning_rate": 1.303703703703704e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17511887848377228,
|
|
"step": 45,
|
|
"valid_targets_mean": 6365.0,
|
|
"valid_targets_min": 1594
|
|
},
|
|
{
|
|
"epoch": 0.186219739292365,
|
|
"grad_norm": 0.23483233838383402,
|
|
"learning_rate": 1.4518518518518521e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17358100414276123,
|
|
"step": 50,
|
|
"valid_targets_mean": 6929.4,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 0.2048417132216015,
|
|
"grad_norm": 0.23317500582763062,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14544479548931122,
|
|
"step": 55,
|
|
"valid_targets_mean": 6168.6,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 0.22346368715083798,
|
|
"grad_norm": 0.22927400488720545,
|
|
"learning_rate": 1.7481481481481483e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14921344816684723,
|
|
"step": 60,
|
|
"valid_targets_mean": 6729.7,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 0.24208566108007448,
|
|
"grad_norm": 0.6336152567068057,
|
|
"learning_rate": 1.8962962962962966e-05,
|
|
"loss": 0.3681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24772310256958008,
|
|
"step": 65,
|
|
"valid_targets_mean": 4494.7,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 0.260707635009311,
|
|
"grad_norm": 0.3813258332534016,
|
|
"learning_rate": 2.0444444444444446e-05,
|
|
"loss": 0.4898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23514796793460846,
|
|
"step": 70,
|
|
"valid_targets_mean": 4495.6,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 0.27932960893854747,
|
|
"grad_norm": 0.35453929366021575,
|
|
"learning_rate": 2.192592592592593e-05,
|
|
"loss": 0.4593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22719019651412964,
|
|
"step": 75,
|
|
"valid_targets_mean": 4864.8,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 0.297951582867784,
|
|
"grad_norm": 0.3727662336939942,
|
|
"learning_rate": 2.3407407407407406e-05,
|
|
"loss": 0.4488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.204828143119812,
|
|
"step": 80,
|
|
"valid_targets_mean": 4095.2,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 0.3165735567970205,
|
|
"grad_norm": 0.3292902280153117,
|
|
"learning_rate": 2.4888888888888893e-05,
|
|
"loss": 0.4398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23676007986068726,
|
|
"step": 85,
|
|
"valid_targets_mean": 4658.2,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 0.33519553072625696,
|
|
"grad_norm": 0.30524766317944435,
|
|
"learning_rate": 2.637037037037037e-05,
|
|
"loss": 0.4274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22691433131694794,
|
|
"step": 90,
|
|
"valid_targets_mean": 4365.0,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 0.3538175046554935,
|
|
"grad_norm": 0.2951571873200663,
|
|
"learning_rate": 2.7851851851851856e-05,
|
|
"loss": 0.4306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22553832828998566,
|
|
"step": 95,
|
|
"valid_targets_mean": 4563.3,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 0.37243947858473,
|
|
"grad_norm": 0.36390699372185614,
|
|
"learning_rate": 2.9333333333333333e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21875639259815216,
|
|
"step": 100,
|
|
"valid_targets_mean": 4605.9,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 0.39106145251396646,
|
|
"grad_norm": 0.3165125581017608,
|
|
"learning_rate": 3.0814814814814816e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20045466721057892,
|
|
"step": 105,
|
|
"valid_targets_mean": 4428.8,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 0.409683426443203,
|
|
"grad_norm": 0.29704668214079993,
|
|
"learning_rate": 3.22962962962963e-05,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2023867964744568,
|
|
"step": 110,
|
|
"valid_targets_mean": 4861.6,
|
|
"valid_targets_min": 2146
|
|
},
|
|
{
|
|
"epoch": 0.42830540037243947,
|
|
"grad_norm": 0.3129499722925657,
|
|
"learning_rate": 3.377777777777778e-05,
|
|
"loss": 0.4089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19118505716323853,
|
|
"step": 115,
|
|
"valid_targets_mean": 4324.2,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 0.44692737430167595,
|
|
"grad_norm": 0.29818090952293125,
|
|
"learning_rate": 3.5259259259259266e-05,
|
|
"loss": 0.4142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2282976657152176,
|
|
"step": 120,
|
|
"valid_targets_mean": 4377.3,
|
|
"valid_targets_min": 1703
|
|
},
|
|
{
|
|
"epoch": 0.4655493482309125,
|
|
"grad_norm": 0.318290618063663,
|
|
"learning_rate": 3.674074074074074e-05,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18989060819149017,
|
|
"step": 125,
|
|
"valid_targets_mean": 4578.9,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 0.48417132216014896,
|
|
"grad_norm": 0.3245155267672114,
|
|
"learning_rate": 3.8222222222222226e-05,
|
|
"loss": 0.4124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19848603010177612,
|
|
"step": 130,
|
|
"valid_targets_mean": 4194.0,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 0.5027932960893855,
|
|
"grad_norm": 0.28543573898331964,
|
|
"learning_rate": 3.970370370370371e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23786954581737518,
|
|
"step": 135,
|
|
"valid_targets_mean": 5342.8,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 0.521415270018622,
|
|
"grad_norm": 0.28802986563270455,
|
|
"learning_rate": 3.999892143807746e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20227207243442535,
|
|
"step": 140,
|
|
"valid_targets_mean": 4513.6,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 0.5400372439478585,
|
|
"grad_norm": 0.30386893239084545,
|
|
"learning_rate": 3.9994539979639836e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18734104931354523,
|
|
"step": 145,
|
|
"valid_targets_mean": 4314.5,
|
|
"valid_targets_min": 2202
|
|
},
|
|
{
|
|
"epoch": 0.5586592178770949,
|
|
"grad_norm": 0.3383103695230139,
|
|
"learning_rate": 3.99867889523818e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18549704551696777,
|
|
"step": 150,
|
|
"valid_targets_mean": 4813.7,
|
|
"valid_targets_min": 2071
|
|
},
|
|
{
|
|
"epoch": 0.5772811918063314,
|
|
"grad_norm": 0.2904479068591015,
|
|
"learning_rate": 3.997566966254095e-05,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1885179728269577,
|
|
"step": 155,
|
|
"valid_targets_mean": 4968.1,
|
|
"valid_targets_min": 2122
|
|
},
|
|
{
|
|
"epoch": 0.595903165735568,
|
|
"grad_norm": 0.25874316987433926,
|
|
"learning_rate": 3.996118398398948e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18153317272663116,
|
|
"step": 160,
|
|
"valid_targets_mean": 4805.8,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 0.6145251396648045,
|
|
"grad_norm": 0.24000968016526364,
|
|
"learning_rate": 3.9943334357918374e-05,
|
|
"loss": 0.3737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17824988067150116,
|
|
"step": 165,
|
|
"valid_targets_mean": 5026.0,
|
|
"valid_targets_min": 2262
|
|
},
|
|
{
|
|
"epoch": 0.633147113594041,
|
|
"grad_norm": 0.2622565490667829,
|
|
"learning_rate": 3.992212379242601e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1855766922235489,
|
|
"step": 170,
|
|
"valid_targets_mean": 5019.6,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 0.6517690875232774,
|
|
"grad_norm": 0.2813347193628216,
|
|
"learning_rate": 3.989755586201125e-05,
|
|
"loss": 0.3753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21819989383220673,
|
|
"step": 175,
|
|
"valid_targets_mean": 5755.8,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 0.6703910614525139,
|
|
"grad_norm": 0.35070472645512296,
|
|
"learning_rate": 3.9869634706971e-05,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17688250541687012,
|
|
"step": 180,
|
|
"valid_targets_mean": 4580.4,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 0.6890130353817505,
|
|
"grad_norm": 0.2455164839195718,
|
|
"learning_rate": 3.983836503270254e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16190500557422638,
|
|
"step": 185,
|
|
"valid_targets_mean": 4942.4,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 0.707635009310987,
|
|
"grad_norm": 0.2517618296962006,
|
|
"learning_rate": 3.9803752108910435e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1465204805135727,
|
|
"step": 190,
|
|
"valid_targets_mean": 5849.2,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 0.7262569832402235,
|
|
"grad_norm": 0.21962381487947016,
|
|
"learning_rate": 3.9765801768718606e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271316260099411,
|
|
"step": 195,
|
|
"valid_targets_mean": 5372.0,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 0.74487895716946,
|
|
"grad_norm": 0.22504863778674275,
|
|
"learning_rate": 3.972452040768718e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1281280517578125,
|
|
"step": 200,
|
|
"valid_targets_mean": 5450.6,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 0.7635009310986964,
|
|
"grad_norm": 0.20034946812157745,
|
|
"learning_rate": 3.9679914982734765e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13299940526485443,
|
|
"step": 205,
|
|
"valid_targets_mean": 5811.3,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 0.7821229050279329,
|
|
"grad_norm": 0.21586328587416592,
|
|
"learning_rate": 3.9631993010966e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12790830433368683,
|
|
"step": 210,
|
|
"valid_targets_mean": 5681.6,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 0.8007448789571695,
|
|
"grad_norm": 0.19845096912413016,
|
|
"learning_rate": 3.958076256840472e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13116233050823212,
|
|
"step": 215,
|
|
"valid_targets_mean": 5787.3,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.819366852886406,
|
|
"grad_norm": 0.20961259827538878,
|
|
"learning_rate": 3.952623228863301e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13585473597049713,
|
|
"step": 220,
|
|
"valid_targets_mean": 5298.0,
|
|
"valid_targets_min": 2592
|
|
},
|
|
{
|
|
"epoch": 0.8379888268156425,
|
|
"grad_norm": 0.20265238224585524,
|
|
"learning_rate": 3.946841136133619e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11897260695695877,
|
|
"step": 225,
|
|
"valid_targets_mean": 5558.0,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 0.8566108007448789,
|
|
"grad_norm": 0.2308437781547286,
|
|
"learning_rate": 3.940730953075414e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11843226104974747,
|
|
"step": 230,
|
|
"valid_targets_mean": 5469.3,
|
|
"valid_targets_min": 1983
|
|
},
|
|
{
|
|
"epoch": 0.8752327746741154,
|
|
"grad_norm": 0.1901545351670762,
|
|
"learning_rate": 3.934293709403915e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12269283086061478,
|
|
"step": 235,
|
|
"valid_targets_mean": 6020.0,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 0.8938547486033519,
|
|
"grad_norm": 0.20390172292403214,
|
|
"learning_rate": 3.9275304899520595e-05,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12945251166820526,
|
|
"step": 240,
|
|
"valid_targets_mean": 5460.2,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 0.9124767225325885,
|
|
"grad_norm": 0.2018105761072753,
|
|
"learning_rate": 3.920442434487676e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11418928951025009,
|
|
"step": 245,
|
|
"valid_targets_mean": 5293.8,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 0.931098696461825,
|
|
"grad_norm": 0.2113162885633886,
|
|
"learning_rate": 3.913030737521401e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13120897114276886,
|
|
"step": 250,
|
|
"valid_targets_mean": 6017.7,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 0.9497206703910615,
|
|
"grad_norm": 0.20370322196516943,
|
|
"learning_rate": 3.905296648105379e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11392738670110703,
|
|
"step": 255,
|
|
"valid_targets_mean": 5287.6,
|
|
"valid_targets_min": 2147
|
|
},
|
|
{
|
|
"epoch": 0.9683426443202979,
|
|
"grad_norm": 0.2091507978308365,
|
|
"learning_rate": 3.8972414696227606e-05,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11450576782226562,
|
|
"step": 260,
|
|
"valid_targets_mean": 6001.4,
|
|
"valid_targets_min": 3237
|
|
},
|
|
{
|
|
"epoch": 0.9869646182495344,
|
|
"grad_norm": 0.19044462299382905,
|
|
"learning_rate": 3.888866559568056e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12079953402280807,
|
|
"step": 265,
|
|
"valid_targets_mean": 5754.3,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 1.0037243947858474,
|
|
"grad_norm": 0.23254152478175197,
|
|
"learning_rate": 3.880173329318363e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569366604089737,
|
|
"step": 270,
|
|
"valid_targets_mean": 7176.8,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 1.0223463687150838,
|
|
"grad_norm": 0.23126772971993143,
|
|
"learning_rate": 3.871163243895514e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16103023290634155,
|
|
"step": 275,
|
|
"valid_targets_mean": 7152.2,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 1.0409683426443204,
|
|
"grad_norm": 0.20910745368528044,
|
|
"learning_rate": 3.861837821719184e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427866518497467,
|
|
"step": 280,
|
|
"valid_targets_mean": 6413.1,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 1.0595903165735567,
|
|
"grad_norm": 0.21857108825645447,
|
|
"learning_rate": 3.852198634351002e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15423469245433807,
|
|
"step": 285,
|
|
"valid_targets_mean": 6742.4,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 1.0782122905027933,
|
|
"grad_norm": 0.24290514151500678,
|
|
"learning_rate": 3.8422473062297e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13684120774269104,
|
|
"step": 290,
|
|
"valid_targets_mean": 6891.3,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 1.0968342644320297,
|
|
"grad_norm": 0.2381895141979772,
|
|
"learning_rate": 3.831985514397363e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1476469486951828,
|
|
"step": 295,
|
|
"valid_targets_mean": 6714.7,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 1.1154562383612663,
|
|
"grad_norm": 0.2204640407674478,
|
|
"learning_rate": 3.8214149882167973e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14708954095840454,
|
|
"step": 300,
|
|
"valid_targets_mean": 7105.4,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 1.1340782122905029,
|
|
"grad_norm": 0.20271638221684102,
|
|
"learning_rate": 3.810537509080096e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319805383682251,
|
|
"step": 305,
|
|
"valid_targets_mean": 6475.6,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 1.1527001862197392,
|
|
"grad_norm": 0.19882669398564898,
|
|
"learning_rate": 3.79935491010843e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14669251441955566,
|
|
"step": 310,
|
|
"valid_targets_mean": 6744.1,
|
|
"valid_targets_min": 2793
|
|
},
|
|
{
|
|
"epoch": 1.1713221601489758,
|
|
"grad_norm": 0.19672205460405268,
|
|
"learning_rate": 3.787869075843124e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271461695432663,
|
|
"step": 315,
|
|
"valid_targets_mean": 6725.4,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 1.1899441340782122,
|
|
"grad_norm": 0.21185057390384063,
|
|
"learning_rate": 3.77608194192806e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1285659223794937,
|
|
"step": 320,
|
|
"valid_targets_mean": 6286.6,
|
|
"valid_targets_min": 2643
|
|
},
|
|
{
|
|
"epoch": 1.2085661080074488,
|
|
"grad_norm": 0.19816569561566144,
|
|
"learning_rate": 3.76399549478348e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13813738524913788,
|
|
"step": 325,
|
|
"valid_targets_mean": 6647.1,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 1.2271880819366854,
|
|
"grad_norm": 0.2196810339151432,
|
|
"learning_rate": 3.75161177127122e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14068274199962616,
|
|
"step": 330,
|
|
"valid_targets_mean": 6991.6,
|
|
"valid_targets_min": 3271
|
|
},
|
|
{
|
|
"epoch": 1.2458100558659218,
|
|
"grad_norm": 0.3172944180604243,
|
|
"learning_rate": 3.7389328583514554e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535311281681061,
|
|
"step": 335,
|
|
"valid_targets_mean": 5593.6,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 1.2644320297951583,
|
|
"grad_norm": 0.6100509748963104,
|
|
"learning_rate": 3.725960892730991e-05,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18261736631393433,
|
|
"step": 340,
|
|
"valid_targets_mean": 4560.6,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 1.2830540037243947,
|
|
"grad_norm": 0.4772667013712657,
|
|
"learning_rate": 3.712698060503178e-05,
|
|
"loss": 0.3539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18201421201229095,
|
|
"step": 345,
|
|
"valid_targets_mean": 4523.4,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 1.3016759776536313,
|
|
"grad_norm": 0.273306718239659,
|
|
"learning_rate": 3.699146596779501e-05,
|
|
"loss": 0.3495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16353599727153778,
|
|
"step": 350,
|
|
"valid_targets_mean": 4178.9,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 1.3202979515828677,
|
|
"grad_norm": 0.24477989967733027,
|
|
"learning_rate": 3.6853087853129076e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1676838994026184,
|
|
"step": 355,
|
|
"valid_targets_mean": 4510.9,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 1.3389199255121043,
|
|
"grad_norm": 0.2650679861397239,
|
|
"learning_rate": 3.6711869581129436e-05,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1828334778547287,
|
|
"step": 360,
|
|
"valid_targets_mean": 5023.4,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 1.3575418994413408,
|
|
"grad_norm": 0.25430947626656825,
|
|
"learning_rate": 3.6567834950527463e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14467652142047882,
|
|
"step": 365,
|
|
"valid_targets_mean": 4319.0,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 1.3761638733705772,
|
|
"grad_norm": 0.28579601865971616,
|
|
"learning_rate": 3.6421008234679834e-05,
|
|
"loss": 0.3391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2052292674779892,
|
|
"step": 370,
|
|
"valid_targets_mean": 5234.1,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 1.3947858472998138,
|
|
"grad_norm": 0.33454042737790046,
|
|
"learning_rate": 3.627141417747783e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16307473182678223,
|
|
"step": 375,
|
|
"valid_targets_mean": 4537.7,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 1.4134078212290504,
|
|
"grad_norm": 0.31924227832532165,
|
|
"learning_rate": 3.611907798917743e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17609496414661407,
|
|
"step": 380,
|
|
"valid_targets_mean": 4638.2,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 1.4320297951582868,
|
|
"grad_norm": 0.3108323026088264,
|
|
"learning_rate": 3.596402534215074e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662149280309677,
|
|
"step": 385,
|
|
"valid_targets_mean": 3677.5,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.4506517690875234,
|
|
"grad_norm": 0.3400629224739461,
|
|
"learning_rate": 3.580628236655955e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1697298288345337,
|
|
"step": 390,
|
|
"valid_targets_mean": 4185.1,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 1.4692737430167597,
|
|
"grad_norm": 0.2692076563805843,
|
|
"learning_rate": 3.564587564595182e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14816735684871674,
|
|
"step": 395,
|
|
"valid_targets_mean": 4103.6,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 1.4878957169459963,
|
|
"grad_norm": 0.2938902558984829,
|
|
"learning_rate": 3.5482832212781655e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14214275777339935,
|
|
"step": 400,
|
|
"valid_targets_mean": 4546.2,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 1.5065176908752327,
|
|
"grad_norm": 0.2779669217184944,
|
|
"learning_rate": 3.5317179543853676e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17869967222213745,
|
|
"step": 405,
|
|
"valid_targets_mean": 4563.6,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 1.5251396648044693,
|
|
"grad_norm": 0.3519340618992221,
|
|
"learning_rate": 3.514894555569255e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1719929575920105,
|
|
"step": 410,
|
|
"valid_targets_mean": 5359.0,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 1.5437616387337059,
|
|
"grad_norm": 0.2895011222215137,
|
|
"learning_rate": 3.497815859983831e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16711927950382233,
|
|
"step": 415,
|
|
"valid_targets_mean": 5427.6,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 1.5623836126629422,
|
|
"grad_norm": 0.30331945108732455,
|
|
"learning_rate": 3.4804847458068504e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1559574156999588,
|
|
"step": 420,
|
|
"valid_targets_mean": 4616.0,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 1.5810055865921788,
|
|
"grad_norm": 0.24000502620110592,
|
|
"learning_rate": 3.462904133754767e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13982824981212616,
|
|
"step": 425,
|
|
"valid_targets_mean": 5337.4,
|
|
"valid_targets_min": 2186
|
|
},
|
|
{
|
|
"epoch": 1.5996275605214154,
|
|
"grad_norm": 0.26300498381054566,
|
|
"learning_rate": 3.445076986590531e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15317204594612122,
|
|
"step": 430,
|
|
"valid_targets_mean": 5250.1,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 1.6182495344506518,
|
|
"grad_norm": 0.2445293076857602,
|
|
"learning_rate": 3.427006308624282e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13379313051700592,
|
|
"step": 435,
|
|
"valid_targets_mean": 5520.8,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 1.6368715083798882,
|
|
"grad_norm": 0.291611256050332,
|
|
"learning_rate": 3.408695145207058e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14052408933639526,
|
|
"step": 440,
|
|
"valid_targets_mean": 4284.8,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 1.6554934823091247,
|
|
"grad_norm": 0.26344185927865893,
|
|
"learning_rate": 3.390146582217572e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14575421810150146,
|
|
"step": 445,
|
|
"valid_targets_mean": 4883.9,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 1.6741154562383613,
|
|
"grad_norm": 0.27491339530871106,
|
|
"learning_rate": 3.3713637455421694e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13534614443778992,
|
|
"step": 450,
|
|
"valid_targets_mean": 4947.1,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 1.6927374301675977,
|
|
"grad_norm": 0.23557643948797238,
|
|
"learning_rate": 3.352349800548039e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12307044118642807,
|
|
"step": 455,
|
|
"valid_targets_mean": 4546.2,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 1.7113594040968343,
|
|
"grad_norm": 0.23755658794564719,
|
|
"learning_rate": 3.333107951549773e-05,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09926241636276245,
|
|
"step": 460,
|
|
"valid_targets_mean": 5677.3,
|
|
"valid_targets_min": 2077
|
|
},
|
|
{
|
|
"epoch": 1.7299813780260709,
|
|
"grad_norm": 0.24052245151458515,
|
|
"learning_rate": 3.313641441269361e-05,
|
|
"loss": 0.194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152091294527054,
|
|
"step": 465,
|
|
"valid_targets_mean": 5346.2,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 1.7486033519553073,
|
|
"grad_norm": 0.20262692112304034,
|
|
"learning_rate": 3.2939535502897075e-05,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10032620280981064,
|
|
"step": 470,
|
|
"valid_targets_mean": 5635.6,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 1.7672253258845436,
|
|
"grad_norm": 0.2082760355908499,
|
|
"learning_rate": 3.27404759650178e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10044640302658081,
|
|
"step": 475,
|
|
"valid_targets_mean": 5877.1,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 1.7858472998137802,
|
|
"grad_norm": 0.18839411974053805,
|
|
"learning_rate": 3.253926934545459e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09364160150289536,
|
|
"step": 480,
|
|
"valid_targets_mean": 5282.3,
|
|
"valid_targets_min": 1863
|
|
},
|
|
{
|
|
"epoch": 1.8044692737430168,
|
|
"grad_norm": 0.2078067971535603,
|
|
"learning_rate": 3.233594955244202e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08583924919366837,
|
|
"step": 485,
|
|
"valid_targets_mean": 5719.1,
|
|
"valid_targets_min": 3505
|
|
},
|
|
{
|
|
"epoch": 1.8230912476722532,
|
|
"grad_norm": 0.2067606733374613,
|
|
"learning_rate": 3.213055085033607e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09971226006746292,
|
|
"step": 490,
|
|
"valid_targets_mean": 5770.4,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 1.8417132216014898,
|
|
"grad_norm": 0.22001102128513125,
|
|
"learning_rate": 3.192310785383967e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09830441325902939,
|
|
"step": 495,
|
|
"valid_targets_mean": 5594.3,
|
|
"valid_targets_min": 2835
|
|
},
|
|
{
|
|
"epoch": 1.8603351955307263,
|
|
"grad_norm": 0.20769566942714865,
|
|
"learning_rate": 3.1713655522169396e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09986946731805801,
|
|
"step": 500,
|
|
"valid_targets_mean": 5947.1,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 1.8789571694599627,
|
|
"grad_norm": 0.20824203625139057,
|
|
"learning_rate": 3.15022291531639e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09173315018415451,
|
|
"step": 505,
|
|
"valid_targets_mean": 5633.9,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.8975791433891993,
|
|
"grad_norm": 0.1895160390922454,
|
|
"learning_rate": 3.128886437733539e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09285343438386917,
|
|
"step": 510,
|
|
"valid_targets_mean": 5799.0,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 1.916201117318436,
|
|
"grad_norm": 0.2073882558654316,
|
|
"learning_rate": 3.1073597151865e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08280744403600693,
|
|
"step": 515,
|
|
"valid_targets_mean": 5286.2,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 1.9348230912476723,
|
|
"grad_norm": 0.19321114943812392,
|
|
"learning_rate": 3.085646375454317e-05,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09179046005010605,
|
|
"step": 520,
|
|
"valid_targets_mean": 5621.4,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 1.9534450651769086,
|
|
"grad_norm": 0.20782347219627428,
|
|
"learning_rate": 3.0637500777655886e-05,
|
|
"loss": 0.1757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09478215128183365,
|
|
"step": 525,
|
|
"valid_targets_mean": 5574.6,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 1.9720670391061452,
|
|
"grad_norm": 0.1995258445427204,
|
|
"learning_rate": 3.0416745121818062e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10429661720991135,
|
|
"step": 530,
|
|
"valid_targets_mean": 5663.5,
|
|
"valid_targets_min": 3093
|
|
},
|
|
{
|
|
"epoch": 1.9906890130353818,
|
|
"grad_norm": 0.19212440943218573,
|
|
"learning_rate": 3.019423398975481e-05,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0856226310133934,
|
|
"step": 535,
|
|
"valid_targets_mean": 5346.0,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 2.007448789571695,
|
|
"grad_norm": 0.2640487831217621,
|
|
"learning_rate": 2.9970004880031918e-05,
|
|
"loss": 0.2138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1243792250752449,
|
|
"step": 540,
|
|
"valid_targets_mean": 6556.2,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 2.026070763500931,
|
|
"grad_norm": 0.22297988123685433,
|
|
"learning_rate": 2.974409558073641e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313825100660324,
|
|
"step": 545,
|
|
"valid_targets_mean": 6745.2,
|
|
"valid_targets_min": 2595
|
|
},
|
|
{
|
|
"epoch": 2.0446927374301676,
|
|
"grad_norm": 0.20812569653815188,
|
|
"learning_rate": 2.9516544163108335e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1266535073518753,
|
|
"step": 550,
|
|
"valid_targets_mean": 6414.2,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 2.063314711359404,
|
|
"grad_norm": 0.21892809836030894,
|
|
"learning_rate": 2.928738897512481e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1333746314048767,
|
|
"step": 555,
|
|
"valid_targets_mean": 7147.8,
|
|
"valid_targets_min": 2654
|
|
},
|
|
{
|
|
"epoch": 2.0819366852886407,
|
|
"grad_norm": 0.2105458445254568,
|
|
"learning_rate": 2.90566686350375e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12962865829467773,
|
|
"step": 560,
|
|
"valid_targets_mean": 7391.1,
|
|
"valid_targets_min": 2506
|
|
},
|
|
{
|
|
"epoch": 2.100558659217877,
|
|
"grad_norm": 0.23229526744342008,
|
|
"learning_rate": 2.8824422024864427e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0939510241150856,
|
|
"step": 565,
|
|
"valid_targets_mean": 5653.9,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 2.1191806331471135,
|
|
"grad_norm": 0.21648288691431336,
|
|
"learning_rate": 2.859068828383747e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11028096079826355,
|
|
"step": 570,
|
|
"valid_targets_mean": 6577.6,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 2.1378026070763503,
|
|
"grad_norm": 0.24028170421075354,
|
|
"learning_rate": 2.8355506801806392e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1300063580274582,
|
|
"step": 575,
|
|
"valid_targets_mean": 7243.6,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 2.1564245810055866,
|
|
"grad_norm": 0.21050370735020552,
|
|
"learning_rate": 2.8118917212600715e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09919723123311996,
|
|
"step": 580,
|
|
"valid_targets_mean": 5979.6,
|
|
"valid_targets_min": 2261
|
|
},
|
|
{
|
|
"epoch": 2.175046554934823,
|
|
"grad_norm": 0.2074491330303158,
|
|
"learning_rate": 2.7880959387350458e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10763826221227646,
|
|
"step": 585,
|
|
"valid_targets_mean": 6526.0,
|
|
"valid_targets_min": 2889
|
|
},
|
|
{
|
|
"epoch": 2.1936685288640594,
|
|
"grad_norm": 0.23033689702375226,
|
|
"learning_rate": 2.7641673427766847e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11596024036407471,
|
|
"step": 590,
|
|
"valid_targets_mean": 6637.4,
|
|
"valid_targets_min": 2198
|
|
},
|
|
{
|
|
"epoch": 2.212290502793296,
|
|
"grad_norm": 0.32105132805524217,
|
|
"learning_rate": 2.740109965938423e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11476022750139236,
|
|
"step": 595,
|
|
"valid_targets_mean": 6630.0,
|
|
"valid_targets_min": 2447
|
|
},
|
|
{
|
|
"epoch": 2.2309124767225326,
|
|
"grad_norm": 0.22409633977119747,
|
|
"learning_rate": 2.715927862476421e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10554740577936172,
|
|
"step": 600,
|
|
"valid_targets_mean": 6784.3,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 2.249534450651769,
|
|
"grad_norm": 0.2699928714484659,
|
|
"learning_rate": 2.6916251076663252e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1415012925863266,
|
|
"step": 605,
|
|
"valid_targets_mean": 4402.8,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 2.2681564245810057,
|
|
"grad_norm": 0.3721989864420147,
|
|
"learning_rate": 2.667205797116484e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13999386131763458,
|
|
"step": 610,
|
|
"valid_targets_mean": 4164.7,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 2.286778398510242,
|
|
"grad_norm": 0.3223866147310536,
|
|
"learning_rate": 2.642674046077737e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12863564491271973,
|
|
"step": 615,
|
|
"valid_targets_mean": 4361.9,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 2.3054003724394785,
|
|
"grad_norm": 0.31856607726998415,
|
|
"learning_rate": 2.618033988749895e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13214845955371857,
|
|
"step": 620,
|
|
"valid_targets_mean": 4420.2,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 2.3240223463687153,
|
|
"grad_norm": 0.32666505138818375,
|
|
"learning_rate": 2.5932897775850276e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12275639921426773,
|
|
"step": 625,
|
|
"valid_targets_mean": 4191.0,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 2.3426443202979517,
|
|
"grad_norm": 0.35120524548526266,
|
|
"learning_rate": 2.568445582587672e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437913030385971,
|
|
"step": 630,
|
|
"valid_targets_mean": 5161.3,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 2.361266294227188,
|
|
"grad_norm": 0.31946963446152915,
|
|
"learning_rate": 2.5435055906120837e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11732056736946106,
|
|
"step": 635,
|
|
"valid_targets_mean": 4429.2,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 2.3798882681564244,
|
|
"grad_norm": 0.3289667519465328,
|
|
"learning_rate": 2.5184740046566537e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12818960845470428,
|
|
"step": 640,
|
|
"valid_targets_mean": 4688.0,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 2.398510242085661,
|
|
"grad_norm": 0.33406726170658674,
|
|
"learning_rate": 2.4933550431555973e-05,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14254990220069885,
|
|
"step": 645,
|
|
"valid_targets_mean": 4809.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 2.4171322160148976,
|
|
"grad_norm": 0.3212253397329,
|
|
"learning_rate": 2.468152939268044e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12575781345367432,
|
|
"step": 650,
|
|
"valid_targets_mean": 4856.3,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 2.435754189944134,
|
|
"grad_norm": 0.4193177434942788,
|
|
"learning_rate": 2.4428719401646494e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13185112178325653,
|
|
"step": 655,
|
|
"valid_targets_mean": 4368.7,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 2.4543761638733708,
|
|
"grad_norm": 0.3218928559582912,
|
|
"learning_rate": 2.4175163063118416e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11782893538475037,
|
|
"step": 660,
|
|
"valid_targets_mean": 4609.1,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 2.472998137802607,
|
|
"grad_norm": 0.39047332217518843,
|
|
"learning_rate": 2.392090310753829e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11099883168935776,
|
|
"step": 665,
|
|
"valid_targets_mean": 3889.4,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 2.4916201117318435,
|
|
"grad_norm": 0.35689901255313794,
|
|
"learning_rate": 2.366598238392487e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11990585923194885,
|
|
"step": 670,
|
|
"valid_targets_mean": 4407.1,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 2.51024208566108,
|
|
"grad_norm": 0.3515897774313373,
|
|
"learning_rate": 2.341044385265248e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12714000046253204,
|
|
"step": 675,
|
|
"valid_targets_mean": 4711.8,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 2.5288640595903167,
|
|
"grad_norm": 0.32159496474421606,
|
|
"learning_rate": 2.315433057821113e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311168670654297,
|
|
"step": 680,
|
|
"valid_targets_mean": 5252.6,
|
|
"valid_targets_min": 2010
|
|
},
|
|
{
|
|
"epoch": 2.547486033519553,
|
|
"grad_norm": 0.2942349994028514,
|
|
"learning_rate": 2.289768572194913e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14049138128757477,
|
|
"step": 685,
|
|
"valid_targets_mean": 5910.4,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 2.5661080074487894,
|
|
"grad_norm": 0.2829531013214266,
|
|
"learning_rate": 2.26405525347993e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10512509196996689,
|
|
"step": 690,
|
|
"valid_targets_mean": 4626.5,
|
|
"valid_targets_min": 2055
|
|
},
|
|
{
|
|
"epoch": 2.5847299813780262,
|
|
"grad_norm": 0.29066488817804154,
|
|
"learning_rate": 2.238297434999016e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12394720315933228,
|
|
"step": 695,
|
|
"valid_targets_mean": 5047.3,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 2.6033519553072626,
|
|
"grad_norm": 0.31069669583595805,
|
|
"learning_rate": 2.212499457574321e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12280986458063126,
|
|
"step": 700,
|
|
"valid_targets_mean": 4993.0,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 2.621973929236499,
|
|
"grad_norm": 0.2847297085592331,
|
|
"learning_rate": 2.1866656687957607e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12000254541635513,
|
|
"step": 705,
|
|
"valid_targets_mean": 4902.9,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 2.6405959031657353,
|
|
"grad_norm": 0.2907401533736448,
|
|
"learning_rate": 2.160800422288338e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1304769217967987,
|
|
"step": 710,
|
|
"valid_targets_mean": 5751.9,
|
|
"valid_targets_min": 2160
|
|
},
|
|
{
|
|
"epoch": 2.659217877094972,
|
|
"grad_norm": 0.2859926818666239,
|
|
"learning_rate": 2.134908076978452e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11222833395004272,
|
|
"step": 715,
|
|
"valid_targets_mean": 5082.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 2.6778398510242085,
|
|
"grad_norm": 0.27206168365644984,
|
|
"learning_rate": 2.1089929963593126e-05,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.101720429956913,
|
|
"step": 720,
|
|
"valid_targets_mean": 5274.8,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 2.6964618249534453,
|
|
"grad_norm": 0.3043304137788501,
|
|
"learning_rate": 2.0830595477555864e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07684291154146194,
|
|
"step": 725,
|
|
"valid_targets_mean": 5933.8,
|
|
"valid_targets_min": 3347
|
|
},
|
|
{
|
|
"epoch": 2.7150837988826817,
|
|
"grad_norm": 0.2402351751603468,
|
|
"learning_rate": 2.0571121015873924e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07310352474451065,
|
|
"step": 730,
|
|
"valid_targets_mean": 5624.6,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 2.733705772811918,
|
|
"grad_norm": 0.24128869777290585,
|
|
"learning_rate": 2.031155030633784e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07828167825937271,
|
|
"step": 735,
|
|
"valid_targets_mean": 5613.7,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 2.7523277467411544,
|
|
"grad_norm": 0.2424343463905981,
|
|
"learning_rate": 2.005192709295824e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07069131731987,
|
|
"step": 740,
|
|
"valid_targets_mean": 5804.0,
|
|
"valid_targets_min": 2140
|
|
},
|
|
{
|
|
"epoch": 2.770949720670391,
|
|
"grad_norm": 0.23424931942920957,
|
|
"learning_rate": 1.979229512859395e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0725865438580513,
|
|
"step": 745,
|
|
"valid_targets_mean": 5391.5,
|
|
"valid_targets_min": 2179
|
|
},
|
|
{
|
|
"epoch": 2.7895716945996276,
|
|
"grad_norm": 0.21095459317824178,
|
|
"learning_rate": 1.953269816757853e-05,
|
|
"loss": 0.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060710322111845016,
|
|
"step": 750,
|
|
"valid_targets_mean": 5356.8,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 2.808193668528864,
|
|
"grad_norm": 0.20598792165943527,
|
|
"learning_rate": 1.9273179958346646e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07347074151039124,
|
|
"step": 755,
|
|
"valid_targets_mean": 5635.1,
|
|
"valid_targets_min": 3283
|
|
},
|
|
{
|
|
"epoch": 2.826815642458101,
|
|
"grad_norm": 0.22337773162145105,
|
|
"learning_rate": 1.9013784236061337e-05,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08201978355646133,
|
|
"step": 760,
|
|
"valid_targets_mean": 5853.5,
|
|
"valid_targets_min": 2873
|
|
},
|
|
{
|
|
"epoch": 2.845437616387337,
|
|
"grad_norm": 0.2198077783372861,
|
|
"learning_rate": 1.875455471524362e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06954735517501831,
|
|
"step": 765,
|
|
"valid_targets_mean": 5576.1,
|
|
"valid_targets_min": 2043
|
|
},
|
|
{
|
|
"epoch": 2.8640595903165735,
|
|
"grad_norm": 0.20508966703816744,
|
|
"learning_rate": 1.8495535082405476e-05,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06442942470312119,
|
|
"step": 770,
|
|
"valid_targets_mean": 5982.4,
|
|
"valid_targets_min": 2448
|
|
},
|
|
{
|
|
"epoch": 2.88268156424581,
|
|
"grad_norm": 0.23390173228904518,
|
|
"learning_rate": 1.8236768988687665e-05,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06609448045492172,
|
|
"step": 775,
|
|
"valid_targets_mean": 5523.9,
|
|
"valid_targets_min": 2423
|
|
},
|
|
{
|
|
"epoch": 2.9013035381750467,
|
|
"grad_norm": 0.2498082897395668,
|
|
"learning_rate": 1.797830004250338e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07419461756944656,
|
|
"step": 780,
|
|
"valid_targets_mean": 5512.2,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 2.919925512104283,
|
|
"grad_norm": 0.23855742502441213,
|
|
"learning_rate": 1.772017180218919e-05,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06143729388713837,
|
|
"step": 785,
|
|
"valid_targets_mean": 5664.5,
|
|
"valid_targets_min": 2283
|
|
},
|
|
{
|
|
"epoch": 2.9385474860335195,
|
|
"grad_norm": 0.2311888603873581,
|
|
"learning_rate": 1.746242776866441e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06190266087651253,
|
|
"step": 790,
|
|
"valid_targets_mean": 5318.5,
|
|
"valid_targets_min": 2009
|
|
},
|
|
{
|
|
"epoch": 2.9571694599627563,
|
|
"grad_norm": 0.2385347395394115,
|
|
"learning_rate": 1.7205111378100097e-05,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0655934065580368,
|
|
"step": 795,
|
|
"valid_targets_mean": 5128.0,
|
|
"valid_targets_min": 2322
|
|
},
|
|
{
|
|
"epoch": 2.9757914338919926,
|
|
"grad_norm": 0.22210726395021782,
|
|
"learning_rate": 1.6948265994599042e-05,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06535261124372482,
|
|
"step": 800,
|
|
"valid_targets_mean": 5618.6,
|
|
"valid_targets_min": 2370
|
|
},
|
|
{
|
|
"epoch": 2.994413407821229,
|
|
"grad_norm": 0.22478461073835102,
|
|
"learning_rate": 1.669193490288781e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07586240768432617,
|
|
"step": 805,
|
|
"valid_targets_mean": 5800.1,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 3.011173184357542,
|
|
"grad_norm": 0.3923546745207127,
|
|
"learning_rate": 1.6436161301022215e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12482082843780518,
|
|
"step": 810,
|
|
"valid_targets_mean": 7708.5,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 3.0297951582867784,
|
|
"grad_norm": 0.2769532328786158,
|
|
"learning_rate": 1.618098829310744e-05,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11922207474708557,
|
|
"step": 815,
|
|
"valid_targets_mean": 7049.1,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 3.0484171322160147,
|
|
"grad_norm": 0.2181540258492701,
|
|
"learning_rate": 1.5926458882033876e-05,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11395502835512161,
|
|
"step": 820,
|
|
"valid_targets_mean": 6744.9,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 3.0670391061452515,
|
|
"grad_norm": 0.2193962929759941,
|
|
"learning_rate": 1.567261596223011e-05,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12162897735834122,
|
|
"step": 825,
|
|
"valid_targets_mean": 7223.4,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 3.085661080074488,
|
|
"grad_norm": 0.20490169049334603,
|
|
"learning_rate": 1.5419502312434177e-05,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10778558254241943,
|
|
"step": 830,
|
|
"valid_targets_mean": 6793.6,
|
|
"valid_targets_min": 2699
|
|
},
|
|
{
|
|
"epoch": 3.1042830540037243,
|
|
"grad_norm": 0.2229350850738388,
|
|
"learning_rate": 1.5167160588484287e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08931157737970352,
|
|
"step": 835,
|
|
"valid_targets_mean": 6319.4,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 3.122905027932961,
|
|
"grad_norm": 0.22327323724804218,
|
|
"learning_rate": 1.4915633316130267e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09623245149850845,
|
|
"step": 840,
|
|
"valid_targets_mean": 6464.3,
|
|
"valid_targets_min": 2301
|
|
},
|
|
{
|
|
"epoch": 3.1415270018621975,
|
|
"grad_norm": 0.2268340495199551,
|
|
"learning_rate": 1.4664962883866936e-05,
|
|
"loss": 0.1997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0771939679980278,
|
|
"step": 845,
|
|
"valid_targets_mean": 5905.9,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 3.160148975791434,
|
|
"grad_norm": 0.2326664565154385,
|
|
"learning_rate": 1.4415191535790605e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09210383892059326,
|
|
"step": 850,
|
|
"valid_targets_mean": 6846.8,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 3.17877094972067,
|
|
"grad_norm": 0.23061554940146764,
|
|
"learning_rate": 1.4166361364479946e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09809307008981705,
|
|
"step": 855,
|
|
"valid_targets_mean": 6919.4,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 3.197392923649907,
|
|
"grad_norm": 0.24843559414613484,
|
|
"learning_rate": 1.3918514303902307e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10550827533006668,
|
|
"step": 860,
|
|
"valid_targets_mean": 7691.2,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 3.2160148975791434,
|
|
"grad_norm": 0.22805182583679512,
|
|
"learning_rate": 1.3671692122346843e-05,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09740543365478516,
|
|
"step": 865,
|
|
"valid_targets_mean": 6916.7,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 3.2346368715083798,
|
|
"grad_norm": 0.22827913971204664,
|
|
"learning_rate": 1.3425936415385557e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10098141431808472,
|
|
"step": 870,
|
|
"valid_targets_mean": 6807.6,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 3.2532588454376166,
|
|
"grad_norm": 0.31207084774804683,
|
|
"learning_rate": 1.318128859886339e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12020784616470337,
|
|
"step": 875,
|
|
"valid_targets_mean": 4461.2,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 3.271880819366853,
|
|
"grad_norm": 0.3393354652534016,
|
|
"learning_rate": 1.2937789901918671e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1211516484618187,
|
|
"step": 880,
|
|
"valid_targets_mean": 4668.2,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 3.2905027932960893,
|
|
"grad_norm": 0.3629688521529729,
|
|
"learning_rate": 1.2695481360034978e-05,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10855499655008316,
|
|
"step": 885,
|
|
"valid_targets_mean": 4609.4,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 3.3091247672253257,
|
|
"grad_norm": 0.3504360151541152,
|
|
"learning_rate": 1.245440380812566e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10300320386886597,
|
|
"step": 890,
|
|
"valid_targets_mean": 4465.0,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 3.3277467411545625,
|
|
"grad_norm": 0.32251798545786803,
|
|
"learning_rate": 1.2214597873652172e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0905773714184761,
|
|
"step": 895,
|
|
"valid_targets_mean": 4838.2,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 3.346368715083799,
|
|
"grad_norm": 0.3336622000100127,
|
|
"learning_rate": 1.1976103969777336e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09432216733694077,
|
|
"step": 900,
|
|
"valid_targets_mean": 4426.6,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 3.364990689013035,
|
|
"grad_norm": 0.3929612418575331,
|
|
"learning_rate": 1.1738962288554745e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0892234817147255,
|
|
"step": 905,
|
|
"valid_targets_mean": 4027.8,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 3.383612662942272,
|
|
"grad_norm": 0.3446626418867246,
|
|
"learning_rate": 1.1503212794155406e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0939427986741066,
|
|
"step": 910,
|
|
"valid_targets_mean": 4722.4,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 3.4022346368715084,
|
|
"grad_norm": 0.35774264340118445,
|
|
"learning_rate": 1.1268895216132818e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11662470549345016,
|
|
"step": 915,
|
|
"valid_targets_mean": 4746.3,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 3.4208566108007448,
|
|
"grad_norm": 0.34279937816191636,
|
|
"learning_rate": 1.1036049042727557e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08681117743253708,
|
|
"step": 920,
|
|
"valid_targets_mean": 4164.0,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 3.439478584729981,
|
|
"grad_norm": 0.3763744700811407,
|
|
"learning_rate": 1.0804713514212554e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09903687238693237,
|
|
"step": 925,
|
|
"valid_targets_mean": 4449.6,
|
|
"valid_targets_min": 2173
|
|
},
|
|
{
|
|
"epoch": 3.458100558659218,
|
|
"grad_norm": 0.35837853241908835,
|
|
"learning_rate": 1.0574927616280139e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09481087327003479,
|
|
"step": 930,
|
|
"valid_targets_mean": 4874.1,
|
|
"valid_targets_min": 2117
|
|
},
|
|
{
|
|
"epoch": 3.4767225325884543,
|
|
"grad_norm": 0.40429914472262846,
|
|
"learning_rate": 1.0346730073471993e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11803603172302246,
|
|
"step": 935,
|
|
"valid_targets_mean": 5221.1,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 3.4953445065176907,
|
|
"grad_norm": 0.3711739680040434,
|
|
"learning_rate": 1.0120159342653153e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08699009567499161,
|
|
"step": 940,
|
|
"valid_targets_mean": 4491.3,
|
|
"valid_targets_min": 1967
|
|
},
|
|
{
|
|
"epoch": 3.5139664804469275,
|
|
"grad_norm": 0.37363867967479586,
|
|
"learning_rate": 9.895253606531038e-06,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08354979753494263,
|
|
"step": 945,
|
|
"valid_targets_mean": 4046.6,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 3.532588454376164,
|
|
"grad_norm": 0.44263358663951935,
|
|
"learning_rate": 9.672050767220765e-06,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11277249455451965,
|
|
"step": 950,
|
|
"valid_targets_mean": 5051.2,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 3.5512104283054002,
|
|
"grad_norm": 0.3336089325731764,
|
|
"learning_rate": 9.450588439857697e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11583083122968674,
|
|
"step": 955,
|
|
"valid_targets_mean": 5088.0,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 3.5698324022346366,
|
|
"grad_norm": 0.29491303529078194,
|
|
"learning_rate": 9.230903946258391e-06,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11058211326599121,
|
|
"step": 960,
|
|
"valid_targets_mean": 5648.7,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 3.5884543761638734,
|
|
"grad_norm": 0.27522508073944885,
|
|
"learning_rate": 9.013034308630945e-06,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08331070095300674,
|
|
"step": 965,
|
|
"valid_targets_mean": 4394.6,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 3.60707635009311,
|
|
"grad_norm": 0.30907108754902735,
|
|
"learning_rate": 8.79701624333585e-06,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09504259377717972,
|
|
"step": 970,
|
|
"valid_targets_mean": 4734.1,
|
|
"valid_targets_min": 2147
|
|
},
|
|
{
|
|
"epoch": 3.6256983240223466,
|
|
"grad_norm": 0.3078382923065214,
|
|
"learning_rate": 8.582886154698407e-06,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08389315009117126,
|
|
"step": 975,
|
|
"valid_targets_mean": 4702.8,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 3.644320297951583,
|
|
"grad_norm": 0.2927314109800218,
|
|
"learning_rate": 8.370680128873679e-06,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08893483877182007,
|
|
"step": 980,
|
|
"valid_targets_mean": 5112.3,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 3.6629422718808193,
|
|
"grad_norm": 0.3049682447465429,
|
|
"learning_rate": 8.160433927765097e-06,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08556380122900009,
|
|
"step": 985,
|
|
"valid_targets_mean": 5058.2,
|
|
"valid_targets_min": 2522
|
|
},
|
|
{
|
|
"epoch": 3.6815642458100557,
|
|
"grad_norm": 0.2770762255480885,
|
|
"learning_rate": 7.952182982997743e-06,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09861546754837036,
|
|
"step": 990,
|
|
"valid_targets_mean": 5131.3,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 3.7001862197392925,
|
|
"grad_norm": 0.3549830142853625,
|
|
"learning_rate": 7.745962389947195e-06,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06256135553121567,
|
|
"step": 995,
|
|
"valid_targets_mean": 5891.1,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 3.718808193668529,
|
|
"grad_norm": 0.28452100847613737,
|
|
"learning_rate": 7.541806901825141e-06,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06664276123046875,
|
|
"step": 1000,
|
|
"valid_targets_mean": 6142.4,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 3.7374301675977653,
|
|
"grad_norm": 0.2744053870236639,
|
|
"learning_rate": 7.339750923822595e-06,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06239338591694832,
|
|
"step": 1005,
|
|
"valid_targets_mean": 5398.8,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 3.756052141527002,
|
|
"grad_norm": 0.23916132063233655,
|
|
"learning_rate": 7.139828507311792e-06,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06390925496816635,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5946.6,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 3.7746741154562384,
|
|
"grad_norm": 0.23032557623436256,
|
|
"learning_rate": 6.942073344107682e-06,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05467592179775238,
|
|
"step": 1015,
|
|
"valid_targets_mean": 5572.2,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 3.793296089385475,
|
|
"grad_norm": 0.24678198189145398,
|
|
"learning_rate": 6.746518760790071e-06,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06743580102920532,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5919.4,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 3.811918063314711,
|
|
"grad_norm": 0.2381354700325367,
|
|
"learning_rate": 6.553197713087227e-06,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0567602775990963,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5579.0,
|
|
"valid_targets_min": 3195
|
|
},
|
|
{
|
|
"epoch": 3.830540037243948,
|
|
"grad_norm": 0.2276090324661052,
|
|
"learning_rate": 6.3621427803220735e-06,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05728829279541969,
|
|
"step": 1030,
|
|
"valid_targets_mean": 5613.9,
|
|
"valid_targets_min": 2897
|
|
},
|
|
{
|
|
"epoch": 3.8491620111731844,
|
|
"grad_norm": 0.21642846836817936,
|
|
"learning_rate": 6.173386159921766e-06,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06424218416213989,
|
|
"step": 1035,
|
|
"valid_targets_mean": 5535.1,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 3.8677839851024207,
|
|
"grad_norm": 0.23163902556844526,
|
|
"learning_rate": 5.98695966199163e-06,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048331666737794876,
|
|
"step": 1040,
|
|
"valid_targets_mean": 5154.7,
|
|
"valid_targets_min": 2315
|
|
},
|
|
{
|
|
"epoch": 3.8864059590316575,
|
|
"grad_norm": 0.21070714474137134,
|
|
"learning_rate": 5.802894703954382e-06,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05012443661689758,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5605.2,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 3.905027932960894,
|
|
"grad_norm": 0.22853410654463693,
|
|
"learning_rate": 5.621222305255554e-06,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05816968157887459,
|
|
"step": 1050,
|
|
"valid_targets_mean": 6049.4,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 3.9236499068901303,
|
|
"grad_norm": 0.2379166167854269,
|
|
"learning_rate": 5.441973082135907e-06,
|
|
"loss": 0.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05406898260116577,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5502.0,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 3.9422718808193666,
|
|
"grad_norm": 0.20973140421298134,
|
|
"learning_rate": 5.265177242471899e-06,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05013309791684151,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5478.0,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 3.9608938547486034,
|
|
"grad_norm": 0.22236062413735788,
|
|
"learning_rate": 5.09086458068488e-06,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05143527686595917,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5612.8,
|
|
"valid_targets_min": 2468
|
|
},
|
|
{
|
|
"epoch": 3.97951582867784,
|
|
"grad_norm": 0.21206442494809297,
|
|
"learning_rate": 4.919064472720014e-06,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05190473794937134,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5816.8,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 3.998137802607076,
|
|
"grad_norm": 0.20629239879718422,
|
|
"learning_rate": 4.749805871095732e-06,
|
|
"loss": 0.1029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046846091747283936,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5753.0,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 4.01489757914339,
|
|
"grad_norm": 0.5308448838529755,
|
|
"learning_rate": 4.5831173000245e-06,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12533260881900787,
|
|
"step": 1080,
|
|
"valid_targets_mean": 7661.5,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 4.033519553072626,
|
|
"grad_norm": 0.24098689788800592,
|
|
"learning_rate": 4.4190268506058074e-06,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09956145286560059,
|
|
"step": 1085,
|
|
"valid_targets_mean": 6907.5,
|
|
"valid_targets_min": 2738
|
|
},
|
|
{
|
|
"epoch": 4.052141527001862,
|
|
"grad_norm": 0.24383435515867613,
|
|
"learning_rate": 4.257562176092127e-06,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0828259289264679,
|
|
"step": 1090,
|
|
"valid_targets_mean": 6410.7,
|
|
"valid_targets_min": 2445
|
|
},
|
|
{
|
|
"epoch": 4.070763500931099,
|
|
"grad_norm": 0.2163950525445711,
|
|
"learning_rate": 4.098750487228653e-06,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08502789586782455,
|
|
"step": 1095,
|
|
"valid_targets_mean": 6399.4,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 4.089385474860335,
|
|
"grad_norm": 0.21083241045721227,
|
|
"learning_rate": 3.942618547667656e-06,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07837125658988953,
|
|
"step": 1100,
|
|
"valid_targets_mean": 6164.7,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 4.1080074487895715,
|
|
"grad_norm": 0.20466030093676923,
|
|
"learning_rate": 3.7891926694581216e-06,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07821375131607056,
|
|
"step": 1105,
|
|
"valid_targets_mean": 6921.8,
|
|
"valid_targets_min": 2809
|
|
},
|
|
{
|
|
"epoch": 4.126629422718808,
|
|
"grad_norm": 0.22208208771018614,
|
|
"learning_rate": 3.6384987086115353e-06,
|
|
"loss": 0.1719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09721019864082336,
|
|
"step": 1110,
|
|
"valid_targets_mean": 6955.7,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 4.145251396648045,
|
|
"grad_norm": 0.2135551173088567,
|
|
"learning_rate": 3.49056206074452e-06,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08980279415845871,
|
|
"step": 1115,
|
|
"valid_targets_mean": 7009.1,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 4.1638733705772815,
|
|
"grad_norm": 0.22700578037823518,
|
|
"learning_rate": 3.345407656799058e-06,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07464779168367386,
|
|
"step": 1120,
|
|
"valid_targets_mean": 6647.1,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 4.182495344506518,
|
|
"grad_norm": 0.2492117954904908,
|
|
"learning_rate": 3.203059958840999e-06,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08767087012529373,
|
|
"step": 1125,
|
|
"valid_targets_mean": 7243.6,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 4.201117318435754,
|
|
"grad_norm": 0.25428629048582396,
|
|
"learning_rate": 3.063542955937615e-06,
|
|
"loss": 0.1654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09225225448608398,
|
|
"step": 1130,
|
|
"valid_targets_mean": 7255.7,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 4.219739292364991,
|
|
"grad_norm": 0.25094232783984594,
|
|
"learning_rate": 2.9268801601148555e-06,
|
|
"loss": 0.1654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07944104820489883,
|
|
"step": 1135,
|
|
"valid_targets_mean": 7046.5,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 4.238361266294227,
|
|
"grad_norm": 0.24866583116101532,
|
|
"learning_rate": 2.793094602395008e-06,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06737985461950302,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5729.9,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 4.256983240223463,
|
|
"grad_norm": 0.39758422193173765,
|
|
"learning_rate": 2.6622088289153804e-06,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09932229667901993,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4044.7,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 4.275605214152701,
|
|
"grad_norm": 0.3591756238366576,
|
|
"learning_rate": 2.534244897128748e-06,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0849931463599205,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4355.0,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 4.294227188081937,
|
|
"grad_norm": 0.31402517638422384,
|
|
"learning_rate": 2.4092243720861276e-06,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12222952395677567,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5516.1,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 4.312849162011173,
|
|
"grad_norm": 0.33093787158415533,
|
|
"learning_rate": 2.287168322802533e-06,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09058675169944763,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4852.4,
|
|
"valid_targets_min": 2071
|
|
},
|
|
{
|
|
"epoch": 4.33147113594041,
|
|
"grad_norm": 0.29044590186071456,
|
|
"learning_rate": 2.1680973187063415e-06,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07333887368440628,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4316.4,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 4.350093109869646,
|
|
"grad_norm": 0.310482032598639,
|
|
"learning_rate": 2.0520314261728357e-06,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07164377719163895,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4140.9,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 4.368715083798882,
|
|
"grad_norm": 0.29736588517519846,
|
|
"learning_rate": 1.938990205142526e-06,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07602109760046005,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4135.1,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 4.387337057728119,
|
|
"grad_norm": 0.3160724687733261,
|
|
"learning_rate": 1.8289927058248325e-06,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07650075107812881,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4301.7,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 4.405959031657356,
|
|
"grad_norm": 0.28031984965761936,
|
|
"learning_rate": 1.7220574654876453e-06,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07221689075231552,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4130.4,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 4.424581005586592,
|
|
"grad_norm": 0.3076875162786684,
|
|
"learning_rate": 1.6182025053333595e-06,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06376394629478455,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3774.1,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 4.443202979515829,
|
|
"grad_norm": 0.2836853100448112,
|
|
"learning_rate": 1.5174453274618416e-06,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0705125704407692,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4669.5,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 4.461824953445065,
|
|
"grad_norm": 0.31779131062906657,
|
|
"learning_rate": 1.4198029119209112e-06,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06056446209549904,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3839.8,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 4.4804469273743015,
|
|
"grad_norm": 0.2857165674407037,
|
|
"learning_rate": 1.325291713844785e-06,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06362733244895935,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4157.1,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 4.499068901303538,
|
|
"grad_norm": 0.29043499467358436,
|
|
"learning_rate": 1.2339276606809824e-06,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07273241877555847,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4532.7,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 4.517690875232775,
|
|
"grad_norm": 0.27187077336242454,
|
|
"learning_rate": 1.145726149506161e-06,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058025311678647995,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3950.7,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 4.5363128491620115,
|
|
"grad_norm": 0.4861947344266717,
|
|
"learning_rate": 1.0607020444313431e-06,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0979158952832222,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5058.6,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 4.554934823091248,
|
|
"grad_norm": 0.32419104546438365,
|
|
"learning_rate": 9.788696740969295e-07,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0731668546795845,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4897.2,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 4.573556797020484,
|
|
"grad_norm": 0.29019151441664376,
|
|
"learning_rate": 9.002428292579912e-07,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0815751776099205,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4843.5,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 4.592178770949721,
|
|
"grad_norm": 0.28619939519110477,
|
|
"learning_rate": 8.248347604601803e-07,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06282777339220047,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4174.7,
|
|
"valid_targets_min": 1942
|
|
},
|
|
{
|
|
"epoch": 4.610800744878957,
|
|
"grad_norm": 0.24908229050606268,
|
|
"learning_rate": 7.526581758066931e-07,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06715219467878342,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4759.8,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 4.629422718808193,
|
|
"grad_norm": 0.25880591606207554,
|
|
"learning_rate": 6.837252388166416e-07,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07651010155677795,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5333.6,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 4.648044692737431,
|
|
"grad_norm": 0.2497181360656177,
|
|
"learning_rate": 6.180475663752106e-07,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07430170476436615,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4628.3,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 4.666666666666667,
|
|
"grad_norm": 0.23020108011760998,
|
|
"learning_rate": 5.556362267759153e-07,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07936949282884598,
|
|
"step": 1255,
|
|
"valid_targets_mean": 5335.1,
|
|
"valid_targets_min": 1892
|
|
},
|
|
{
|
|
"epoch": 4.685288640595903,
|
|
"grad_norm": 0.2454009759799112,
|
|
"learning_rate": 4.965017378553349e-07,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09208754450082779,
|
|
"step": 1260,
|
|
"valid_targets_mean": 5511.2,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 4.70391061452514,
|
|
"grad_norm": 0.3235628120281909,
|
|
"learning_rate": 4.4065406522059374e-07,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05675048008561134,
|
|
"step": 1265,
|
|
"valid_targets_mean": 5457.9,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 4.722532588454376,
|
|
"grad_norm": 0.3334445932900592,
|
|
"learning_rate": 3.8810262056991676e-07,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056893110275268555,
|
|
"step": 1270,
|
|
"valid_targets_mean": 6020.2,
|
|
"valid_targets_min": 3332
|
|
},
|
|
{
|
|
"epoch": 4.741154562383612,
|
|
"grad_norm": 0.25685520224274744,
|
|
"learning_rate": 3.3885626010652153e-07,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049848925322294235,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5805.6,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 4.759776536312849,
|
|
"grad_norm": 0.2188109546053078,
|
|
"learning_rate": 2.929232830461404e-07,
|
|
"loss": 0.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04564082622528076,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5415.1,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 4.778398510242086,
|
|
"grad_norm": 0.2187677112583894,
|
|
"learning_rate": 2.503114302183951e-07,
|
|
"loss": 0.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04444780945777893,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5522.8,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 4.797020484171322,
|
|
"grad_norm": 0.22208851636510005,
|
|
"learning_rate": 2.110278827622758e-07,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05595209822058678,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5780.2,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 4.815642458100559,
|
|
"grad_norm": 0.21060016822581026,
|
|
"learning_rate": 1.7507926091594685e-07,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04811148717999458,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5595.6,
|
|
"valid_targets_min": 2959
|
|
},
|
|
{
|
|
"epoch": 4.834264432029795,
|
|
"grad_norm": 0.19833964961713638,
|
|
"learning_rate": 1.4247162290107697e-07,
|
|
"loss": 0.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05214523896574974,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5988.6,
|
|
"valid_targets_min": 3215
|
|
},
|
|
{
|
|
"epoch": 4.8528864059590315,
|
|
"grad_norm": 0.2990227082543972,
|
|
"learning_rate": 1.1321046390187385e-07,
|
|
"loss": 0.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05182049795985222,
|
|
"step": 1305,
|
|
"valid_targets_mean": 5927.0,
|
|
"valid_targets_min": 2563
|
|
},
|
|
{
|
|
"epoch": 4.871508379888268,
|
|
"grad_norm": 0.2015079045157211,
|
|
"learning_rate": 8.730071513901594e-08,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045056309551000595,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5677.5,
|
|
"valid_targets_min": 2261
|
|
},
|
|
{
|
|
"epoch": 4.890130353817504,
|
|
"grad_norm": 0.20975848200966107,
|
|
"learning_rate": 6.474674303862172e-08,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04260837659239769,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5403.8,
|
|
"valid_targets_min": 2460
|
|
},
|
|
{
|
|
"epoch": 4.9087523277467415,
|
|
"grad_norm": 0.19303968844504368,
|
|
"learning_rate": 4.555234849639823e-08,
|
|
"loss": 0.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04524529352784157,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5690.6,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 4.927374301675978,
|
|
"grad_norm": 0.19699741738755241,
|
|
"learning_rate": 2.9720766237095745e-08,
|
|
"loss": 0.0904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05184662342071533,
|
|
"step": 1325,
|
|
"valid_targets_mean": 5530.8,
|
|
"valid_targets_min": 2383
|
|
},
|
|
{
|
|
"epoch": 4.945996275605214,
|
|
"grad_norm": 0.18957844995965004,
|
|
"learning_rate": 1.7254664269381604e-08,
|
|
"loss": 0.0894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03667488321661949,
|
|
"step": 1330,
|
|
"valid_targets_mean": 5664.4,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 4.964618249534451,
|
|
"grad_norm": 0.19863766435376254,
|
|
"learning_rate": 8.156143436215403e-09,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049284886568784714,
|
|
"step": 1335,
|
|
"valid_targets_mean": 5771.7,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 4.983240223463687,
|
|
"grad_norm": 0.2042161969158567,
|
|
"learning_rate": 2.426737060798878e-09,
|
|
"loss": 0.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052975624799728394,
|
|
"step": 1340,
|
|
"valid_targets_mean": 5368.6,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.34259468317161895,
|
|
"learning_rate": 6.741068818261198e-11,
|
|
"loss": 0.0868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08489986509084702,
|
|
"step": 1345,
|
|
"valid_targets_mean": 7030.6,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08489986509084702,
|
|
"step": 1345,
|
|
"total_flos": 3.418313925563777e+18,
|
|
"train_loss": 0.2346252706192683,
|
|
"train_runtime": 20740.2268,
|
|
"train_samples_per_second": 6.206,
|
|
"train_steps_per_second": 0.065,
|
|
"valid_targets_mean": 7030.6,
|
|
"valid_targets_min": 2798
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1345,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 750,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.418313925563777e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|