Files
g1_weighted_31600_cap10_8b/trainer_state.json
ModelHub XC b417287b07 初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/g1_weighted_31600_cap10_8b
Source: Original Platform
2026-04-27 09:11:08 +08:00

3007 lines
83 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1345,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0186219739292365,
"grad_norm": 21.642117448143836,
"learning_rate": 1.1851851851851854e-06,
"loss": 0.8194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38135290145874023,
"step": 5,
"valid_targets_mean": 6579.2,
"valid_targets_min": 1651
},
{
"epoch": 0.037243947858473,
"grad_norm": 4.087277364340966,
"learning_rate": 2.666666666666667e-06,
"loss": 0.7007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29667899012565613,
"step": 10,
"valid_targets_mean": 6844.8,
"valid_targets_min": 2928
},
{
"epoch": 0.055865921787709494,
"grad_norm": 1.3577392450412775,
"learning_rate": 4.1481481481481485e-06,
"loss": 0.5605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29086098074913025,
"step": 15,
"valid_targets_mean": 6970.2,
"valid_targets_min": 1816
},
{
"epoch": 0.074487895716946,
"grad_norm": 0.7405497195075723,
"learning_rate": 5.62962962962963e-06,
"loss": 0.5243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2757507264614105,
"step": 20,
"valid_targets_mean": 7589.0,
"valid_targets_min": 2058
},
{
"epoch": 0.0931098696461825,
"grad_norm": 0.5342294038072763,
"learning_rate": 7.111111111111112e-06,
"loss": 0.4723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23901812732219696,
"step": 25,
"valid_targets_mean": 7366.7,
"valid_targets_min": 3293
},
{
"epoch": 0.11173184357541899,
"grad_norm": 0.3675025735899647,
"learning_rate": 8.592592592592593e-06,
"loss": 0.4134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20101700723171234,
"step": 30,
"valid_targets_mean": 6763.6,
"valid_targets_min": 2844
},
{
"epoch": 0.1303538175046555,
"grad_norm": 0.28406425939888624,
"learning_rate": 1.0074074074074074e-05,
"loss": 0.4061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2010914832353592,
"step": 35,
"valid_targets_mean": 6678.9,
"valid_targets_min": 2465
},
{
"epoch": 0.148975791433892,
"grad_norm": 0.24209059303362065,
"learning_rate": 1.1555555555555556e-05,
"loss": 0.3775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1803324669599533,
"step": 40,
"valid_targets_mean": 7128.3,
"valid_targets_min": 2482
},
{
"epoch": 0.16759776536312848,
"grad_norm": 0.2198398559021447,
"learning_rate": 1.303703703703704e-05,
"loss": 0.3596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17511887848377228,
"step": 45,
"valid_targets_mean": 6365.0,
"valid_targets_min": 1594
},
{
"epoch": 0.186219739292365,
"grad_norm": 0.23483233838383402,
"learning_rate": 1.4518518518518521e-05,
"loss": 0.3496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17358100414276123,
"step": 50,
"valid_targets_mean": 6929.4,
"valid_targets_min": 2099
},
{
"epoch": 0.2048417132216015,
"grad_norm": 0.23317500582763062,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.3422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14544479548931122,
"step": 55,
"valid_targets_mean": 6168.6,
"valid_targets_min": 1277
},
{
"epoch": 0.22346368715083798,
"grad_norm": 0.22927400488720545,
"learning_rate": 1.7481481481481483e-05,
"loss": 0.3347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14921344816684723,
"step": 60,
"valid_targets_mean": 6729.7,
"valid_targets_min": 2910
},
{
"epoch": 0.24208566108007448,
"grad_norm": 0.6336152567068057,
"learning_rate": 1.8962962962962966e-05,
"loss": 0.3681,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24772310256958008,
"step": 65,
"valid_targets_mean": 4494.7,
"valid_targets_min": 1555
},
{
"epoch": 0.260707635009311,
"grad_norm": 0.3813258332534016,
"learning_rate": 2.0444444444444446e-05,
"loss": 0.4898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23514796793460846,
"step": 70,
"valid_targets_mean": 4495.6,
"valid_targets_min": 1665
},
{
"epoch": 0.27932960893854747,
"grad_norm": 0.35453929366021575,
"learning_rate": 2.192592592592593e-05,
"loss": 0.4593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22719019651412964,
"step": 75,
"valid_targets_mean": 4864.8,
"valid_targets_min": 1754
},
{
"epoch": 0.297951582867784,
"grad_norm": 0.3727662336939942,
"learning_rate": 2.3407407407407406e-05,
"loss": 0.4488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.204828143119812,
"step": 80,
"valid_targets_mean": 4095.2,
"valid_targets_min": 1680
},
{
"epoch": 0.3165735567970205,
"grad_norm": 0.3292902280153117,
"learning_rate": 2.4888888888888893e-05,
"loss": 0.4398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23676007986068726,
"step": 85,
"valid_targets_mean": 4658.2,
"valid_targets_min": 1772
},
{
"epoch": 0.33519553072625696,
"grad_norm": 0.30524766317944435,
"learning_rate": 2.637037037037037e-05,
"loss": 0.4274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22691433131694794,
"step": 90,
"valid_targets_mean": 4365.0,
"valid_targets_min": 1554
},
{
"epoch": 0.3538175046554935,
"grad_norm": 0.2951571873200663,
"learning_rate": 2.7851851851851856e-05,
"loss": 0.4306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22553832828998566,
"step": 95,
"valid_targets_mean": 4563.3,
"valid_targets_min": 1295
},
{
"epoch": 0.37243947858473,
"grad_norm": 0.36390699372185614,
"learning_rate": 2.9333333333333333e-05,
"loss": 0.4196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21875639259815216,
"step": 100,
"valid_targets_mean": 4605.9,
"valid_targets_min": 1345
},
{
"epoch": 0.39106145251396646,
"grad_norm": 0.3165125581017608,
"learning_rate": 3.0814814814814816e-05,
"loss": 0.4131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20045466721057892,
"step": 105,
"valid_targets_mean": 4428.8,
"valid_targets_min": 1371
},
{
"epoch": 0.409683426443203,
"grad_norm": 0.29704668214079993,
"learning_rate": 3.22962962962963e-05,
"loss": 0.4105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2023867964744568,
"step": 110,
"valid_targets_mean": 4861.6,
"valid_targets_min": 2146
},
{
"epoch": 0.42830540037243947,
"grad_norm": 0.3129499722925657,
"learning_rate": 3.377777777777778e-05,
"loss": 0.4089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19118505716323853,
"step": 115,
"valid_targets_mean": 4324.2,
"valid_targets_min": 1820
},
{
"epoch": 0.44692737430167595,
"grad_norm": 0.29818090952293125,
"learning_rate": 3.5259259259259266e-05,
"loss": 0.4142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2282976657152176,
"step": 120,
"valid_targets_mean": 4377.3,
"valid_targets_min": 1703
},
{
"epoch": 0.4655493482309125,
"grad_norm": 0.318290618063663,
"learning_rate": 3.674074074074074e-05,
"loss": 0.4009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18989060819149017,
"step": 125,
"valid_targets_mean": 4578.9,
"valid_targets_min": 1563
},
{
"epoch": 0.48417132216014896,
"grad_norm": 0.3245155267672114,
"learning_rate": 3.8222222222222226e-05,
"loss": 0.4124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19848603010177612,
"step": 130,
"valid_targets_mean": 4194.0,
"valid_targets_min": 1500
},
{
"epoch": 0.5027932960893855,
"grad_norm": 0.28543573898331964,
"learning_rate": 3.970370370370371e-05,
"loss": 0.4016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23786954581737518,
"step": 135,
"valid_targets_mean": 5342.8,
"valid_targets_min": 1610
},
{
"epoch": 0.521415270018622,
"grad_norm": 0.28802986563270455,
"learning_rate": 3.999892143807746e-05,
"loss": 0.401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20227207243442535,
"step": 140,
"valid_targets_mean": 4513.6,
"valid_targets_min": 1404
},
{
"epoch": 0.5400372439478585,
"grad_norm": 0.30386893239084545,
"learning_rate": 3.9994539979639836e-05,
"loss": 0.405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18734104931354523,
"step": 145,
"valid_targets_mean": 4314.5,
"valid_targets_min": 2202
},
{
"epoch": 0.5586592178770949,
"grad_norm": 0.3383103695230139,
"learning_rate": 3.99867889523818e-05,
"loss": 0.4053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18549704551696777,
"step": 150,
"valid_targets_mean": 4813.7,
"valid_targets_min": 2071
},
{
"epoch": 0.5772811918063314,
"grad_norm": 0.2904479068591015,
"learning_rate": 3.997566966254095e-05,
"loss": 0.3914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1885179728269577,
"step": 155,
"valid_targets_mean": 4968.1,
"valid_targets_min": 2122
},
{
"epoch": 0.595903165735568,
"grad_norm": 0.25874316987433926,
"learning_rate": 3.996118398398948e-05,
"loss": 0.3708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18153317272663116,
"step": 160,
"valid_targets_mean": 4805.8,
"valid_targets_min": 2169
},
{
"epoch": 0.6145251396648045,
"grad_norm": 0.24000968016526364,
"learning_rate": 3.9943334357918374e-05,
"loss": 0.3737,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17824988067150116,
"step": 165,
"valid_targets_mean": 5026.0,
"valid_targets_min": 2262
},
{
"epoch": 0.633147113594041,
"grad_norm": 0.2622565490667829,
"learning_rate": 3.992212379242601e-05,
"loss": 0.365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1855766922235489,
"step": 170,
"valid_targets_mean": 5019.6,
"valid_targets_min": 1638
},
{
"epoch": 0.6517690875232774,
"grad_norm": 0.2813347193628216,
"learning_rate": 3.989755586201125e-05,
"loss": 0.3753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21819989383220673,
"step": 175,
"valid_targets_mean": 5755.8,
"valid_targets_min": 1629
},
{
"epoch": 0.6703910614525139,
"grad_norm": 0.35070472645512296,
"learning_rate": 3.9869634706971e-05,
"loss": 0.3622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17688250541687012,
"step": 180,
"valid_targets_mean": 4580.4,
"valid_targets_min": 1779
},
{
"epoch": 0.6890130353817505,
"grad_norm": 0.2455164839195718,
"learning_rate": 3.983836503270254e-05,
"loss": 0.3633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16190500557422638,
"step": 185,
"valid_targets_mean": 4942.4,
"valid_targets_min": 1684
},
{
"epoch": 0.707635009310987,
"grad_norm": 0.2517618296962006,
"learning_rate": 3.9803752108910435e-05,
"loss": 0.3062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1465204805135727,
"step": 190,
"valid_targets_mean": 5849.2,
"valid_targets_min": 1692
},
{
"epoch": 0.7262569832402235,
"grad_norm": 0.21962381487947016,
"learning_rate": 3.9765801768718606e-05,
"loss": 0.2655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1271316260099411,
"step": 195,
"valid_targets_mean": 5372.0,
"valid_targets_min": 1636
},
{
"epoch": 0.74487895716946,
"grad_norm": 0.22504863778674275,
"learning_rate": 3.972452040768718e-05,
"loss": 0.2684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1281280517578125,
"step": 200,
"valid_targets_mean": 5450.6,
"valid_targets_min": 1560
},
{
"epoch": 0.7635009310986964,
"grad_norm": 0.20034946812157745,
"learning_rate": 3.9679914982734765e-05,
"loss": 0.2578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13299940526485443,
"step": 205,
"valid_targets_mean": 5811.3,
"valid_targets_min": 3371
},
{
"epoch": 0.7821229050279329,
"grad_norm": 0.21586328587416592,
"learning_rate": 3.9631993010966e-05,
"loss": 0.2544,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12790830433368683,
"step": 210,
"valid_targets_mean": 5681.6,
"valid_targets_min": 2255
},
{
"epoch": 0.8007448789571695,
"grad_norm": 0.19845096912413016,
"learning_rate": 3.958076256840472e-05,
"loss": 0.254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13116233050823212,
"step": 215,
"valid_targets_mean": 5787.3,
"valid_targets_min": 1115
},
{
"epoch": 0.819366852886406,
"grad_norm": 0.20961259827538878,
"learning_rate": 3.952623228863301e-05,
"loss": 0.2465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13585473597049713,
"step": 220,
"valid_targets_mean": 5298.0,
"valid_targets_min": 2592
},
{
"epoch": 0.8379888268156425,
"grad_norm": 0.20265238224585524,
"learning_rate": 3.946841136133619e-05,
"loss": 0.2481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11897260695695877,
"step": 225,
"valid_targets_mean": 5558.0,
"valid_targets_min": 1887
},
{
"epoch": 0.8566108007448789,
"grad_norm": 0.2308437781547286,
"learning_rate": 3.940730953075414e-05,
"loss": 0.2521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11843226104974747,
"step": 230,
"valid_targets_mean": 5469.3,
"valid_targets_min": 1983
},
{
"epoch": 0.8752327746741154,
"grad_norm": 0.1901545351670762,
"learning_rate": 3.934293709403915e-05,
"loss": 0.2419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12269283086061478,
"step": 235,
"valid_targets_mean": 6020.0,
"valid_targets_min": 2484
},
{
"epoch": 0.8938547486033519,
"grad_norm": 0.20390172292403214,
"learning_rate": 3.9275304899520595e-05,
"loss": 0.2353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12945251166820526,
"step": 240,
"valid_targets_mean": 5460.2,
"valid_targets_min": 2296
},
{
"epoch": 0.9124767225325885,
"grad_norm": 0.2018105761072753,
"learning_rate": 3.920442434487676e-05,
"loss": 0.2391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11418928951025009,
"step": 245,
"valid_targets_mean": 5293.8,
"valid_targets_min": 2419
},
{
"epoch": 0.931098696461825,
"grad_norm": 0.2113162885633886,
"learning_rate": 3.913030737521401e-05,
"loss": 0.2391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13120897114276886,
"step": 250,
"valid_targets_mean": 6017.7,
"valid_targets_min": 1306
},
{
"epoch": 0.9497206703910615,
"grad_norm": 0.20370322196516943,
"learning_rate": 3.905296648105379e-05,
"loss": 0.2295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11392738670110703,
"step": 255,
"valid_targets_mean": 5287.6,
"valid_targets_min": 2147
},
{
"epoch": 0.9683426443202979,
"grad_norm": 0.2091507978308365,
"learning_rate": 3.8972414696227606e-05,
"loss": 0.2339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11450576782226562,
"step": 260,
"valid_targets_mean": 6001.4,
"valid_targets_min": 3237
},
{
"epoch": 0.9869646182495344,
"grad_norm": 0.19044462299382905,
"learning_rate": 3.888866559568056e-05,
"loss": 0.2371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12079953402280807,
"step": 265,
"valid_targets_mean": 5754.3,
"valid_targets_min": 2672
},
{
"epoch": 1.0037243947858474,
"grad_norm": 0.23254152478175197,
"learning_rate": 3.880173329318363e-05,
"loss": 0.2601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1569366604089737,
"step": 270,
"valid_targets_mean": 7176.8,
"valid_targets_min": 2475
},
{
"epoch": 1.0223463687150838,
"grad_norm": 0.23126772971993143,
"learning_rate": 3.871163243895514e-05,
"loss": 0.3195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16103023290634155,
"step": 275,
"valid_targets_mean": 7152.2,
"valid_targets_min": 1817
},
{
"epoch": 1.0409683426443204,
"grad_norm": 0.20910745368528044,
"learning_rate": 3.861837821719184e-05,
"loss": 0.3114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1427866518497467,
"step": 280,
"valid_targets_mean": 6413.1,
"valid_targets_min": 2694
},
{
"epoch": 1.0595903165735567,
"grad_norm": 0.21857108825645447,
"learning_rate": 3.852198634351002e-05,
"loss": 0.3012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15423469245433807,
"step": 285,
"valid_targets_mean": 6742.4,
"valid_targets_min": 2529
},
{
"epoch": 1.0782122905027933,
"grad_norm": 0.24290514151500678,
"learning_rate": 3.8422473062297e-05,
"loss": 0.3037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13684120774269104,
"step": 290,
"valid_targets_mean": 6891.3,
"valid_targets_min": 2165
},
{
"epoch": 1.0968342644320297,
"grad_norm": 0.2381895141979772,
"learning_rate": 3.831985514397363e-05,
"loss": 0.2963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1476469486951828,
"step": 295,
"valid_targets_mean": 6714.7,
"valid_targets_min": 1734
},
{
"epoch": 1.1154562383612663,
"grad_norm": 0.2204640407674478,
"learning_rate": 3.8214149882167973e-05,
"loss": 0.2813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14708954095840454,
"step": 300,
"valid_targets_mean": 7105.4,
"valid_targets_min": 1845
},
{
"epoch": 1.1340782122905029,
"grad_norm": 0.20271638221684102,
"learning_rate": 3.810537509080096e-05,
"loss": 0.2861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1319805383682251,
"step": 305,
"valid_targets_mean": 6475.6,
"valid_targets_min": 2297
},
{
"epoch": 1.1527001862197392,
"grad_norm": 0.19882669398564898,
"learning_rate": 3.79935491010843e-05,
"loss": 0.2812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14669251441955566,
"step": 310,
"valid_targets_mean": 6744.1,
"valid_targets_min": 2793
},
{
"epoch": 1.1713221601489758,
"grad_norm": 0.19672205460405268,
"learning_rate": 3.787869075843124e-05,
"loss": 0.2742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1271461695432663,
"step": 315,
"valid_targets_mean": 6725.4,
"valid_targets_min": 2520
},
{
"epoch": 1.1899441340782122,
"grad_norm": 0.21185057390384063,
"learning_rate": 3.77608194192806e-05,
"loss": 0.2768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1285659223794937,
"step": 320,
"valid_targets_mean": 6286.6,
"valid_targets_min": 2643
},
{
"epoch": 1.2085661080074488,
"grad_norm": 0.19816569561566144,
"learning_rate": 3.76399549478348e-05,
"loss": 0.278,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13813738524913788,
"step": 325,
"valid_targets_mean": 6647.1,
"valid_targets_min": 2218
},
{
"epoch": 1.2271880819366854,
"grad_norm": 0.2196810339151432,
"learning_rate": 3.75161177127122e-05,
"loss": 0.2762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14068274199962616,
"step": 330,
"valid_targets_mean": 6991.6,
"valid_targets_min": 3271
},
{
"epoch": 1.2458100558659218,
"grad_norm": 0.3172944180604243,
"learning_rate": 3.7389328583514554e-05,
"loss": 0.3238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2535311281681061,
"step": 335,
"valid_targets_mean": 5593.6,
"valid_targets_min": 1902
},
{
"epoch": 1.2644320297951583,
"grad_norm": 0.6100509748963104,
"learning_rate": 3.725960892730991e-05,
"loss": 0.3686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18261736631393433,
"step": 340,
"valid_targets_mean": 4560.6,
"valid_targets_min": 1726
},
{
"epoch": 1.2830540037243947,
"grad_norm": 0.4772667013712657,
"learning_rate": 3.712698060503178e-05,
"loss": 0.3539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18201421201229095,
"step": 345,
"valid_targets_mean": 4523.4,
"valid_targets_min": 1801
},
{
"epoch": 1.3016759776536313,
"grad_norm": 0.273306718239659,
"learning_rate": 3.699146596779501e-05,
"loss": 0.3495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16353599727153778,
"step": 350,
"valid_targets_mean": 4178.9,
"valid_targets_min": 1509
},
{
"epoch": 1.3202979515828677,
"grad_norm": 0.24477989967733027,
"learning_rate": 3.6853087853129076e-05,
"loss": 0.3471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1676838994026184,
"step": 355,
"valid_targets_mean": 4510.9,
"valid_targets_min": 1718
},
{
"epoch": 1.3389199255121043,
"grad_norm": 0.2650679861397239,
"learning_rate": 3.6711869581129436e-05,
"loss": 0.3419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1828334778547287,
"step": 360,
"valid_targets_mean": 5023.4,
"valid_targets_min": 1480
},
{
"epoch": 1.3575418994413408,
"grad_norm": 0.25430947626656825,
"learning_rate": 3.6567834950527463e-05,
"loss": 0.3288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14467652142047882,
"step": 365,
"valid_targets_mean": 4319.0,
"valid_targets_min": 1555
},
{
"epoch": 1.3761638733705772,
"grad_norm": 0.28579601865971616,
"learning_rate": 3.6421008234679834e-05,
"loss": 0.3391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2052292674779892,
"step": 370,
"valid_targets_mean": 5234.1,
"valid_targets_min": 1586
},
{
"epoch": 1.3947858472998138,
"grad_norm": 0.33454042737790046,
"learning_rate": 3.627141417747783e-05,
"loss": 0.3213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16307473182678223,
"step": 375,
"valid_targets_mean": 4537.7,
"valid_targets_min": 1861
},
{
"epoch": 1.4134078212290504,
"grad_norm": 0.31924227832532165,
"learning_rate": 3.611907798917743e-05,
"loss": 0.3218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17609496414661407,
"step": 380,
"valid_targets_mean": 4638.2,
"valid_targets_min": 1752
},
{
"epoch": 1.4320297951582868,
"grad_norm": 0.3108323026088264,
"learning_rate": 3.596402534215074e-05,
"loss": 0.3201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1662149280309677,
"step": 385,
"valid_targets_mean": 3677.5,
"valid_targets_min": 1683
},
{
"epoch": 1.4506517690875234,
"grad_norm": 0.3400629224739461,
"learning_rate": 3.580628236655955e-05,
"loss": 0.3241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1697298288345337,
"step": 390,
"valid_targets_mean": 4185.1,
"valid_targets_min": 1365
},
{
"epoch": 1.4692737430167597,
"grad_norm": 0.2692076563805843,
"learning_rate": 3.564587564595182e-05,
"loss": 0.3105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14816735684871674,
"step": 395,
"valid_targets_mean": 4103.6,
"valid_targets_min": 1774
},
{
"epoch": 1.4878957169459963,
"grad_norm": 0.2938902558984829,
"learning_rate": 3.5482832212781655e-05,
"loss": 0.3195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14214275777339935,
"step": 400,
"valid_targets_mean": 4546.2,
"valid_targets_min": 1444
},
{
"epoch": 1.5065176908752327,
"grad_norm": 0.2779669217184944,
"learning_rate": 3.5317179543853676e-05,
"loss": 0.3116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17869967222213745,
"step": 405,
"valid_targets_mean": 4563.6,
"valid_targets_min": 1730
},
{
"epoch": 1.5251396648044693,
"grad_norm": 0.3519340618992221,
"learning_rate": 3.514894555569255e-05,
"loss": 0.3168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1719929575920105,
"step": 410,
"valid_targets_mean": 5359.0,
"valid_targets_min": 1927
},
{
"epoch": 1.5437616387337059,
"grad_norm": 0.2895011222215137,
"learning_rate": 3.497815859983831e-05,
"loss": 0.3164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16711927950382233,
"step": 415,
"valid_targets_mean": 5427.6,
"valid_targets_min": 1828
},
{
"epoch": 1.5623836126629422,
"grad_norm": 0.30331945108732455,
"learning_rate": 3.4804847458068504e-05,
"loss": 0.3082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1559574156999588,
"step": 420,
"valid_targets_mean": 4616.0,
"valid_targets_min": 1635
},
{
"epoch": 1.5810055865921788,
"grad_norm": 0.24000502620110592,
"learning_rate": 3.462904133754767e-05,
"loss": 0.3025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13982824981212616,
"step": 425,
"valid_targets_mean": 5337.4,
"valid_targets_min": 2186
},
{
"epoch": 1.5996275605214154,
"grad_norm": 0.26300498381054566,
"learning_rate": 3.445076986590531e-05,
"loss": 0.2898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15317204594612122,
"step": 430,
"valid_targets_mean": 5250.1,
"valid_targets_min": 2190
},
{
"epoch": 1.6182495344506518,
"grad_norm": 0.2445293076857602,
"learning_rate": 3.427006308624282e-05,
"loss": 0.2912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13379313051700592,
"step": 435,
"valid_targets_mean": 5520.8,
"valid_targets_min": 1685
},
{
"epoch": 1.6368715083798882,
"grad_norm": 0.291611256050332,
"learning_rate": 3.408695145207058e-05,
"loss": 0.2914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14052408933639526,
"step": 440,
"valid_targets_mean": 4284.8,
"valid_targets_min": 1917
},
{
"epoch": 1.6554934823091247,
"grad_norm": 0.26344185927865893,
"learning_rate": 3.390146582217572e-05,
"loss": 0.3026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14575421810150146,
"step": 445,
"valid_targets_mean": 4883.9,
"valid_targets_min": 2201
},
{
"epoch": 1.6741154562383613,
"grad_norm": 0.27491339530871106,
"learning_rate": 3.3713637455421694e-05,
"loss": 0.296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13534614443778992,
"step": 450,
"valid_targets_mean": 4947.1,
"valid_targets_min": 2005
},
{
"epoch": 1.6927374301675977,
"grad_norm": 0.23557643948797238,
"learning_rate": 3.352349800548039e-05,
"loss": 0.2856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12307044118642807,
"step": 455,
"valid_targets_mean": 4546.2,
"valid_targets_min": 1287
},
{
"epoch": 1.7113594040968343,
"grad_norm": 0.23755658794564719,
"learning_rate": 3.333107951549773e-05,
"loss": 0.2116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09926241636276245,
"step": 460,
"valid_targets_mean": 5677.3,
"valid_targets_min": 2077
},
{
"epoch": 1.7299813780260709,
"grad_norm": 0.24052245151458515,
"learning_rate": 3.313641441269361e-05,
"loss": 0.194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09152091294527054,
"step": 465,
"valid_targets_mean": 5346.2,
"valid_targets_min": 1922
},
{
"epoch": 1.7486033519553073,
"grad_norm": 0.20262692112304034,
"learning_rate": 3.2939535502897075e-05,
"loss": 0.1991,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10032620280981064,
"step": 470,
"valid_targets_mean": 5635.6,
"valid_targets_min": 2843
},
{
"epoch": 1.7672253258845436,
"grad_norm": 0.2082760355908499,
"learning_rate": 3.27404759650178e-05,
"loss": 0.1914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10044640302658081,
"step": 475,
"valid_targets_mean": 5877.1,
"valid_targets_min": 1542
},
{
"epoch": 1.7858472998137802,
"grad_norm": 0.18839411974053805,
"learning_rate": 3.253926934545459e-05,
"loss": 0.1902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09364160150289536,
"step": 480,
"valid_targets_mean": 5282.3,
"valid_targets_min": 1863
},
{
"epoch": 1.8044692737430168,
"grad_norm": 0.2078067971535603,
"learning_rate": 3.233594955244202e-05,
"loss": 0.1863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08583924919366837,
"step": 485,
"valid_targets_mean": 5719.1,
"valid_targets_min": 3505
},
{
"epoch": 1.8230912476722532,
"grad_norm": 0.2067606733374613,
"learning_rate": 3.213055085033607e-05,
"loss": 0.1869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09971226006746292,
"step": 490,
"valid_targets_mean": 5770.4,
"valid_targets_min": 3200
},
{
"epoch": 1.8417132216014898,
"grad_norm": 0.22001102128513125,
"learning_rate": 3.192310785383967e-05,
"loss": 0.1879,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09830441325902939,
"step": 495,
"valid_targets_mean": 5594.3,
"valid_targets_min": 2835
},
{
"epoch": 1.8603351955307263,
"grad_norm": 0.20769566942714865,
"learning_rate": 3.1713655522169396e-05,
"loss": 0.1922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09986946731805801,
"step": 500,
"valid_targets_mean": 5947.1,
"valid_targets_min": 2162
},
{
"epoch": 1.8789571694599627,
"grad_norm": 0.20824203625139057,
"learning_rate": 3.15022291531639e-05,
"loss": 0.1777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09173315018415451,
"step": 505,
"valid_targets_mean": 5633.9,
"valid_targets_min": 962
},
{
"epoch": 1.8975791433891993,
"grad_norm": 0.1895160390922454,
"learning_rate": 3.128886437733539e-05,
"loss": 0.1804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09285343438386917,
"step": 510,
"valid_targets_mean": 5799.0,
"valid_targets_min": 1817
},
{
"epoch": 1.916201117318436,
"grad_norm": 0.2073882558654316,
"learning_rate": 3.1073597151865e-05,
"loss": 0.1807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08280744403600693,
"step": 515,
"valid_targets_mean": 5286.2,
"valid_targets_min": 1184
},
{
"epoch": 1.9348230912476723,
"grad_norm": 0.19321114943812392,
"learning_rate": 3.085646375454317e-05,
"loss": 0.184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09179046005010605,
"step": 520,
"valid_targets_mean": 5621.4,
"valid_targets_min": 1620
},
{
"epoch": 1.9534450651769086,
"grad_norm": 0.20782347219627428,
"learning_rate": 3.0637500777655886e-05,
"loss": 0.1757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09478215128183365,
"step": 525,
"valid_targets_mean": 5574.6,
"valid_targets_min": 1495
},
{
"epoch": 1.9720670391061452,
"grad_norm": 0.1995258445427204,
"learning_rate": 3.0416745121818062e-05,
"loss": 0.1826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10429661720991135,
"step": 530,
"valid_targets_mean": 5663.5,
"valid_targets_min": 3093
},
{
"epoch": 1.9906890130353818,
"grad_norm": 0.19212440943218573,
"learning_rate": 3.019423398975481e-05,
"loss": 0.1796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0856226310133934,
"step": 535,
"valid_targets_mean": 5346.0,
"valid_targets_min": 1972
},
{
"epoch": 2.007448789571695,
"grad_norm": 0.2640487831217621,
"learning_rate": 2.9970004880031918e-05,
"loss": 0.2138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1243792250752449,
"step": 540,
"valid_targets_mean": 6556.2,
"valid_targets_min": 1732
},
{
"epoch": 2.026070763500931,
"grad_norm": 0.22297988123685433,
"learning_rate": 2.974409558073641e-05,
"loss": 0.2688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1313825100660324,
"step": 545,
"valid_targets_mean": 6745.2,
"valid_targets_min": 2595
},
{
"epoch": 2.0446927374301676,
"grad_norm": 0.20812569653815188,
"learning_rate": 2.9516544163108335e-05,
"loss": 0.2521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1266535073518753,
"step": 550,
"valid_targets_mean": 6414.2,
"valid_targets_min": 1804
},
{
"epoch": 2.063314711359404,
"grad_norm": 0.21892809836030894,
"learning_rate": 2.928738897512481e-05,
"loss": 0.2519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1333746314048767,
"step": 555,
"valid_targets_mean": 7147.8,
"valid_targets_min": 2654
},
{
"epoch": 2.0819366852886407,
"grad_norm": 0.2105458445254568,
"learning_rate": 2.90566686350375e-05,
"loss": 0.257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12962865829467773,
"step": 560,
"valid_targets_mean": 7391.1,
"valid_targets_min": 2506
},
{
"epoch": 2.100558659217877,
"grad_norm": 0.23229526744342008,
"learning_rate": 2.8824422024864427e-05,
"loss": 0.2446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0939510241150856,
"step": 565,
"valid_targets_mean": 5653.9,
"valid_targets_min": 2050
},
{
"epoch": 2.1191806331471135,
"grad_norm": 0.21648288691431336,
"learning_rate": 2.859068828383747e-05,
"loss": 0.2375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11028096079826355,
"step": 570,
"valid_targets_mean": 6577.6,
"valid_targets_min": 2180
},
{
"epoch": 2.1378026070763503,
"grad_norm": 0.24028170421075354,
"learning_rate": 2.8355506801806392e-05,
"loss": 0.2403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1300063580274582,
"step": 575,
"valid_targets_mean": 7243.6,
"valid_targets_min": 1462
},
{
"epoch": 2.1564245810055866,
"grad_norm": 0.21050370735020552,
"learning_rate": 2.8118917212600715e-05,
"loss": 0.229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09919723123311996,
"step": 580,
"valid_targets_mean": 5979.6,
"valid_targets_min": 2261
},
{
"epoch": 2.175046554934823,
"grad_norm": 0.2074491330303158,
"learning_rate": 2.7880959387350458e-05,
"loss": 0.2248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10763826221227646,
"step": 585,
"valid_targets_mean": 6526.0,
"valid_targets_min": 2889
},
{
"epoch": 2.1936685288640594,
"grad_norm": 0.23033689702375226,
"learning_rate": 2.7641673427766847e-05,
"loss": 0.2338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11596024036407471,
"step": 590,
"valid_targets_mean": 6637.4,
"valid_targets_min": 2198
},
{
"epoch": 2.212290502793296,
"grad_norm": 0.32105132805524217,
"learning_rate": 2.740109965938423e-05,
"loss": 0.2302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11476022750139236,
"step": 595,
"valid_targets_mean": 6630.0,
"valid_targets_min": 2447
},
{
"epoch": 2.2309124767225326,
"grad_norm": 0.22409633977119747,
"learning_rate": 2.715927862476421e-05,
"loss": 0.2317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10554740577936172,
"step": 600,
"valid_targets_mean": 6784.3,
"valid_targets_min": 2018
},
{
"epoch": 2.249534450651769,
"grad_norm": 0.2699928714484659,
"learning_rate": 2.6916251076663252e-05,
"loss": 0.274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1415012925863266,
"step": 605,
"valid_targets_mean": 4402.8,
"valid_targets_min": 1761
},
{
"epoch": 2.2681564245810057,
"grad_norm": 0.3721989864420147,
"learning_rate": 2.667205797116484e-05,
"loss": 0.2939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13999386131763458,
"step": 610,
"valid_targets_mean": 4164.7,
"valid_targets_min": 1572
},
{
"epoch": 2.286778398510242,
"grad_norm": 0.3223866147310536,
"learning_rate": 2.642674046077737e-05,
"loss": 0.2744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12863564491271973,
"step": 615,
"valid_targets_mean": 4361.9,
"valid_targets_min": 1692
},
{
"epoch": 2.3054003724394785,
"grad_norm": 0.31856607726998415,
"learning_rate": 2.618033988749895e-05,
"loss": 0.2649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13214845955371857,
"step": 620,
"valid_targets_mean": 4420.2,
"valid_targets_min": 1890
},
{
"epoch": 2.3240223463687153,
"grad_norm": 0.32666505138818375,
"learning_rate": 2.5932897775850276e-05,
"loss": 0.2591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12275639921426773,
"step": 625,
"valid_targets_mean": 4191.0,
"valid_targets_min": 1736
},
{
"epoch": 2.3426443202979517,
"grad_norm": 0.35120524548526266,
"learning_rate": 2.568445582587672e-05,
"loss": 0.2585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1437913030385971,
"step": 630,
"valid_targets_mean": 5161.3,
"valid_targets_min": 1889
},
{
"epoch": 2.361266294227188,
"grad_norm": 0.31946963446152915,
"learning_rate": 2.5435055906120837e-05,
"loss": 0.2389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11732056736946106,
"step": 635,
"valid_targets_mean": 4429.2,
"valid_targets_min": 1278
},
{
"epoch": 2.3798882681564244,
"grad_norm": 0.3289667519465328,
"learning_rate": 2.5184740046566537e-05,
"loss": 0.2651,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12818960845470428,
"step": 640,
"valid_targets_mean": 4688.0,
"valid_targets_min": 1434
},
{
"epoch": 2.398510242085661,
"grad_norm": 0.33406726170658674,
"learning_rate": 2.4933550431555973e-05,
"loss": 0.2483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14254990220069885,
"step": 645,
"valid_targets_mean": 4809.8,
"valid_targets_min": 1383
},
{
"epoch": 2.4171322160148976,
"grad_norm": 0.3212253397329,
"learning_rate": 2.468152939268044e-05,
"loss": 0.2535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12575781345367432,
"step": 650,
"valid_targets_mean": 4856.3,
"valid_targets_min": 1622
},
{
"epoch": 2.435754189944134,
"grad_norm": 0.4193177434942788,
"learning_rate": 2.4428719401646494e-05,
"loss": 0.2432,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13185112178325653,
"step": 655,
"valid_targets_mean": 4368.7,
"valid_targets_min": 1635
},
{
"epoch": 2.4543761638733708,
"grad_norm": 0.3218928559582912,
"learning_rate": 2.4175163063118416e-05,
"loss": 0.2479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11782893538475037,
"step": 660,
"valid_targets_mean": 4609.1,
"valid_targets_min": 1411
},
{
"epoch": 2.472998137802607,
"grad_norm": 0.39047332217518843,
"learning_rate": 2.392090310753829e-05,
"loss": 0.2326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11099883168935776,
"step": 665,
"valid_targets_mean": 3889.4,
"valid_targets_min": 1389
},
{
"epoch": 2.4916201117318435,
"grad_norm": 0.35689901255313794,
"learning_rate": 2.366598238392487e-05,
"loss": 0.2395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11990585923194885,
"step": 670,
"valid_targets_mean": 4407.1,
"valid_targets_min": 1660
},
{
"epoch": 2.51024208566108,
"grad_norm": 0.3515897774313373,
"learning_rate": 2.341044385265248e-05,
"loss": 0.2448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12714000046253204,
"step": 675,
"valid_targets_mean": 4711.8,
"valid_targets_min": 1891
},
{
"epoch": 2.5288640595903167,
"grad_norm": 0.32159496474421606,
"learning_rate": 2.315433057821113e-05,
"loss": 0.2418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1311168670654297,
"step": 680,
"valid_targets_mean": 5252.6,
"valid_targets_min": 2010
},
{
"epoch": 2.547486033519553,
"grad_norm": 0.2942349994028514,
"learning_rate": 2.289768572194913e-05,
"loss": 0.265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14049138128757477,
"step": 685,
"valid_targets_mean": 5910.4,
"valid_targets_min": 2167
},
{
"epoch": 2.5661080074487894,
"grad_norm": 0.2829531013214266,
"learning_rate": 2.26405525347993e-05,
"loss": 0.2454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10512509196996689,
"step": 690,
"valid_targets_mean": 4626.5,
"valid_targets_min": 2055
},
{
"epoch": 2.5847299813780262,
"grad_norm": 0.29066488817804154,
"learning_rate": 2.238297434999016e-05,
"loss": 0.2397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12394720315933228,
"step": 695,
"valid_targets_mean": 5047.3,
"valid_targets_min": 1900
},
{
"epoch": 2.6033519553072626,
"grad_norm": 0.31069669583595805,
"learning_rate": 2.212499457574321e-05,
"loss": 0.2313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12280986458063126,
"step": 700,
"valid_targets_mean": 4993.0,
"valid_targets_min": 1957
},
{
"epoch": 2.621973929236499,
"grad_norm": 0.2847297085592331,
"learning_rate": 2.1866656687957607e-05,
"loss": 0.2297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12000254541635513,
"step": 705,
"valid_targets_mean": 4902.9,
"valid_targets_min": 2061
},
{
"epoch": 2.6405959031657353,
"grad_norm": 0.2907401533736448,
"learning_rate": 2.160800422288338e-05,
"loss": 0.233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1304769217967987,
"step": 710,
"valid_targets_mean": 5751.9,
"valid_targets_min": 2160
},
{
"epoch": 2.659217877094972,
"grad_norm": 0.2859926818666239,
"learning_rate": 2.134908076978452e-05,
"loss": 0.2397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11222833395004272,
"step": 715,
"valid_targets_mean": 5082.8,
"valid_targets_min": 1988
},
{
"epoch": 2.6778398510242085,
"grad_norm": 0.27206168365644984,
"learning_rate": 2.1089929963593126e-05,
"loss": 0.2315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.101720429956913,
"step": 720,
"valid_targets_mean": 5274.8,
"valid_targets_min": 1647
},
{
"epoch": 2.6964618249534453,
"grad_norm": 0.3043304137788501,
"learning_rate": 2.0830595477555864e-05,
"loss": 0.225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07684291154146194,
"step": 725,
"valid_targets_mean": 5933.8,
"valid_targets_min": 3347
},
{
"epoch": 2.7150837988826817,
"grad_norm": 0.2402351751603468,
"learning_rate": 2.0571121015873924e-05,
"loss": 0.1535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07310352474451065,
"step": 730,
"valid_targets_mean": 5624.6,
"valid_targets_min": 2049
},
{
"epoch": 2.733705772811918,
"grad_norm": 0.24128869777290585,
"learning_rate": 2.031155030633784e-05,
"loss": 0.1558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07828167825937271,
"step": 735,
"valid_targets_mean": 5613.7,
"valid_targets_min": 1873
},
{
"epoch": 2.7523277467411544,
"grad_norm": 0.2424343463905981,
"learning_rate": 2.005192709295824e-05,
"loss": 0.15,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07069131731987,
"step": 740,
"valid_targets_mean": 5804.0,
"valid_targets_min": 2140
},
{
"epoch": 2.770949720670391,
"grad_norm": 0.23424931942920957,
"learning_rate": 1.979229512859395e-05,
"loss": 0.1465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0725865438580513,
"step": 745,
"valid_targets_mean": 5391.5,
"valid_targets_min": 2179
},
{
"epoch": 2.7895716945996276,
"grad_norm": 0.21095459317824178,
"learning_rate": 1.953269816757853e-05,
"loss": 0.141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.060710322111845016,
"step": 750,
"valid_targets_mean": 5356.8,
"valid_targets_min": 2059
},
{
"epoch": 2.808193668528864,
"grad_norm": 0.20598792165943527,
"learning_rate": 1.9273179958346646e-05,
"loss": 0.1421,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07347074151039124,
"step": 755,
"valid_targets_mean": 5635.1,
"valid_targets_min": 3283
},
{
"epoch": 2.826815642458101,
"grad_norm": 0.22337773162145105,
"learning_rate": 1.9013784236061337e-05,
"loss": 0.1466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08201978355646133,
"step": 760,
"valid_targets_mean": 5853.5,
"valid_targets_min": 2873
},
{
"epoch": 2.845437616387337,
"grad_norm": 0.2198077783372861,
"learning_rate": 1.875455471524362e-05,
"loss": 0.1435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06954735517501831,
"step": 765,
"valid_targets_mean": 5576.1,
"valid_targets_min": 2043
},
{
"epoch": 2.8640595903165735,
"grad_norm": 0.20508966703816744,
"learning_rate": 1.8495535082405476e-05,
"loss": 0.1461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06442942470312119,
"step": 770,
"valid_targets_mean": 5982.4,
"valid_targets_min": 2448
},
{
"epoch": 2.88268156424581,
"grad_norm": 0.23390173228904518,
"learning_rate": 1.8236768988687665e-05,
"loss": 0.1345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06609448045492172,
"step": 775,
"valid_targets_mean": 5523.9,
"valid_targets_min": 2423
},
{
"epoch": 2.9013035381750467,
"grad_norm": 0.2498082897395668,
"learning_rate": 1.797830004250338e-05,
"loss": 0.1395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07419461756944656,
"step": 780,
"valid_targets_mean": 5512.2,
"valid_targets_min": 1837
},
{
"epoch": 2.919925512104283,
"grad_norm": 0.23855742502441213,
"learning_rate": 1.772017180218919e-05,
"loss": 0.1342,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06143729388713837,
"step": 785,
"valid_targets_mean": 5664.5,
"valid_targets_min": 2283
},
{
"epoch": 2.9385474860335195,
"grad_norm": 0.2311888603873581,
"learning_rate": 1.746242776866441e-05,
"loss": 0.1369,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06190266087651253,
"step": 790,
"valid_targets_mean": 5318.5,
"valid_targets_min": 2009
},
{
"epoch": 2.9571694599627563,
"grad_norm": 0.2385347395394115,
"learning_rate": 1.7205111378100097e-05,
"loss": 0.1344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0655934065580368,
"step": 795,
"valid_targets_mean": 5128.0,
"valid_targets_min": 2322
},
{
"epoch": 2.9757914338919926,
"grad_norm": 0.22210726395021782,
"learning_rate": 1.6948265994599042e-05,
"loss": 0.1346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06535261124372482,
"step": 800,
"valid_targets_mean": 5618.6,
"valid_targets_min": 2370
},
{
"epoch": 2.994413407821229,
"grad_norm": 0.22478461073835102,
"learning_rate": 1.669193490288781e-05,
"loss": 0.1364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07586240768432617,
"step": 805,
"valid_targets_mean": 5800.1,
"valid_targets_min": 1139
},
{
"epoch": 3.011173184357542,
"grad_norm": 0.3923546745207127,
"learning_rate": 1.6436161301022215e-05,
"loss": 0.1839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12482082843780518,
"step": 810,
"valid_targets_mean": 7708.5,
"valid_targets_min": 2114
},
{
"epoch": 3.0297951582867784,
"grad_norm": 0.2769532328786158,
"learning_rate": 1.618098829310744e-05,
"loss": 0.2342,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11922207474708557,
"step": 815,
"valid_targets_mean": 7049.1,
"valid_targets_min": 1582
},
{
"epoch": 3.0484171322160147,
"grad_norm": 0.2181540258492701,
"learning_rate": 1.5926458882033876e-05,
"loss": 0.2119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11395502835512161,
"step": 820,
"valid_targets_mean": 6744.9,
"valid_targets_min": 2415
},
{
"epoch": 3.0670391061452515,
"grad_norm": 0.2193962929759941,
"learning_rate": 1.567261596223011e-05,
"loss": 0.2119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12162897735834122,
"step": 825,
"valid_targets_mean": 7223.4,
"valid_targets_min": 1917
},
{
"epoch": 3.085661080074488,
"grad_norm": 0.20490169049334603,
"learning_rate": 1.5419502312434177e-05,
"loss": 0.2146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10778558254241943,
"step": 830,
"valid_targets_mean": 6793.6,
"valid_targets_min": 2699
},
{
"epoch": 3.1042830540037243,
"grad_norm": 0.2229350850738388,
"learning_rate": 1.5167160588484287e-05,
"loss": 0.2006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08931157737970352,
"step": 835,
"valid_targets_mean": 6319.4,
"valid_targets_min": 2331
},
{
"epoch": 3.122905027932961,
"grad_norm": 0.22327323724804218,
"learning_rate": 1.4915633316130267e-05,
"loss": 0.2012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09623245149850845,
"step": 840,
"valid_targets_mean": 6464.3,
"valid_targets_min": 2301
},
{
"epoch": 3.1415270018621975,
"grad_norm": 0.2268340495199551,
"learning_rate": 1.4664962883866936e-05,
"loss": 0.1997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0771939679980278,
"step": 845,
"valid_targets_mean": 5905.9,
"valid_targets_min": 1789
},
{
"epoch": 3.160148975791434,
"grad_norm": 0.2326664565154385,
"learning_rate": 1.4415191535790605e-05,
"loss": 0.1908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09210383892059326,
"step": 850,
"valid_targets_mean": 6846.8,
"valid_targets_min": 2291
},
{
"epoch": 3.17877094972067,
"grad_norm": 0.23061554940146764,
"learning_rate": 1.4166361364479946e-05,
"loss": 0.1819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09809307008981705,
"step": 855,
"valid_targets_mean": 6919.4,
"valid_targets_min": 1723
},
{
"epoch": 3.197392923649907,
"grad_norm": 0.24843559414613484,
"learning_rate": 1.3918514303902307e-05,
"loss": 0.1955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10550827533006668,
"step": 860,
"valid_targets_mean": 7691.2,
"valid_targets_min": 2510
},
{
"epoch": 3.2160148975791434,
"grad_norm": 0.22805182583679512,
"learning_rate": 1.3671692122346843e-05,
"loss": 0.1935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09740543365478516,
"step": 865,
"valid_targets_mean": 6916.7,
"valid_targets_min": 2234
},
{
"epoch": 3.2346368715083798,
"grad_norm": 0.22827913971204664,
"learning_rate": 1.3425936415385557e-05,
"loss": 0.1911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10098141431808472,
"step": 870,
"valid_targets_mean": 6807.6,
"valid_targets_min": 2726
},
{
"epoch": 3.2532588454376166,
"grad_norm": 0.31207084774804683,
"learning_rate": 1.318128859886339e-05,
"loss": 0.2223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12020784616470337,
"step": 875,
"valid_targets_mean": 4461.2,
"valid_targets_min": 1463
},
{
"epoch": 3.271880819366853,
"grad_norm": 0.3393354652534016,
"learning_rate": 1.2937789901918671e-05,
"loss": 0.2344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1211516484618187,
"step": 880,
"valid_targets_mean": 4668.2,
"valid_targets_min": 1743
},
{
"epoch": 3.2905027932960893,
"grad_norm": 0.3629688521529729,
"learning_rate": 1.2695481360034978e-05,
"loss": 0.2131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10855499655008316,
"step": 885,
"valid_targets_mean": 4609.4,
"valid_targets_min": 1633
},
{
"epoch": 3.3091247672253257,
"grad_norm": 0.3504360151541152,
"learning_rate": 1.245440380812566e-05,
"loss": 0.2012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10300320386886597,
"step": 890,
"valid_targets_mean": 4465.0,
"valid_targets_min": 2085
},
{
"epoch": 3.3277467411545625,
"grad_norm": 0.32251798545786803,
"learning_rate": 1.2214597873652172e-05,
"loss": 0.1936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0905773714184761,
"step": 895,
"valid_targets_mean": 4838.2,
"valid_targets_min": 1799
},
{
"epoch": 3.346368715083799,
"grad_norm": 0.3336622000100127,
"learning_rate": 1.1976103969777336e-05,
"loss": 0.1995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09432216733694077,
"step": 900,
"valid_targets_mean": 4426.6,
"valid_targets_min": 1714
},
{
"epoch": 3.364990689013035,
"grad_norm": 0.3929612418575331,
"learning_rate": 1.1738962288554745e-05,
"loss": 0.1841,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0892234817147255,
"step": 905,
"valid_targets_mean": 4027.8,
"valid_targets_min": 1946
},
{
"epoch": 3.383612662942272,
"grad_norm": 0.3446626418867246,
"learning_rate": 1.1503212794155406e-05,
"loss": 0.1979,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0939427986741066,
"step": 910,
"valid_targets_mean": 4722.4,
"valid_targets_min": 1696
},
{
"epoch": 3.4022346368715084,
"grad_norm": 0.35774264340118445,
"learning_rate": 1.1268895216132818e-05,
"loss": 0.1957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11662470549345016,
"step": 915,
"valid_targets_mean": 4746.3,
"valid_targets_min": 1884
},
{
"epoch": 3.4208566108007448,
"grad_norm": 0.34279937816191636,
"learning_rate": 1.1036049042727557e-05,
"loss": 0.185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08681117743253708,
"step": 920,
"valid_targets_mean": 4164.0,
"valid_targets_min": 1738
},
{
"epoch": 3.439478584729981,
"grad_norm": 0.3763744700811407,
"learning_rate": 1.0804713514212554e-05,
"loss": 0.1833,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09903687238693237,
"step": 925,
"valid_targets_mean": 4449.6,
"valid_targets_min": 2173
},
{
"epoch": 3.458100558659218,
"grad_norm": 0.35837853241908835,
"learning_rate": 1.0574927616280139e-05,
"loss": 0.1858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09481087327003479,
"step": 930,
"valid_targets_mean": 4874.1,
"valid_targets_min": 2117
},
{
"epoch": 3.4767225325884543,
"grad_norm": 0.40429914472262846,
"learning_rate": 1.0346730073471993e-05,
"loss": 0.1807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11803603172302246,
"step": 935,
"valid_targets_mean": 5221.1,
"valid_targets_min": 2154
},
{
"epoch": 3.4953445065176907,
"grad_norm": 0.3711739680040434,
"learning_rate": 1.0120159342653153e-05,
"loss": 0.1773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08699009567499161,
"step": 940,
"valid_targets_mean": 4491.3,
"valid_targets_min": 1967
},
{
"epoch": 3.5139664804469275,
"grad_norm": 0.37363867967479586,
"learning_rate": 9.895253606531038e-06,
"loss": 0.1831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08354979753494263,
"step": 945,
"valid_targets_mean": 4046.6,
"valid_targets_min": 1432
},
{
"epoch": 3.532588454376164,
"grad_norm": 0.44263358663951935,
"learning_rate": 9.672050767220765e-06,
"loss": 0.2038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11277249455451965,
"step": 950,
"valid_targets_mean": 5051.2,
"valid_targets_min": 1855
},
{
"epoch": 3.5512104283054002,
"grad_norm": 0.3336089325731764,
"learning_rate": 9.450588439857697e-06,
"loss": 0.2213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11583083122968674,
"step": 955,
"valid_targets_mean": 5088.0,
"valid_targets_min": 1865
},
{
"epoch": 3.5698324022346366,
"grad_norm": 0.29491303529078194,
"learning_rate": 9.230903946258391e-06,
"loss": 0.1964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11058211326599121,
"step": 960,
"valid_targets_mean": 5648.7,
"valid_targets_min": 1663
},
{
"epoch": 3.5884543761638734,
"grad_norm": 0.27522508073944885,
"learning_rate": 9.013034308630945e-06,
"loss": 0.1878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08331070095300674,
"step": 965,
"valid_targets_mean": 4394.6,
"valid_targets_min": 2021
},
{
"epoch": 3.60707635009311,
"grad_norm": 0.30907108754902735,
"learning_rate": 8.79701624333585e-06,
"loss": 0.1864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09504259377717972,
"step": 970,
"valid_targets_mean": 4734.1,
"valid_targets_min": 2147
},
{
"epoch": 3.6256983240223466,
"grad_norm": 0.3078382923065214,
"learning_rate": 8.582886154698407e-06,
"loss": 0.1727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08389315009117126,
"step": 975,
"valid_targets_mean": 4702.8,
"valid_targets_min": 1900
},
{
"epoch": 3.644320297951583,
"grad_norm": 0.2927314109800218,
"learning_rate": 8.370680128873679e-06,
"loss": 0.1825,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08893483877182007,
"step": 980,
"valid_targets_mean": 5112.3,
"valid_targets_min": 2065
},
{
"epoch": 3.6629422718808193,
"grad_norm": 0.3049682447465429,
"learning_rate": 8.160433927765097e-06,
"loss": 0.1907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08556380122900009,
"step": 985,
"valid_targets_mean": 5058.2,
"valid_targets_min": 2522
},
{
"epoch": 3.6815642458100557,
"grad_norm": 0.2770762255480885,
"learning_rate": 7.952182982997743e-06,
"loss": 0.1779,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09861546754837036,
"step": 990,
"valid_targets_mean": 5131.3,
"valid_targets_min": 2161
},
{
"epoch": 3.7001862197392925,
"grad_norm": 0.3549830142853625,
"learning_rate": 7.745962389947195e-06,
"loss": 0.1668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06256135553121567,
"step": 995,
"valid_targets_mean": 5891.1,
"valid_targets_min": 1641
},
{
"epoch": 3.718808193668529,
"grad_norm": 0.28452100847613737,
"learning_rate": 7.541806901825141e-06,
"loss": 0.1246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06664276123046875,
"step": 1000,
"valid_targets_mean": 6142.4,
"valid_targets_min": 2831
},
{
"epoch": 3.7374301675977653,
"grad_norm": 0.2744053870236639,
"learning_rate": 7.339750923822595e-06,
"loss": 0.1267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06239338591694832,
"step": 1005,
"valid_targets_mean": 5398.8,
"valid_targets_min": 2088
},
{
"epoch": 3.756052141527002,
"grad_norm": 0.23916132063233655,
"learning_rate": 7.139828507311792e-06,
"loss": 0.1192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06390925496816635,
"step": 1010,
"valid_targets_mean": 5946.6,
"valid_targets_min": 1659
},
{
"epoch": 3.7746741154562384,
"grad_norm": 0.23032557623436256,
"learning_rate": 6.942073344107682e-06,
"loss": 0.1109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05467592179775238,
"step": 1015,
"valid_targets_mean": 5572.2,
"valid_targets_min": 1718
},
{
"epoch": 3.793296089385475,
"grad_norm": 0.24678198189145398,
"learning_rate": 6.746518760790071e-06,
"loss": 0.1107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06743580102920532,
"step": 1020,
"valid_targets_mean": 5919.4,
"valid_targets_min": 1512
},
{
"epoch": 3.811918063314711,
"grad_norm": 0.2381354700325367,
"learning_rate": 6.553197713087227e-06,
"loss": 0.1094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0567602775990963,
"step": 1025,
"valid_targets_mean": 5579.0,
"valid_targets_min": 3195
},
{
"epoch": 3.830540037243948,
"grad_norm": 0.2276090324661052,
"learning_rate": 6.3621427803220735e-06,
"loss": 0.1102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05728829279541969,
"step": 1030,
"valid_targets_mean": 5613.9,
"valid_targets_min": 2897
},
{
"epoch": 3.8491620111731844,
"grad_norm": 0.21642846836817936,
"learning_rate": 6.173386159921766e-06,
"loss": 0.1133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06424218416213989,
"step": 1035,
"valid_targets_mean": 5535.1,
"valid_targets_min": 1371
},
{
"epoch": 3.8677839851024207,
"grad_norm": 0.23163902556844526,
"learning_rate": 5.98695966199163e-06,
"loss": 0.1126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048331666737794876,
"step": 1040,
"valid_targets_mean": 5154.7,
"valid_targets_min": 2315
},
{
"epoch": 3.8864059590316575,
"grad_norm": 0.21070714474137134,
"learning_rate": 5.802894703954382e-06,
"loss": 0.1032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05012443661689758,
"step": 1045,
"valid_targets_mean": 5605.2,
"valid_targets_min": 1984
},
{
"epoch": 3.905027932960894,
"grad_norm": 0.22853410654463693,
"learning_rate": 5.621222305255554e-06,
"loss": 0.11,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05816968157887459,
"step": 1050,
"valid_targets_mean": 6049.4,
"valid_targets_min": 2764
},
{
"epoch": 3.9236499068901303,
"grad_norm": 0.2379166167854269,
"learning_rate": 5.441973082135907e-06,
"loss": 0.1028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05406898260116577,
"step": 1055,
"valid_targets_mean": 5502.0,
"valid_targets_min": 2260
},
{
"epoch": 3.9422718808193666,
"grad_norm": 0.20973140421298134,
"learning_rate": 5.265177242471899e-06,
"loss": 0.1041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05013309791684151,
"step": 1060,
"valid_targets_mean": 5478.0,
"valid_targets_min": 1855
},
{
"epoch": 3.9608938547486034,
"grad_norm": 0.22236062413735788,
"learning_rate": 5.09086458068488e-06,
"loss": 0.1007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05143527686595917,
"step": 1065,
"valid_targets_mean": 5612.8,
"valid_targets_min": 2468
},
{
"epoch": 3.97951582867784,
"grad_norm": 0.21206442494809297,
"learning_rate": 4.919064472720014e-06,
"loss": 0.1031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05190473794937134,
"step": 1070,
"valid_targets_mean": 5816.8,
"valid_targets_min": 1834
},
{
"epoch": 3.998137802607076,
"grad_norm": 0.20629239879718422,
"learning_rate": 4.749805871095732e-06,
"loss": 0.1029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046846091747283936,
"step": 1075,
"valid_targets_mean": 5753.0,
"valid_targets_min": 1235
},
{
"epoch": 4.01489757914339,
"grad_norm": 0.5308448838529755,
"learning_rate": 4.5831173000245e-06,
"loss": 0.1836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12533260881900787,
"step": 1080,
"valid_targets_mean": 7661.5,
"valid_targets_min": 2439
},
{
"epoch": 4.033519553072626,
"grad_norm": 0.24098689788800592,
"learning_rate": 4.4190268506058074e-06,
"loss": 0.2018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09956145286560059,
"step": 1085,
"valid_targets_mean": 6907.5,
"valid_targets_min": 2738
},
{
"epoch": 4.052141527001862,
"grad_norm": 0.24383435515867613,
"learning_rate": 4.257562176092127e-06,
"loss": 0.1818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0828259289264679,
"step": 1090,
"valid_targets_mean": 6410.7,
"valid_targets_min": 2445
},
{
"epoch": 4.070763500931099,
"grad_norm": 0.2163950525445711,
"learning_rate": 4.098750487228653e-06,
"loss": 0.1885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08502789586782455,
"step": 1095,
"valid_targets_mean": 6399.4,
"valid_targets_min": 1929
},
{
"epoch": 4.089385474860335,
"grad_norm": 0.21083241045721227,
"learning_rate": 3.942618547667656e-06,
"loss": 0.1849,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07837125658988953,
"step": 1100,
"valid_targets_mean": 6164.7,
"valid_targets_min": 1823
},
{
"epoch": 4.1080074487895715,
"grad_norm": 0.20466030093676923,
"learning_rate": 3.7891926694581216e-06,
"loss": 0.1652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07821375131607056,
"step": 1105,
"valid_targets_mean": 6921.8,
"valid_targets_min": 2809
},
{
"epoch": 4.126629422718808,
"grad_norm": 0.22208208771018614,
"learning_rate": 3.6384987086115353e-06,
"loss": 0.1719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09721019864082336,
"step": 1110,
"valid_targets_mean": 6955.7,
"valid_targets_min": 2131
},
{
"epoch": 4.145251396648045,
"grad_norm": 0.2135551173088567,
"learning_rate": 3.49056206074452e-06,
"loss": 0.171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08980279415845871,
"step": 1115,
"valid_targets_mean": 7009.1,
"valid_targets_min": 1790
},
{
"epoch": 4.1638733705772815,
"grad_norm": 0.22700578037823518,
"learning_rate": 3.345407656799058e-06,
"loss": 0.158,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07464779168367386,
"step": 1120,
"valid_targets_mean": 6647.1,
"valid_targets_min": 1889
},
{
"epoch": 4.182495344506518,
"grad_norm": 0.2492117954904908,
"learning_rate": 3.203059958840999e-06,
"loss": 0.1538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08767087012529373,
"step": 1125,
"valid_targets_mean": 7243.6,
"valid_targets_min": 2074
},
{
"epoch": 4.201117318435754,
"grad_norm": 0.25428629048582396,
"learning_rate": 3.063542955937615e-06,
"loss": 0.1654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09225225448608398,
"step": 1130,
"valid_targets_mean": 7255.7,
"valid_targets_min": 1549
},
{
"epoch": 4.219739292364991,
"grad_norm": 0.25094232783984594,
"learning_rate": 2.9268801601148555e-06,
"loss": 0.1654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07944104820489883,
"step": 1135,
"valid_targets_mean": 7046.5,
"valid_targets_min": 1799
},
{
"epoch": 4.238361266294227,
"grad_norm": 0.24866583116101532,
"learning_rate": 2.793094602395008e-06,
"loss": 0.1565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06737985461950302,
"step": 1140,
"valid_targets_mean": 5729.9,
"valid_targets_min": 1609
},
{
"epoch": 4.256983240223463,
"grad_norm": 0.39758422193173765,
"learning_rate": 2.6622088289153804e-06,
"loss": 0.1931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09932229667901993,
"step": 1145,
"valid_targets_mean": 4044.7,
"valid_targets_min": 1709
},
{
"epoch": 4.275605214152701,
"grad_norm": 0.3591756238366576,
"learning_rate": 2.534244897128748e-06,
"loss": 0.1886,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0849931463599205,
"step": 1150,
"valid_targets_mean": 4355.0,
"valid_targets_min": 1978
},
{
"epoch": 4.294227188081937,
"grad_norm": 0.31402517638422384,
"learning_rate": 2.4092243720861276e-06,
"loss": 0.1809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12222952395677567,
"step": 1155,
"valid_targets_mean": 5516.1,
"valid_targets_min": 1489
},
{
"epoch": 4.312849162011173,
"grad_norm": 0.33093787158415533,
"learning_rate": 2.287168322802533e-06,
"loss": 0.1612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09058675169944763,
"step": 1160,
"valid_targets_mean": 4852.4,
"valid_targets_min": 2071
},
{
"epoch": 4.33147113594041,
"grad_norm": 0.29044590186071456,
"learning_rate": 2.1680973187063415e-06,
"loss": 0.1524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07333887368440628,
"step": 1165,
"valid_targets_mean": 4316.4,
"valid_targets_min": 1565
},
{
"epoch": 4.350093109869646,
"grad_norm": 0.310482032598639,
"learning_rate": 2.0520314261728357e-06,
"loss": 0.1617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07164377719163895,
"step": 1170,
"valid_targets_mean": 4140.9,
"valid_targets_min": 1878
},
{
"epoch": 4.368715083798882,
"grad_norm": 0.29736588517519846,
"learning_rate": 1.938990205142526e-06,
"loss": 0.1449,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07602109760046005,
"step": 1175,
"valid_targets_mean": 4135.1,
"valid_targets_min": 1695
},
{
"epoch": 4.387337057728119,
"grad_norm": 0.3160724687733261,
"learning_rate": 1.8289927058248325e-06,
"loss": 0.1615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07650075107812881,
"step": 1180,
"valid_targets_mean": 4301.7,
"valid_targets_min": 1418
},
{
"epoch": 4.405959031657356,
"grad_norm": 0.28031984965761936,
"learning_rate": 1.7220574654876453e-06,
"loss": 0.1528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07221689075231552,
"step": 1185,
"valid_targets_mean": 4130.4,
"valid_targets_min": 1475
},
{
"epoch": 4.424581005586592,
"grad_norm": 0.3076875162786684,
"learning_rate": 1.6182025053333595e-06,
"loss": 0.145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06376394629478455,
"step": 1190,
"valid_targets_mean": 3774.1,
"valid_targets_min": 1587
},
{
"epoch": 4.443202979515829,
"grad_norm": 0.2836853100448112,
"learning_rate": 1.5174453274618416e-06,
"loss": 0.144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0705125704407692,
"step": 1195,
"valid_targets_mean": 4669.5,
"valid_targets_min": 1641
},
{
"epoch": 4.461824953445065,
"grad_norm": 0.31779131062906657,
"learning_rate": 1.4198029119209112e-06,
"loss": 0.1476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06056446209549904,
"step": 1200,
"valid_targets_mean": 3839.8,
"valid_targets_min": 1878
},
{
"epoch": 4.4804469273743015,
"grad_norm": 0.2857165674407037,
"learning_rate": 1.325291713844785e-06,
"loss": 0.144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06362733244895935,
"step": 1205,
"valid_targets_mean": 4157.1,
"valid_targets_min": 1513
},
{
"epoch": 4.499068901303538,
"grad_norm": 0.29043499467358436,
"learning_rate": 1.2339276606809824e-06,
"loss": 0.142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07273241877555847,
"step": 1210,
"valid_targets_mean": 4532.7,
"valid_targets_min": 2162
},
{
"epoch": 4.517690875232775,
"grad_norm": 0.27187077336242454,
"learning_rate": 1.145726149506161e-06,
"loss": 0.1403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.058025311678647995,
"step": 1215,
"valid_targets_mean": 3950.7,
"valid_targets_min": 1411
},
{
"epoch": 4.5363128491620115,
"grad_norm": 0.4861947344266717,
"learning_rate": 1.0607020444313431e-06,
"loss": 0.1827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0979158952832222,
"step": 1220,
"valid_targets_mean": 5058.6,
"valid_targets_min": 1870
},
{
"epoch": 4.554934823091248,
"grad_norm": 0.32419104546438365,
"learning_rate": 9.788696740969295e-07,
"loss": 0.1901,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0731668546795845,
"step": 1225,
"valid_targets_mean": 4897.2,
"valid_targets_min": 1573
},
{
"epoch": 4.573556797020484,
"grad_norm": 0.29019151441664376,
"learning_rate": 9.002428292579912e-07,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0815751776099205,
"step": 1230,
"valid_targets_mean": 4843.5,
"valid_targets_min": 2196
},
{
"epoch": 4.592178770949721,
"grad_norm": 0.28619939519110477,
"learning_rate": 8.248347604601803e-07,
"loss": 0.1565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06282777339220047,
"step": 1235,
"valid_targets_mean": 4174.7,
"valid_targets_min": 1942
},
{
"epoch": 4.610800744878957,
"grad_norm": 0.24908229050606268,
"learning_rate": 7.526581758066931e-07,
"loss": 0.1613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06715219467878342,
"step": 1240,
"valid_targets_mean": 4759.8,
"valid_targets_min": 1873
},
{
"epoch": 4.629422718808193,
"grad_norm": 0.25880591606207554,
"learning_rate": 6.837252388166416e-07,
"loss": 0.1443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07651010155677795,
"step": 1245,
"valid_targets_mean": 5333.6,
"valid_targets_min": 2002
},
{
"epoch": 4.648044692737431,
"grad_norm": 0.2497181360656177,
"learning_rate": 6.180475663752106e-07,
"loss": 0.1587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07430170476436615,
"step": 1250,
"valid_targets_mean": 4628.3,
"valid_targets_min": 1927
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.23020108011760998,
"learning_rate": 5.556362267759153e-07,
"loss": 0.1574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07936949282884598,
"step": 1255,
"valid_targets_mean": 5335.1,
"valid_targets_min": 1892
},
{
"epoch": 4.685288640595903,
"grad_norm": 0.2454009759799112,
"learning_rate": 4.965017378553349e-07,
"loss": 0.1524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09208754450082779,
"step": 1260,
"valid_targets_mean": 5511.2,
"valid_targets_min": 1992
},
{
"epoch": 4.70391061452514,
"grad_norm": 0.3235628120281909,
"learning_rate": 4.4065406522059374e-07,
"loss": 0.1284,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05675048008561134,
"step": 1265,
"valid_targets_mean": 5457.9,
"valid_targets_min": 1944
},
{
"epoch": 4.722532588454376,
"grad_norm": 0.3334445932900592,
"learning_rate": 3.8810262056991676e-07,
"loss": 0.1087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.056893110275268555,
"step": 1270,
"valid_targets_mean": 6020.2,
"valid_targets_min": 3332
},
{
"epoch": 4.741154562383612,
"grad_norm": 0.25685520224274744,
"learning_rate": 3.3885626010652153e-07,
"loss": 0.1112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.049848925322294235,
"step": 1275,
"valid_targets_mean": 5805.6,
"valid_targets_min": 2057
},
{
"epoch": 4.759776536312849,
"grad_norm": 0.2188109546053078,
"learning_rate": 2.929232830461404e-07,
"loss": 0.1028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04564082622528076,
"step": 1280,
"valid_targets_mean": 5415.1,
"valid_targets_min": 3408
},
{
"epoch": 4.778398510242086,
"grad_norm": 0.2187677112583894,
"learning_rate": 2.503114302183951e-07,
"loss": 0.0952,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04444780945777893,
"step": 1285,
"valid_targets_mean": 5522.8,
"valid_targets_min": 1887
},
{
"epoch": 4.797020484171322,
"grad_norm": 0.22208851636510005,
"learning_rate": 2.110278827622758e-07,
"loss": 0.0982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05595209822058678,
"step": 1290,
"valid_targets_mean": 5780.2,
"valid_targets_min": 2491
},
{
"epoch": 4.815642458100559,
"grad_norm": 0.21060016822581026,
"learning_rate": 1.7507926091594685e-07,
"loss": 0.0918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04811148717999458,
"step": 1295,
"valid_targets_mean": 5595.6,
"valid_targets_min": 2959
},
{
"epoch": 4.834264432029795,
"grad_norm": 0.19833964961713638,
"learning_rate": 1.4247162290107697e-07,
"loss": 0.0965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05214523896574974,
"step": 1300,
"valid_targets_mean": 5988.6,
"valid_targets_min": 3215
},
{
"epoch": 4.8528864059590315,
"grad_norm": 0.2990227082543972,
"learning_rate": 1.1321046390187385e-07,
"loss": 0.0998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05182049795985222,
"step": 1305,
"valid_targets_mean": 5927.0,
"valid_targets_min": 2563
},
{
"epoch": 4.871508379888268,
"grad_norm": 0.2015079045157211,
"learning_rate": 8.730071513901594e-08,
"loss": 0.0948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.045056309551000595,
"step": 1310,
"valid_targets_mean": 5677.5,
"valid_targets_min": 2261
},
{
"epoch": 4.890130353817504,
"grad_norm": 0.20975848200966107,
"learning_rate": 6.474674303862172e-08,
"loss": 0.09,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04260837659239769,
"step": 1315,
"valid_targets_mean": 5403.8,
"valid_targets_min": 2460
},
{
"epoch": 4.9087523277467415,
"grad_norm": 0.19303968844504368,
"learning_rate": 4.555234849639823e-08,
"loss": 0.0965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04524529352784157,
"step": 1320,
"valid_targets_mean": 5690.6,
"valid_targets_min": 2096
},
{
"epoch": 4.927374301675978,
"grad_norm": 0.19699741738755241,
"learning_rate": 2.9720766237095745e-08,
"loss": 0.0904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05184662342071533,
"step": 1325,
"valid_targets_mean": 5530.8,
"valid_targets_min": 2383
},
{
"epoch": 4.945996275605214,
"grad_norm": 0.18957844995965004,
"learning_rate": 1.7254664269381604e-08,
"loss": 0.0894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03667488321661949,
"step": 1330,
"valid_targets_mean": 5664.4,
"valid_targets_min": 2242
},
{
"epoch": 4.964618249534451,
"grad_norm": 0.19863766435376254,
"learning_rate": 8.156143436215403e-09,
"loss": 0.0918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.049284886568784714,
"step": 1335,
"valid_targets_mean": 5771.7,
"valid_targets_min": 2131
},
{
"epoch": 4.983240223463687,
"grad_norm": 0.2042161969158567,
"learning_rate": 2.426737060798878e-09,
"loss": 0.0893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052975624799728394,
"step": 1340,
"valid_targets_mean": 5368.6,
"valid_targets_min": 1541
},
{
"epoch": 5.0,
"grad_norm": 0.34259468317161895,
"learning_rate": 6.741068818261198e-11,
"loss": 0.0868,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08489986509084702,
"step": 1345,
"valid_targets_mean": 7030.6,
"valid_targets_min": 2798
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08489986509084702,
"step": 1345,
"total_flos": 3.418313925563777e+18,
"train_loss": 0.2346252706192683,
"train_runtime": 20740.2268,
"train_samples_per_second": 6.206,
"train_steps_per_second": 0.065,
"valid_targets_mean": 7030.6,
"valid_targets_min": 2798
}
],
"logging_steps": 5,
"max_steps": 1345,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 750,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.418313925563777e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}